diff --git a/README.rst b/README.rst
index 55077f6a..79b0c7ad 100644
--- a/README.rst
+++ b/README.rst
@@ -57,6 +57,7 @@ The MS³PIP Python application can perform the following tasks:
 - ``predict-batch``: Predict fragmentation spectra for a batch of peptides.
 - ``predict-library``: Predict a spectral library from protein FASTA file.
 - ``correlate``: Compare predicted and observed intensities and optionally compute correlations.
+- ``correlate-single``: Compare predicted and observed intensities for a single peptide spectrum.
 - ``get-training-data``: Extract feature vectors and target intensities from observed spectra for
   training.
 - ``annotate-spectra``: Annotate peaks in observed spectra.
diff --git a/conversion_tools/MSF_to_MS2PIP_SpecLib.py b/conversion_tools/MSF_to_MS2PIP_SpecLib.py
deleted file mode 100644
index a075ca0e..00000000
--- a/conversion_tools/MSF_to_MS2PIP_SpecLib.py
+++ /dev/null
@@ -1,298 +0,0 @@
-"""
-## MSF and MGF to MS2PIP SpecLib
-Reads an MSF file (SQLite DB), combines it with the matched (multiple) MGF files and writes a spectral library as 1 MS2PIP PEPREC and MGF file. Filters by a given FDR threshold, using q-values calculated from decoy hits or from Percolator.
-
-*MSF_to_MS2PIP_SpecLib.py*
-**Input:** MSF and MGF files
-**Output:** Matched PEPREC and MGF file
-
-```
-usage: MSF_to_MS2PIP_SpecLib.py [-h] [-s MSF_FOLDER] [-g MGF_FOLDER]
-                                [-o OUTNAME] [-f FDR_CUTOFF] [-p] [-c]
-
-Convert Sequest MSF and MGF to MS2PIP spectral library.
-
-optional arguments:
-  -h, --help            show this help message and exit
-  -s MSF_FOLDER, --msf MSF_FOLDER
-                        Folder with Sequest MSF files (default: "msf")
-  -g MGF_FOLDER, --mgf MGF_FOLDER
-                        Folder with MGF spectrum files (default: "mgf")
-  -o OUTNAME, --out OUTNAME
-                        Name for output files (default: "SpecLib")
-  -f FDR_CUTOFF, --fdr FDR_CUTOFF
-                        FDR cut-off value to filter PSMs (default: 0.01)
-  -p                    Use Percolator q-values instead of calculating them
-                        from TDS (default: False)
-  -c                    Combine multiple MSF files into one spectral library
-                        (default: False)
-```
-"""
-
-# --------------
-# Import modules
-# --------------
-import argparse
-import re
-import sqlite3
-import pandas as pd
-from glob import glob
-from os import getcwd
-
-
-# ----------------------------------------------------------------
-# Calculate q-Value from PeptideScores using Target-Decoy strategy
-# ----------------------------------------------------------------
-def CalcQVal(Peptides, conn):
-    PeptideScores = pd.read_sql("select * from 'PeptideScores';", conn)
-    PeptideScores_Decoy = pd.read_sql("select * from 'PeptideScores_Decoy';", conn)
-    if len(PeptideScores_Decoy) == 0:
-        print("No decoy PSMs found: cannot calculate q-value.")
-        exit(1)
-    PeptideScores['Decoy'] = False
-    PeptideScores_Decoy['Decoy'] = True
-    PeptideScores = PeptideScores.append(PeptideScores_Decoy)
-
-    PeptideScores.sort_values('ScoreValue', ascending=False, inplace=True)
-    PeptideScores['CumSum_Decoys'] = PeptideScores['Decoy'].cumsum()
-    PeptideScores['CumSum_Targets'] = (~PeptideScores['Decoy']).cumsum()
-    PeptideScores['q-Value'] = PeptideScores['CumSum_Decoys'] / PeptideScores['CumSum_Targets']
-    PeptideScores = PeptideScores[~PeptideScores['Decoy']][['PeptideID', 'q-Value']]
-
-    Peptides = Peptides.merge(PeptideScores, on='PeptideID')
-
-    return(Peptides)
-
-
-# ------------------------------------------------------
-# Get Percolator q-value and PEP out of CustomDataFields
-# ------------------------------------------------------
-def GetPercolatorQVal(Peptides, conn):
-    CustomDataFields = pd.read_sql("select FieldID, DisplayName from 'CustomDataFields';", conn)
-    CustomDataFields.index = CustomDataFields['FieldID']
-    CustomDataFields_dict = CustomDataFields['DisplayName'].to_dict()
-
-    CustomDataPeptides = pd.read_sql("select * from CustomDataPeptides;", conn)
-    CustomDataPeptides['FieldName'] = [CustomDataFields_dict[ID] for ID in CustomDataPeptides['FieldID']]
-    CustomDataPeptides = CustomDataPeptides.pivot(values='FieldValue', index='PeptideID', columns='FieldName').reset_index()
-
-    Peptides = Peptides.merge(CustomDataPeptides, on='PeptideID')
-
-    return(Peptides)
-
-
-# --------
-# Get PTMs
-# --------
-def ConcatPTMs(series):
-        return(series.str.cat(sep='|'))
-
-
-def GetPTMs(Peptides, conn):
-    AminoAcidModifications = pd.read_sql("select * from 'AminoAcidModifications'", conn)
-    PeptidesAminoAcidModifications = pd.read_sql("select * from 'PeptidesAminoAcidModifications'", conn)
-    PeptidesTerminalModifications = pd.read_sql("select * from 'PeptidesTerminalModifications'", conn)
-
-    # Get amino acid IDs and names in dict for easy access
-    AminoAcidModifications.index = AminoAcidModifications.AminoAcidModificationID
-    AminoAcidModifications_dict = AminoAcidModifications['ModificationName'].to_dict()
-
-    # Combine terminal and normal PTMs into one dataframe
-    PeptidesTerminalModifications.columns = ['ProcessingNodeNumber', 'PeptideID', 'AminoAcidModificationID']
-    PeptidesTerminalModifications['PositionMS2PIP'] = 0
-    PeptidesAminoAcidModifications['PositionMS2PIP'] = PeptidesAminoAcidModifications['Position'] + 1
-    Modifications = PeptidesAminoAcidModifications.append(PeptidesTerminalModifications, ignore_index=True)
-
-    # Translate PTM IDs to names
-    Modifications['ModificationName'] = [AminoAcidModifications_dict[ID] for ID in Modifications['AminoAcidModificationID']]
-
-    # Concatenate PTMs to MS2PIP notation per peptide
-    Modifications.sort_values('PositionMS2PIP', inplace=True)
-    Modifications['NotationMS2PIP'] = Modifications['PositionMS2PIP'].map(str) + '|' + Modifications['ModificationName']
-    MS2PIPmods = pd.DataFrame(Modifications.groupby('PeptideID')['NotationMS2PIP'].apply(ConcatPTMs))
-    MS2PIPmods.rename(columns={'NotationMS2PIP': 'Modifications'}, inplace=True)
-    MS2PIPmods.reset_index(inplace=True)
-    Peptides = Peptides.merge(MS2PIPmods, on='PeptideID')
-
-    return(Peptides)
-
-
-# ------------------------------------------
-# Get spectrum properties and RAW file names
-# ------------------------------------------
-def GetSpectrumProperties(Peptides, conn):
-    cols = 'SpectrumID, Charge, Mass, LastScan, FirstScan, ScanNumbers, MassPeakID'
-    SpectrumHeaders = pd.read_sql("select {} from SpectrumHeaders".format(cols), conn)
-
-    FileInfos = pd.read_sql("select FileID, FileName from FileInfos", conn)
-    FileInfos['FileName'] = FileInfos['FileName'].str.split('\\')
-    FileInfos['FileName'] = [name[-1].split('.raw')[0].split('.mzML')[0] for name in FileInfos['FileName']]
-
-    MassPeaks = pd.read_sql("select MassPeakID, FileID from MassPeaks;", conn)
-    MassPeaks = MassPeaks.merge(FileInfos, on='FileID')
-
-    SpectrumHeaders = SpectrumHeaders.merge(MassPeaks, on='MassPeakID')
-    Peptides = Peptides.merge(SpectrumHeaders, on='SpectrumID')
-
-    return(Peptides)
-
-
-# -------------------------------------------------------
-# Filter Peptide list for non-redundancy and low q-values
-# -------------------------------------------------------
-def FilterForSpecLib(Peptides, FDR_CutOff):
-    # Filter on q-value
-    len_before = len(Peptides)
-    Peptides = Peptides[Peptides['q-Value'] <= FDR_CutOff].copy()
-    len_after = len(Peptides)
-    print("q-value cut-off {}: Kept {} out of {} peptides.".format(FDR_CutOff, len_after, len_before))
-
-    # Remove duplicate peptides: keep the one with the highest q-value
-    len_before = len(Peptides)
-    Peptides.sort_values('q-Value', inplace=True)
-    Peptides = Peptides[~Peptides.duplicated(['Sequence', 'Modifications', 'Charge'], keep='first')].copy()
-    len_after = len(Peptides)
-    print("Removed duplicate peptides: Kept {} out of {} peptides.".format(len_after, len_before))
-
-    # Remove multiple peptides mapping to the same spectrum: keep the one with the highest q-value
-    len_before = len(Peptides)
-    Peptides = Peptides[~Peptides.duplicated(['SpectrumID', 'FileName'], keep='first')].copy()
-    len_after = len(Peptides)
-    print("Removed duplicate spectra: Kept {} out of {} peptides.".format(len_after, len_before))
-
-    # Annotate tryptic peptides
-    Peptides['Tryptic'] = ((Peptides['Sequence'].str.endswith('K')) | (Peptides['Sequence'].str.endswith('R')))
-
-    # Set PeptideID as index
-    Peptides.index = Peptides['PeptideID']
-
-    return(Peptides)
-
-
-# ------------------------------------------------------------------
-# Scan multiple MGF files for spectra present in Peptides data frame
-# ------------------------------------------------------------------
-def ScanMGF(df_in, mgf_folder, outname='SpecLib.mgf'):
-    with open(outname, 'w') as out:
-        count_runs = 0
-        count = 0
-        runs = df_in['FileName'].unique()
-        print("Scanning MGF files: {} runs to do. Now working on run: ".format(len(runs)), end='')
-        for run in runs:
-            count_runs += 1
-            if count_runs % 10 == 0:
-                print(str(count_runs), end='')
-            else:
-                print('.', end='')
-            spec_dict = dict((v, k) for k, v in df_in[(df_in['FileName'] == run)]['FirstScan'].to_dict().items())
-
-            # Parse file
-            found = False
-            with open('{}/{}.mgf'.format(mgf_folder, str(run)), 'r') as f:
-                for i, line in enumerate(f):
-                    if 'TITLE' in line:
-                        ScanNum = int(re.split('scan=|"', line)[-2])
-                        if ScanNum in spec_dict:
-                            found = True
-                            out.write("BEGIN IONS\n")
-                            line = "TITLE=" + str(spec_dict[ScanNum]) + '\n'
-                            count += 1
-                    if 'END IONS' in line:
-                        if found:
-                            out.write(line + '\n')
-                            found = False
-                    if found and line[-4:] != '0.0\n':
-                        out.write(line)
-
-    print("\n{}/{} spectra found and written to new MGF file.".format(count, len(df_in)))
-
-
-# ------------------------
-# Write MS2PIP PEPREC file
-# ------------------------
-def WritePEPREC(Peptides, outname='SpecLib.peprec'):
-    peprec = Peptides[['PeptideID', 'Modifications', 'Sequence', 'Charge']]
-    peprec.columns = ['spec_id', 'modifications', 'peptide', 'charge']
-    peprec.to_csv(outname, sep=' ', na_rep='-', index=None)
-
-
-# ---------------
-# Argument parser
-# ---------------
-def ArgParse():
-    parser = argparse.ArgumentParser(description='Convert Sequest MSF and MGF to MS2PIP spectral library.')
-    parser.add_argument('-s', '--msf', dest='msf_folder', action='store', default='msf',
-                        help='Folder with Sequest MSF files (default: "msf")')
-    parser.add_argument('-g', '--mgf', dest='mgf_folder', action='store', default='mgf',
-                        help='Folder with MGF spectrum files (default: "mgf")')
-    parser.add_argument('-o', '--out', dest='outname', action='store', default='SpecLib',
-                        help='Name for output files (default: "SpecLib")')
-    parser.add_argument('-f', '--fdr', dest='FDR_CutOff', action='store', default=0.01, type=float,
-                        help='FDR cut-off value to filter PSMs (default: 0.01)')
-    parser.add_argument('-p', dest='percolator', action='store_true', default=False,
-                        help='Use Percolator q-values instead of calculating them from TDS (default: False)')
-    parser.add_argument('-c', dest='combine_msf', action='store_true', default=False,
-                        help='Combine multiple MSF files into one spectral library (default: False)')
-    args = parser.parse_args()
-
-    return(args)
-
-
-# ----
-# Run!
-# ----
-def run():
-    args = ArgParse()
-    msf_files = glob("{}/*.msf".format(args.msf_folder))
-    msf_count = 0
-
-    if args.combine_msf:
-        Peptides = pd.DataFrame()
-        for msf_filename in msf_files:
-            msf_count += 1
-            print("Adding MSF file {} of {}...".format(msf_count, len(msf_files)))
-            conn = sqlite3.connect(msf_filename)
-            Peptides_tmp = pd.read_sql("select * from Peptides;", conn)
-            if args.percolator:
-                Peptides_tmp = GetPercolatorQVal(Peptides_tmp, conn)
-            else:
-                Peptides_tmp = CalcQVal(Peptides_tmp, conn)
-            Peptides_tmp = GetPTMs(Peptides_tmp, conn)
-            Peptides_tmp = GetSpectrumProperties(Peptides_tmp, conn)
-            Peptides = Peptides.append(Peptides_tmp)
-
-        Peptides = FilterForSpecLib(Peptides, args.FDR_CutOff)
-
-        outname_appendix = msf_files[0]
-        outname_appendix = outname_appendix.replace('/', '')
-        outname_appendix = outname_appendix.replace('.msf', '')
-
-        ScanMGF(Peptides, args.mgf_folder, outname='{}_{}_Combined.mgf'.format(args.outname, outname_appendix))
-        WritePEPREC(Peptides, outname='{}_{}_Combined.peprec'.format(args.outname, outname_appendix))
-
-    else:
-        for msf_filename in msf_files:
-            msf_count += 1
-            print("\nWorking on MSF file {} of {}...".format(msf_count, len(msf_files)))
-            conn = sqlite3.connect(msf_filename)
-            Peptides = pd.read_sql("select * from Peptides;", conn)
-            if args.percolator:
-                Peptides = GetPercolatorQVal(Peptides, conn)
-            else:
-                Peptides = CalcQVal(Peptides, conn)
-            Peptides = GetPTMs(Peptides, conn)
-            Peptides = GetSpectrumProperties(Peptides, conn)
-            Peptides = FilterForSpecLib(Peptides, args.FDR_CutOff)
-
-            outname_appendix = msf_filename
-            outname_appendix = outname_appendix.replace('/', '')
-            outname_appendix = outname_appendix.replace('.msf', '')
-
-            ScanMGF(Peptides, args.mgf_folder, outname='{}_{}.mgf'.format(args.outname, outname_appendix))
-            WritePEPREC(Peptides, outname='{}_{}.peprec'.format(args.outname, outname_appendix))
-
-    print("Ready!")
-
-
-if __name__ == "__main__":
-    run()
diff --git a/conversion_tools/Split_MS2PIP_SpecLib.py b/conversion_tools/Split_MS2PIP_SpecLib.py
deleted file mode 100644
index b459a3ee..00000000
--- a/conversion_tools/Split_MS2PIP_SpecLib.py
+++ /dev/null
@@ -1,118 +0,0 @@
-"""
-## Split MS2PIP Spectral Library
-Split MS2PIP spectral library (PEPREC and MGF file) into a train and test set.
-
-*Split_MS2PIP_SpecLib.py*
-**Input:** PEPREC and MGF file
-**Output:** PEPREC and MGF files for both train and test data set.
-
-```
-usage: Split_MS2PIP_SpecLib.py [-h] [-o OUT_FILENAME] [-f TEST_FRACTION]
-                               peprec_file mgf_file
-
-Split MS2PIP spectral library (PEPREC and MGF file) into a train and test set.
-
-positional arguments:
-  peprec_file       PEPREC file input
-  mgf_file          MGF file input
-
-optional arguments:
-  -h, --help        show this help message and exit
-  -o OUT_FILENAME   Name for output files (default: "SpecLib")
-  -f TEST_FRACTION  Fraction of input to use for test data set (default: 0.1)
-```
-"""
-
-# --------------
-# Import modules
-# --------------
-import argparse
-import numpy as np
-import pandas as pd
-
-
-# --------------
-# Split PEPREC
-# --------------
-def SplitPeprec(peprec, test_fraction):
-    msk = np.random.rand(len(peprec)) > test_fraction
-    peprec_train = peprec[msk]
-    peprec_test = peprec[~msk]
-    return(peprec_train, peprec_test)
-
-
-# --------------
-# Split MGF file
-# --------------
-def SplitMGF(peprec_train, peprec_test, mgf_filename, out_filename='SpecLib'):
-    count_train = 0
-    count_test = 0
-    print("{} spectra to go...".format(len(peprec_train) + len(peprec_test)), end='')
-    with open('{}_Train.mgf'.format(out_filename), 'w') as out_train:
-        with open('{}_Test.mgf'.format(out_filename), 'w') as out_test:
-            spec_ids_train = list(peprec_train['spec_id'])
-            spec_ids_test = list(peprec_test['spec_id'])
-            found_train = False
-            found_test = False
-            with open(mgf_filename, 'r') as mgf:
-                for i, line in enumerate(mgf):
-                    if 'TITLE' in line:
-                        spec_id = int(line.split('TITLE=')[1])
-                        if spec_id in spec_ids_train:
-                            found_train = True
-                            count_train += 1
-                            out_train.write("BEGIN IONS\n")
-                            if count_train % 100 == 0:
-                                print('.', end='')
-                        elif spec_id in spec_ids_test:
-                            found_test = True
-                            count_test += 1
-                            out_test.write("BEGIN IONS\n")
-                            if count_test % 100 == 0:
-                                print('.', end='')
-                    if 'END IONS' in line:
-                        if found_train:
-                            out_train.write(line + '\n')
-                            found_train = False
-                        elif found_test:
-                            out_test.write(line + '\n')
-                            found_test = False
-                    if found_train:
-                        out_train.write(line)
-                    elif found_test:
-                        out_test.write(line)
-
-    print("\nReady! Wrote {}/{} spectra to {}_Train.mgf and {}/{} spectra to {}_Test.mgf.".format(count_train, len(peprec_train),
-                                                                                                  out_filename, count_test, len(peprec_test),
-                                                                                                  out_filename))
-
-
-# ---------------
-# Argument parser
-# ---------------
-def ArgParse():
-    parser = argparse.ArgumentParser(description='Split MS2PIP spectral library (PEPREC and MGF file) into a train and test set.')
-    parser.add_argument('peprec_file', action='store', help='PEPREC file input')
-    parser.add_argument('mgf_file', action='store', help='MGF file input')
-    parser.add_argument('-o', dest='out_filename', action='store', default='SpecLib',
-                        help='Name for output files (default: "SpecLib")')
-    parser.add_argument('-f', dest='test_fraction', action='store', default=0.1, type=float,
-                        help='Fraction of input to use for test data set (default: 0.1)')
-    args = parser.parse_args()
-    return(args)
-
-
-# ----
-# Run!
-# ----
-def run():
-    args = ArgParse()
-    peprec = pd.read_csv(args.peprec_file, sep=' ')
-    (peprec_train, peprec_test) = SplitPeprec(peprec, args.test_fraction)
-    SplitMGF(peprec_train, peprec_test, args.mgf_file, args.out_filename)
-    peprec_train.to_csv('{}_Train.peprec'.format(args.out_filename), sep=' ', na_rep='-', index=False)
-    peprec_test.to_csv('{}_Test.peprec'.format(args.out_filename), sep=' ', na_rep='-', index=False)
-
-
-if __name__ == "__main__":
-    run()
diff --git a/conversion_tools/blib_converter.py b/conversion_tools/blib_converter.py
deleted file mode 100755
index b38429b4..00000000
--- a/conversion_tools/blib_converter.py
+++ /dev/null
@@ -1,151 +0,0 @@
-#!/usr/bin/env python3
-import argparse
-import sys
-import zlib
-import numpy
-import sqlalchemy
-import pyteomics.mgf
-import ms2pip.config_parser
-import ms2pip.peptides
-
-from collections import defaultdict, namedtuple
-from sqlalchemy import (MetaData, Table, Column, Integer, String, Float,
-                        ForeignKey, TypeDecorator)
-from sqlalchemy.sql import select
-from sqlalchemy.dialects.sqlite import BLOB
-
-
-class CompressedArray(TypeDecorator):
-    """Compressed binary arrays stored as BLOB in SQLite."""
-
-    impl = BLOB
-
-    def __init__(self, dtype, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.dtype = dtype
-
-    def process_bind_param(self, value, dialect):
-        # TODO: allow configuration of compression level
-        return zlib.compress(value.tobytes(), 3)
-
-    def process_result_value(self, value, dialect):
-        decompressed = zlib.decompress(value)
-        return numpy.frombuffer(decompressed, dtype=self.dtype)
-
-    def copy(self):
-        return CompressedArray(self.dtype, self.impl.length)
-
-
-metadata = MetaData()
-
-RefSpectra = Table('RefSpectra', metadata,
-                   Column('id', Integer, primary_key=True),
-                   Column('peptideSeq', String(200)),
-                   Column('precursorMZ', Float),
-                   Column('precursorCharge', Integer),
-                   Column('retentionTime', Float),
-                   )
-
-Modifications = Table('Modifications', metadata,
-                      Column('id', Integer, primary_key=True),
-                      Column('RefSpectraID', Integer, ForeignKey('RefSpectra.id')),
-                      Column('position', Integer),
-                      Column('mass', Float),
-                      )
-
-RefSpectraPeaks = Table('RefSpectraPeaks', metadata,
-                        Column('RefSpectraID', Integer, ForeignKey('RefSpectra.id')),
-                        Column('peakMZ', CompressedArray(numpy.double)),
-                        Column('peakIntensity', CompressedArray(numpy.single)),
-                        )
-
-Modification = namedtuple("Modification", "name, amino_acid")
-
-
-def open_sqlite(filename):
-    engine = sqlalchemy.create_engine(f"sqlite:///{filename}")
-    metadata.bind = engine
-    return engine.connect()
-
-
-def get_modification_config(config_file):
-    mods = defaultdict(list)
-
-    config = ms2pip.config_parser.ConfigParser(filepath=config_file)
-    modifications = ms2pip.peptides.Modifications()
-    modifications.add_from_ms2pip_modstrings(config.config['ms2pip']['ptm'])
-
-    for name, mod in modifications._all_modifications:
-        mods[mod['mass_shift']].append(Modification(name, mod['amino_acid']))
-    return mods
-
-
-def read_peptides(filename, modifications):
-    with open_sqlite(filename) as connection:
-        for spec in connection.execute(RefSpectra.select()):
-            spec_mods = []
-            for modification in connection.execute(Modifications.select().where(Modifications.c.RefSpectraID == spec.id)):
-                for mod in modifications[modification.mass]:
-                    if mod.amino_acid == 'N-term' and modification.position == 1:
-                        spec_mods.append(f"0|{mod.name}")
-                    elif mod.amino_acid == 'C-term' and modification.position == len(spec.peptideSeq):
-                        spec_mods.append(f"-1|{mod.name}")
-                    elif mod.amino_acid == spec.peptideSeq[modification.position-1]:
-                        spec_mods.append(f"{modification.position}|{mod.name}")
-            spec_mods = '|'.join(spec_mods) if spec_mods else '-'
-            yield f"{spec.id} {spec_mods} {spec.peptideSeq} {spec.precursorCharge}"
-
-
-def read_spectra(filename):
-    with open_sqlite(filename) as connection:
-        for spec in connection.execute(select([RefSpectra, RefSpectraPeaks]).select_from(RefSpectra.join(RefSpectraPeaks, RefSpectra.c.id == RefSpectraPeaks.c.RefSpectraID))):
-            yield {
-                'm/z array': spec.peakMZ,
-                'intensity array': spec.peakIntensity,
-                'params': {
-                    'title': spec.id,
-                    'pepmass': spec.precursorMZ,
-                    'charge': spec.precursorCharge,
-                    'rtinseconds': spec.retentionTime
-                }
-            }
-
-
-def blib_to_peprec(blib_filename, peprec_file, modifications):
-    for peptide in read_peptides(blib_filename, modifications):
-        peprec_file.write(f"{peptide}\n")
-    peprec_file.flush()
-
-
-def blib_to_mgf(blib_filename, mgf_file):
-    pyteomics.mgf.write(read_spectra(blib_filename), output=mgf_file)
-
-
-def main():
-    parser = argparse.ArgumentParser(description='Convert BiblioSpec Spectral Library to peprec and/or MGF files.')
-    parser.add_argument('blib_filename',
-                        help='input blib file')
-    parser.add_argument('--peprec', nargs='?', type=argparse.FileType('w'),
-                        const=sys.stdout,
-                        help='write peprec file')
-    parser.add_argument('--mgf', nargs='?', type=argparse.FileType('w'),
-                        const=sys.stdout,
-                        help='write MGF file')
-    parser.add_argument('--config',
-                        help='MS2PIP config file')
-
-    args = parser.parse_args()
-
-    if args.peprec and not args.config:
-        parser.error("--peprec requires --config")
-
-    if args.peprec:
-        modifications = get_modification_config(args.config)
-        blib_to_peprec(args.blib_filename, args.peprec, modifications)
-
-    if args.mgf:
-        blib_to_mgf(args.blib_filename, args.mgf)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/conversion_tools/calc_correlations.py b/conversion_tools/calc_correlations.py
deleted file mode 100644
index 4133304e..00000000
--- a/conversion_tools/calc_correlations.py
+++ /dev/null
@@ -1,36 +0,0 @@
-"""
-Calculate correlations from a pred_and_emp.csv file or calculate median
-correlations by ion type from correlations.csv
-"""
-
-
-import argparse
-import pandas as pd
-from ms2pipC import calc_correlations
-
-
-def argument_parser():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("pae", metavar="<pae or correlations file>",
-                        help="pred_and_emp.csv or correlations.csv file")
-    args = parser.parse_args()
-    return args
-
-
-def main():
-    args = argument_parser()
-    all_preds = pd.read_csv(args.pae)
-    if args.pae.endswith('pred_and_emp.csv'):
-        print('Computing correlations...')
-        correlations = calc_correlations(all_preds)
-        output_filename = args.pae.replace('pred_and_emp.csv', 'correlations.csv')
-        print("Writing to {}".format(output_filename))
-        correlations.to_csv("{}_correlations.csv".format(output_filename), index=True)
-    else:
-        correlations = all_preds
-    print("Median correlations: ")
-    print(correlations.groupby('ion')['pearsonr'].median())
-
-
-if __name__ == '__main__':
-    main()
diff --git a/conversion_tools/calc_perc_found_peaks.py b/conversion_tools/calc_perc_found_peaks.py
deleted file mode 100644
index 8afcd4ec..00000000
--- a/conversion_tools/calc_perc_found_peaks.py
+++ /dev/null
@@ -1,53 +0,0 @@
-"""
-Calculate percentage of found peaks
-
-Returns the percentage of values in the series that ar not close to log2(0.001),
-which is the value MS2PIP gives a peak if it was not found in the spectrum.
-"""
-
-import sys
-from math import isclose
-
-import numpy as np
-import pandas as pd
-
-
-def calc_percentage_found_peaks(series):
-    """
-    Calculate percentage of found peaks
-    
-    Returns the percentage of values in the series that ar not close to log2(0.001),
-    which is the value MS2PIP gives a peak if it was not found in the spectrum.
-    """
-    zero = np.log2(0.001)
-    try:
-        x = series.apply(lambda x: isclose(x, zero, abs_tol=0.001)).value_counts(
-            normalize=True
-        )[False]
-    except KeyError:
-        x = 0.0
-    return x
-
-
-def main():
-    if sys.argv[1].endswith(".csv"):
-        vectors = pd.read_csv(sys.argv[1])
-    elif sys.argv[1].endswith(".h5"):
-        vectors = pd.read_hdf(sys.argv[1], key="table")
-    elif sys.argv[1].endswith("pkl"):
-        vectors = pd.read_pkl(sys.argv[1])
-    else:
-        print("Unknown file extension")
-        exit(1)
-
-    for ion_type in [
-        col.split("targets_")[1]
-        for col in vectors.columns
-        if col.startswith("targets_")
-    ]:
-        perc_found = calc_percentage_found_peaks(vectors["targets_" + ion_type])
-        print("Percentage of found {}-ion peaks: {}".format(ion_type, perc_found))
-
-
-if __name__ == "__main__":
-    main()
diff --git a/conversion_tools/extpsmreport_to_peprec.py b/conversion_tools/extpsmreport_to_peprec.py
deleted file mode 100644
index d7b4c98b..00000000
--- a/conversion_tools/extpsmreport_to_peprec.py
+++ /dev/null
@@ -1,130 +0,0 @@
-"""
-Extended PSM Report to PEPREC
-extpsmreport_to_peprec.py
-
-Create a PEPREC file out of a PeptideShaker Extended PSM Report. Attention: The
-resulting PEPREC file will contain all PSMs and is not filtered on FDR or decoy
-hits.
-"""
-
-__author__ = "Tim Van Den Bossche"
-__credits__ = ["Tim Van Den Bossche", "Ralf Gabriels", "Sven Degroeve", "Lennart Martens"]
-__license__ = "Apache License, Version 2.0"
-__version__ = "0.1"
-__email__ = "Ralf.Gabriels@ugent.be"
-
-
-# Native libraries
-import argparse
-
-# Third party libraries
-import pandas as pd
-
-
-def modifications(modified_seq):
-    # initiate variables for nterm, seq and cterm
-    mod_list = list()
-    nterm, seq, cterm = modified_seq.split("-")
-
-    # initiatle variable for nterm
-    pyro_bool = False
-
-    # initiate variables for seq
-    mod_index = 0
-    mod_description = False  # to check if it's an amino acid (False) or a description in < ... > (True)
-
-    # check amino terminus for modifications
-    if nterm == "ace":
-        mod_list.append("0|Acetyl")
-    elif nterm == "pyro":
-        pyro_bool = True
-    elif nterm != "NH2":
-        print("Unknown N-terminal modification: {}".format(nterm))
-
-    # check internal sequence
-    for char in seq:
-        if char == "<":
-            mod_peprec = "{}|".format(mod_index)
-            mod_name = ""
-            mod_description = True
-        elif char == ">":
-            mod_description = False
-            if mod_name == 'ox':
-                mod_peprec += 'Oxidizedmethionine'  # allow only oxidation of Met!!
-            elif mod_name == 'cmm':
-                mod_peprec += 'Carbamidomethylation'
-            elif mod_name == 'deam':
-                mod_peprec += 'Deamidated'
-            else:
-                print("Unknown internal modification: {}".format(mod_name))
-            mod_list.append("{}".format(mod_peprec))  # peprec format
-            mod_peprec = ""
-
-        else:
-            if pyro_bool:
-                if char == 'C':
-                    mod_name = "Pyro-carbamidomethyl"
-                elif char == 'Q':
-                    mod_name = "Gln->pyro-Glu"
-                elif char == 'E':
-                    mod_name = "Glu->pyro-Glu"
-                elif char == 'P':
-                    mod_name = "Pro->pyro-Glu"
-                else:
-                    print("Unknown N-terminal pyro modification from {}".format(char))
-                mod_list.append("1|{}".format(mod_name))
-                pyro_bool = False
-                mod_index += 1
-                mod_name = ""
-            else:
-                if mod_description:
-                    mod_name += char
-                else:
-                    mod_index += 1
-
-    mods_peprec = "|".join(mod_list)
-    if mods_peprec == "":
-        mods_peprec = "-"
-    return mods_peprec
-
-
-def ArgParse():
-    parser = argparse.ArgumentParser(description='Create a PEPREC file out of a PeptideShaker Extended PSM Report.')
-    parser.add_argument("input", metavar="<extended_psm_report>",
-                        help="Path to input Extended PSM Report")
-    parser.add_argument('-o', '--out', dest='outname', action='store', default='SpecLib',
-                        help='Name for output file (default: "SpecLib")')
-    args = parser.parse_args()
-
-    return(args)
-
-
-def main():
-    args = ArgParse()
-
-    df = pd.read_excel(str(args.input))
-
-    spec_id = list()
-    mods_peprec = list()
-    peptide = list()
-    charge = list()
-    label = list()
-
-    for index, row in df.iterrows():
-        spec_id.append(row["Spectrum Title"])
-        mods_peprec.append(modifications(row["Modified Sequence"]))
-        peptide.append(row["Sequence"])
-        charge.append(row["Measured Charge"][:-1])
-        if row["Decoy"] == 0:
-            label.append(1)
-        elif row["Decoy"] == 1:
-            label.append(-1)
-        else:
-            print("No target/decoy label available for {}".format(row["Spectrum Title"]))
-
-    d = {"spec_id": spec_id, "modifications": mods_peprec, "peptide": peptide, "charge": charge, "label":label}
-    peprec_df = pd.DataFrame(data=d, columns=["spec_id", "modifications", "peptide", "charge", "label"])
-    peprec_df.to_csv('{}.peprec'.format(args.outname), index=False, sep="\t", quotechar="'")
-
-if __name__ == "__main__":
-    main()
diff --git a/conversion_tools/fasta_to_peprec.py b/conversion_tools/fasta_to_peprec.py
deleted file mode 100644
index 02a95e06..00000000
--- a/conversion_tools/fasta_to_peprec.py
+++ /dev/null
@@ -1,71 +0,0 @@
-"""
-## Create PEPREC file from a FASTA file
-Takes every protein in a FASTA file, generates a PEPREC file with all tryptic peptides (check global variables to set charge states, min/max lengths and number of missed cleavages)
-
-*fasta_to_peprec.py*
-**Input:** FASTA file
-**Output:** PEPREC file
-
-**Requirements:** Biopython to parse FASTA file; Pyteomics for in silico cleavage; tqdm for progress bar.
-
-```
-usage: fasta_to_peprec.py [-h] fasta_file
-
-Generate a PEPREC file for all proteins in a fasta file
-
-positional arguments:
-  fasta_file             FASTA file with proteins of interest
-
-optional arguments:
-  -h, --help            show this help message and exit
-```
-"""
-
-# --------------
-# Import modules
-# --------------
-from Bio import SeqIO
-from pyteomics.parser import cleave
-from pyteomics.parser import expasy_rules
-import argparse
-from tqdm import tqdm
-
-parser = argparse.ArgumentParser()
-
-parser.add_argument('fasta_file', help='FASTA file with all proteins of interest')
-
-args = parser.parse_args()
-
-# --------------
-# Global variables
-# --------------
-CHARGES = [1, 2, 3]
-NUM_MISSED = 2
-MIN_LEN = 8
-MAX_LEN = 50
-
-peprec_file = open(args.fasta_file.replace('.fasta', '.PEPREC'), 'wt')
-peprec_file.write('spec_id modifications peptide charge protein\n')
-
-n_prots = (sum(1 for x in SeqIO.parse(open(args.fasta_file),'fasta')))
-print('{} proteins in fasta file \n'.format(n_prots))
-
-fasta_sequences = SeqIO.parse(open(args.fasta_file),'fasta')
-aa = set(["A", "C", "D", "E", "F", "G", "H", "L", "I", "K", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"])
-
-i = 0 # identifier for each (peptide, charge)
-
-for fasta in tqdm(fasta_sequences, total=n_prots):
-    prot, seq = fasta.id, str(fasta.seq)
-    # seq = seq.replace('L','I')
-    RKcut = cleave(seq, '[KR]', NUM_MISSED, MIN_LEN)
-    # RKcut = cleave(seq, expasy_rules['trypsin'], NUM_MISSED, MIN_LEN)
-    for k in RKcut:
-        if (set(k) <= aa) & (len(k) <= MAX_LEN): # to exclude peptides with aa which aren't the standard
-            for charge in CHARGES:
-                i += 1
-                to_write = '{} {} {} {} {}\n'.format(i, '', k, charge, prot)
-                peprec_file.write(to_write)
-        else: continue
-
-peprec_file.close()
diff --git a/conversion_tools/maxquant_to_peprec.py b/conversion_tools/maxquant_to_peprec.py
deleted file mode 100644
index 9162cb09..00000000
--- a/conversion_tools/maxquant_to_peprec.py
+++ /dev/null
@@ -1,191 +0,0 @@
-__author__ = "Ralf Gabriels"
-__credits__ = ["Ralf Gabriels", "Sven Degroeve", "Lennart Martens"]
-__license__ = "Apache License, Version 2.0"
-__version__ = "0.1"
-__email__ = "Ralf.Gabriels@ugent.be"
-
-
-# Native libraries
-import re
-import logging
-import argparse
-
-# Third party libraries
-import pandas as pd
-from tqdm import tqdm
-
-
-def maxquant_to_peprec(evidence_file, msms_file,
-                       ptm_mapping={'(ox)': 'Oxidation', '(ac)': 'Acetyl', '(cm)': 'Carbamidomethyl'},
-                       fixed_modifications=[]):
-    """
-    Make an MS2PIP PEPREC file starting from the MaxQuant Evidence.txt and
-    MSMS.txt files.
-
-    Positional arguments:
-    `evidence_file`: str with the file location of the Evidence.txt file
-    `msms_file`: str with the file location of the MSMS.txt file
-
-    Keyword arguments:
-    `ptm_mapping` (dict) is used to convert the MaxQuant PTM labels to PSI-MS
-    modification names. For correct parsing, the key should always include the
-    two brackets.
-    `fixed_modifications` (list of tuples, [(aa, ptm)]) can contain fixed
-    modifications to be added to the peprec. E.g. `[('C', 'cm')]`. The first
-    tuple element contains the one-letter amino acid code. The second tuple
-    element contains a two-character label for the PTM. This PTM also needs
-    to be present in the `ptm_mapping` dictionary.
-    """
-
-    for (aa, mod) in fixed_modifications:
-        if re.fullmatch('[A-Z]|n_term', aa) is None:
-            raise ValueError("Fixed modification amino acid `{}` should be a single capital character (e.g. `C`) or `n_term`.".format(aa))
-        if re.fullmatch('[a-z]{2}', mod) is None:
-            raise ValueError("Fixed modification label `{}` can only contain two non-capital characters. E.g. `cm`".format(mod))
-
-    logging.debug("Start converting MaxQuant output to MS2PIP PEPREC")
-
-    # Read files and merge into one dataframe
-    evidence = pd.read_csv(evidence_file, sep='\t')
-    msms = pd.read_csv(msms_file, sep='\t', low_memory=False)
-    m = evidence.merge(msms[['Scan number', 'Scan index', 'id']], left_on='Best MS/MS', right_on='id')
-
-    # Filter data
-    logging.debug("Removing decoys, contaminants, spectrum redundancy and peptide redundancy")
-    len_dict = {
-        'len_original': len(m),
-        'len_rev': len(m[m['Reverse'] == '+']),
-        'len_con': len(m[m['Potential contaminant'] == '+']),
-        'len_spec_red': len(m[m.duplicated(['Raw file', 'Scan number'], keep='first')]),
-        'len_pep_red': len(m[m.duplicated(['Sequence', 'Modifications', 'Charge'], keep='first')])
-    }
-
-    m = m.sort_values('Score', ascending=False)
-    m = m[m['Reverse'] != '+']
-    m = m[m['Potential contaminant'] != '+']
-    m = m[~m.duplicated(['Raw file', 'Scan number'], keep='first')]
-    m = m[~m.duplicated(['Sequence', 'Modifications', 'Charge'], keep='first')]
-    m.sort_index(inplace=True)
-
-    len_dict['len_filtered'] = len(m)
-
-    print_psm_counts = """
-    Original number of PSMs: {len_original}
-     - {len_rev} decoys
-     - {len_con} potential contaminants
-     - {len_spec_red} redundant spectra
-     - {len_pep_red} redundant peptides
-    = {len_filtered} PSMs in spectral library
-    """.format(**len_dict)
-
-    logging.info(print_psm_counts)
-    logging.info("Spectral library FDR estimated from PEPs: {:.4f}".format(m['PEP'].sum()/len(m)))
-
-    # Parse modifications
-    logging.debug("Parsing modifications")
-    for aa, mod in fixed_modifications:
-        if aa == 'n_term':
-            m['Modified sequence'] = ['_({})'.format(mod) + seq[1:] if seq[:2] != '_(' else seq for seq in m['Modified sequence']]
-        else:
-            m['Modified sequence'] = m['Modified sequence'].str.replace(aa, '{}({})'.format(aa, mod))
-
-    pattern = r'\([a-z].\)'
-    m['Parsed modifications'] = ['|'.join(['{}|{}'.format(m.start(0) - 1 - i*4, ptm_mapping[m.group()]) for i, m in enumerate(re.finditer(pattern, s))]) for s in m['Modified sequence']]
-    m['Parsed modifications'] = ['-' if mods == '' else mods for mods in m['Parsed modifications']]
-
-    # Prepare PEPREC columns
-    logging.debug("Preparing PEPREC columns")
-    m['Protein list'] = m['Proteins'].str.split(';')
-    m['MS2PIP ID'] = range(len(m))
-    peprec_cols = ['MS2PIP ID', 'Sequence', 'Parsed modifications', 'Charge', 'Protein list',
-                   'Retention time', 'Score', 'Delta score', 'PEP', 'Scan number', 'Scan index', 'Raw file']
-    peprec = m[peprec_cols].sort_index().copy()
-
-    col_mapping = {
-        'MS2PIP ID': 'spec_id',
-        'Sequence': 'peptide',
-        'Parsed modifications': 'modifications',
-        'Charge': 'charge',
-        'Protein list': 'protein_list',
-        'Retention time': 'rt',
-        'Score': 'andromeda_score',
-        'Delta score': 'andromeda_delta_score',
-        'PEP': 'andromeda_pep',
-        'Scan number': 'scan_number',
-        'Scan index': 'scan_index',
-        'Raw file': 'raw_file'
-    }
-
-    peprec = peprec.rename(columns=col_mapping)
-
-    logging.debug('PEPREC ready!')
-
-    return peprec
-
-
-def scan_mgf(peprec, mgf_folder, filename_col='raw_file', scan_num_col='scan_number', outname='SpecLib.mgf'):
-    with open(outname, 'w') as out:
-        count_runs = 0
-        count = 0
-        runs = peprec[filename_col].unique()
-        logging.info("Scanning MGF files...")
-        for run in tqdm(runs):
-            spec_dict = dict(('msmsid:F{:06d}'.format(v), k) for k, v in peprec[(peprec[filename_col] == run)][scan_num_col].to_dict().items())
-
-            # Parse file
-            found = False
-            with open('{}/{}.mgf'.format(mgf_folder, str(run)), 'r') as f:
-                for i, line in enumerate(f):
-                    if 'TITLE' in line:
-                        scan_num = re.split('TITLE=|,', line)[1]  # Edit to match MGF title notation
-                        if scan_num in spec_dict:
-                            found = True
-                            out.write("BEGIN IONS\n")
-                            line = "TITLE=" + str(spec_dict[scan_num]) + '\n'
-                            count += 1
-                    if 'END IONS' in line:
-                        if found:
-                            out.write(line + '\n')
-                            found = False
-                    if found and line[-4:] != '0.0\n':
-                        out.write(line)
-    logging.info("%i/%i spectra found and written to new MGF file.", count, len(peprec))
-
-
-def ArgParse():
-    parser = argparse.ArgumentParser(description='Convert MaxQuant txt files and MGF to MS2PIP spectral library.')
-    parser.add_argument('-t', '--txt', dest='txt_folder', action='store', default='txt',
-                        help='Folder with MaxQuant txt files (default: "msf")')
-    parser.add_argument('-g', '--mgf', dest='mgf_folder', action='store', default='mgf',
-                        help='Folder with MGF spectrum files (default: "mgf")')
-    parser.add_argument('-o', '--out', dest='outname', action='store', default='SpecLib',
-                        help='Name for output files (default: "SpecLib")')
-    args = parser.parse_args()
-
-    return(args)
-
-
-def main():
-    args = ArgParse()
-
-    logging.basicConfig(
-        format='%(asctime)s - %(levelname)s - %(message)s',
-        datefmt='%Y-%m-%d %H:%M:%S',
-        level=logging.DEBUG
-    )
-
-    peprec = maxquant_to_peprec(
-        '{}/evidence.txt'.format(args.txt_folder),
-        '{}/msms.txt'.format(args.txt_folder),
-        ptm_mapping={'(ox)': 'Oxidation', '(ac)': 'Acetyl', '(cm)': 'Carbamidomethyl', '(tn)': 'TMT6plexN', '(tk)': 'TMT6plex'},
-        fixed_modifications=[('C', 'cm'), ('n_term', 'tn'), ('K', 'tk')]
-    )
-
-    peprec.to_csv('{}.peprec'.format(args.outname), sep=' ', index=False)
-
-    scan_mgf(peprec, args.mgf_folder, outname='{}.mgf'.format(args.outname))
-
-
-
-if __name__ == '__main__':
-    main()
diff --git a/conversion_tools/misc_functions.py b/conversion_tools/misc_functions.py
deleted file mode 100644
index f9ef1102..00000000
--- a/conversion_tools/misc_functions.py
+++ /dev/null
@@ -1,85 +0,0 @@
-"""
-Miscellaneous functions regarding MS2PIP file conversions.
-"""
-
-
-import re
-import pandas as pd
-
-
-def add_fixed_mods(peprec, fixed_mods=None, n_term=None, c_term=None):
-    """
-    Add 'fixed' modifications to all peptides in an MS2PIP PEPREC file.
-    Return list with MS2PIP modifications with fixed mods added.
-
-    Positional arguments:
-    peprec - MS2PIP PEPREC DataFrame
-
-    Keyword arguments:
-    fixed_mods - List of tuples. First tuple element is amino acid, second tuple
-    element is modification name. E.g. `[('K', 'TMT6plex')]`
-    n_term - Name of fixed N-terminal modification to add
-    c_term - Name of fixed C-terminal modification to add
-    """
-
-    if not fixed_mods:
-        fixed_mods = []
-
-    result = []
-
-    for _, row in peprec.iterrows():
-        mods = row['modifications']
-        if mods == '-':
-            mods = []
-        else:
-            mods = mods.split('|')
-
-        current_mods = list(zip([int(i) for i in mods[::2]], mods[1::2]))
-
-        for aa, mod in fixed_mods:
-            current_mods.extend([(m.start()+1, mod) for m in re.finditer(aa, row['peptide'])])
-
-        if n_term and not 0 in [i for i, n in current_mods]:
-            current_mods.append((0, n_term))
-        if c_term and not -1 in [i for i, n in current_mods]:
-            current_mods.append((-1, c_term))
-
-        current_mods = sorted(current_mods, key=lambda x: x[0])
-        current_mods = '|'.join(['|'.join(m) for m in [(str(i), n) for i, n in current_mods]])
-        result.append(current_mods)
-
-    return result
-
-
-def peprec_add_charges(peprec_filename, mgf_filename, overwrite=False):
-    """
-    Get precursor charges from MGF file and add them to a PEPREC
-    """
-    peprec = pd.read_csv(peprec_filename, sep=' ', index_col=None)
-
-    if not overwrite and 'charge' in peprec.columns:
-        print('Charges already in PEPREC')
-        return None
-
-    spec_count = 0
-    charges = {}
-    with open(mgf_filename, 'rt') as f:
-        for line in f:
-            if line.startswith('TITLE='):
-                title = line[6:].strip()
-                spec_count += 1
-            if line.startswith('CHARGE='):
-                charge = line[7:].strip()
-                charges[title] = charge
-
-    if not spec_count == len(charges):
-        print('Something went wrong')
-        return None
-
-    peprec['charge'] = peprec['spec_id'].map(charges)
-    
-    new_peprec_filename = re.sub('\.peprec$|\.PEPREC$', '', peprec_filename) + '_withcharges.peprec'
-    peprec.to_csv(new_peprec_filename, sep=' ', index=False)
-
-    print('PEPREC with charges written to ' + new_peprec_filename)
-    return peprec
\ No newline at end of file
diff --git a/conversion_tools/mztab_to_peprec.py b/conversion_tools/mztab_to_peprec.py
deleted file mode 100644
index a08e6301..00000000
--- a/conversion_tools/mztab_to_peprec.py
+++ /dev/null
@@ -1,115 +0,0 @@
-"""
-Convert mzTab to MS2PIP PEPREC spectral library (only unique peptides)
-"""
-
-import argparse
-import pandas as pd
-
-
-def argument_parser():
-    parser = argparse.ArgumentParser(
-        description='Convert mzTab to MS2PIP PEPREC spectral library (only\
-        unique peptides)'
-    )
-    parser.add_argument(
-        'filename', metavar="<mztab filename>",
-        help='filename of mzTab to convert.'
-    )
-    args = parser.parse_args()
-    return args
-
-
-def get_peptide_table_start(filename):
-    """
-    Get line number of mzTab peptide table start.
-    """
-    with open(filename, 'rt') as f:
-        for num, line in enumerate(f, 1):
-            if line[:3] == 'PSH':
-                return num
-
-
-def parse_mods(modifications, psimod_mapping=None):
-    """
-    Convert mzTab modifications to MS2PIP PEPREC modifications.
-
-    N-term and C-term modifications are not supported!
-    Entries without modifications should be given as '-'
-
-    modifications - Pandas Series containing mzTab-formatted modifications
-    psimod_mapping - dictionary that maps PSI-MOD entries to desired modification names
-    """
-    if not psimod_mapping:
-        psimod_mapping = psimod_mapping = {
-            'MOD:00394': 'Acetyl',
-            'MOD:00400': 'Deamidated',
-            'MOD:00425': 'Oxidation',
-            'MOD:01214': 'Carbamidomethyl'
-        }
-
-    parsed_mods = []
-    for mods in modifications.fillna('-').str.split(',|-'):
-        if mods == ['', '']:
-            parsed_mods.append('-')
-        else:
-            pos = [str(int(p) + 1) for p in mods[::2]]
-            names = [psimod_mapping[n] for n in mods[1::2]]
-            parsed_mods.append('|'.join(['|'.join(i) for i in list(zip(pos, names))]))
-
-    return parsed_mods
-
-
-def main():
-    args = argument_parser()
-
-    start = get_peptide_table_start(args.filename)
-    mztab = pd.read_csv(args.filename, sep='\t', skiprows=start-1)
-
-    # Filter unique peptides
-    unique_count = mztab.duplicated(['sequence', 'modifications', 'charge']).value_counts().to_dict()
-    if False in unique_count:
-        unique_count = unique_count[False]
-    else:
-        unique_count = 0
-    print("mzTab contains {} unique peptides".format(unique_count))
-
-    mztab.sort_values('search_engine_score[1]', ascending=False, inplace=True)
-    mztab = mztab[~mztab.duplicated(['sequence', 'modifications', 'charge'])].copy()
-
-
-    # Check modifications present in mzTab
-    flatten = lambda l: [item for sublist in l for item in sublist]
-    unique_mods = set(flatten([mods[1::2] for mods in mztab['modifications'].dropna().str.split(',|-')]))
-    print("mzTab contains these modifications: {}".format(unique_mods))
-
-    mztab['parsed_modifications'] = parse_mods(mztab['modifications'])
-
-    peprec_cols = [
-        'PSM_ID',
-        'sequence',
-        'parsed_modifications',
-        'charge',
-        'retention_time',
-        'calc_mass_to_charge',
-        'search_engine_score[1]',
-    ]
-
-    peprec_col_mapping = {
-        'PSM_ID': 'spec_id',
-        'sequence': 'peptide',
-        'parsed_modifications': 'modifications',
-        'charge': 'charge',
-        'retention_time': 'rt',
-        'calc_mass_to_charge': 'mz',
-        'search_engine_score[1]': 'score'
-    }
-
-    peprec = mztab[peprec_cols]
-    peprec = peprec.rename(columns=peprec_col_mapping)
-
-    filename_stripped = '.'.join(args.filename.split('.')[:-1])
-    peprec.to_csv(filename_stripped + '.peprec', sep=' ', index=False)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/conversion_tools/peprec_add_phospho_suffix.py b/conversion_tools/peprec_add_phospho_suffix.py
deleted file mode 100644
index 3d59deff..00000000
--- a/conversion_tools/peprec_add_phospho_suffix.py
+++ /dev/null
@@ -1,91 +0,0 @@
-"""
-## PEPREC: Add Phospho Suffix
-Adds amino acid suffix to "Phospho" modifications in PEPREC file. "Phospho" becomes,
-for instance, "PhosphoY". Also, for unmodified peptides, a hyphen is added to the
-PEPREC file.
-
-*peprec_add_phospho_suffix.py*
-**Input:** Folder with PEPREC files
-**Output:** PEPREC files with amino acid suffix added to "Phospho" modifications
-usage: peprec_add_phospho_suffix.py [-h] [-f PEPREC_FOLDER] [-r]
-
-Add amino acid suffix to "Phospho" in modifications column in PEPREC file(s).
-
-optional arguments:
-  -h, --help        show this help message and exit
-  -f PEPREC_FOLDER  Folder with input PEPREC files (default: "")
-  -r                Replace the original PEPREC files instead of writing a new
-                    file (default: False)
-"""
-
-# --------------
-# Import modules
-# --------------
-import argparse
-import pandas as pd
-from glob import glob
-
-
-# ---------------------------------------------------------
-# Find all PEPREC files in folder in a case-insensitive way
-# ---------------------------------------------------------
-def FindFiles(pattern):
-    def either(c):
-        return '[%s%s]' % (c.lower(), c.upper()) if c.isalpha() else c
-    return glob(''.join(map(either, pattern)))
-
-
-# ------------------------------------
-# Add AA suffix to 'Phospho' in PEPREC
-# ------------------------------------
-def AddPhosphoSuffix(peprec):
-    peprec = peprec.fillna('-')
-    peprec['modifications'] = [[x for x in zip([x for (i, x) in enumerate(mods) if i % 2 == 0], [x for (i, x) in enumerate(mods) if i % 2 != 0])] for mods in peprec['modifications'].str.split('|')]
-    res = []
-    for _, row in peprec.iterrows():
-        if row['modifications']:
-            res.append([(mod[0], mod[1] + row['peptide'][int(mod[0]) - 1]) if mod[1] == 'Phospho' else mod for mod in row['modifications']])
-        else:
-            res.append('-')
-    res = ['|'.join(['|'.join(mod) for mod in row]) for row in res]
-    peprec['modifications'] = res
-    return(peprec)
-
-
-# ---------------
-# Argument parser
-# ---------------
-def ArgParse():
-    parser = argparse.ArgumentParser(description='Add amino acid suffix to "Phospho" in modifications column in PEPREC file(s).')
-    parser.add_argument('-f', dest='peprec_folder', action='store', default='',
-                        help='Folder with input PEPREC files (default: "")')
-    parser.add_argument('-r', dest='replace', action="store_true", default=False,
-                        help='Replace the original PEPREC files instead of writing a new file (default: False)')
-    args = parser.parse_args()
-    return(args)
-
-
-# ----
-# Run!
-# ----
-def run():
-    args = ArgParse()
-    if args.peprec_folder:
-        files = FindFiles("{}/*.peprec".format(args.peprec_folder))
-    else:
-        files = FindFiles("*.peprec")
-
-    if not files:
-        print("No PEPREC files found.")
-    else:
-        for peprec_file in files:
-            peprec = pd.read_csv(peprec_file, sep=' ')
-            peprec_out = AddPhosphoSuffix(peprec)
-            if not args.replace:
-                peprec_file = "{}_WithSuffix.peprec".format(peprec_file.strip('.peprec'))
-            peprec_out.to_csv(peprec_file, index=None, sep=' ', na_rep='-')
-        print("Ready!")
-
-
-if __name__ == "__main__":
-    run()
diff --git a/conversion_tools/predictions_to_ms2.py b/conversion_tools/predictions_to_ms2.py
deleted file mode 100644
index 10c2b85a..00000000
--- a/conversion_tools/predictions_to_ms2.py
+++ /dev/null
@@ -1,95 +0,0 @@
-"""
-## Create MS2+SSL files (SkyLine spectral library formats) from MS2PIP predictions
-Takes spectra predicted by MS2PIP and write SkyLine spectral library format MS2+SSL
-
-*predictions_to_ms2.py*
-**Input:** MS2PIP predictions file
-**Output:** MS2 and SSL files
-
-**Requirements:** Pyteomics for mass calculations; tqdm for progress bar.
-
-```
-usage: predictions_to_ms2.py [-h] pep_file
-
-Generate MS2 and SSL files from MS2PIP predictions
-
-positional arguments:
-  pep_file             PEPREC file used to generate predictions
-
-optional arguments:
-  -h, --help            show this help message and exit
-```
-"""
-
-# --------------
-# Import modules
-# --------------
-import sys
-import argparse
-from time import localtime, strftime
-from operator import itemgetter
-from pyteomics import mass
-from tqdm import tqdm
-
-parser = argparse.ArgumentParser()
-
-parser.add_argument("pep_file", help="PEPREC file used to get the ms2pip predictions")
-args = parser.parse_args()
-
-pep_dict = {}
-# pep_dic[spec_id] = [sequence, charge]
-pep = open(args.pep_file)
-for line in pep:
-    split_line = line.split(' ')
-    # first line must be the header, and first header must be spec_id:
-    if line.startswith('spec_id'):
-        seq_id = split_line.index('peptide')
-        ch_id =  split_line.index('charge')
-        continue
-    pep_dict[split_line[0]] = [split_line[2], int(split_line[3])]
-
-pred_dict = {}
-# pred_dict[spec_id] = [[mz_list], [int_list]]
-pred = open(args.pep_file.replace(".PEPREC", "_predictions.csv"))
-for line in pred:
-    if line.startswith('spec_id'): continue
-    split_line = line.strip().split(',')
-    if split_line[0] in pred_dict.keys():
-        pred_dict[split_line[0]][0].append(float(split_line[5]))
-        pred_dict[split_line[0]][1].append(float(split_line[6]))
-    else:
-        pred_dict[split_line[0]] = [[float(split_line[5])], [float(split_line[6])]]
-
-ssl_output = open(args.pep_file + ".ssl", "w+")
-ssl_output.write('file\tscan\tcharge\tsequence\n')
-
-sys.stdout.write("writing {} file\n".format(args.pep_file.replace(".PEPREC", ".ssl")))
-
-peptides = pred_dict.keys()
-
-ms2_name = args.pep_file.replace(".PEPREC",".ms2")
-sys.stdout.write("writing {} file\n".format(ms2_name))
-ms2_output = open(ms2_name, "w+")
-ms2_output.write("H\tCreationDate\t{}\n".format(strftime("%Y-%m-%d %H:%M:%S", localtime())))
-ms2_output.write("H\tExtractor\tMS2PIP Predictions\n")
-
-for i, peptide in tqdm(enumerate(pep_dict.keys()), total=len(pep_dict.keys())):
-    intensities = list(map(lambda x: (2**x)+0.001, pred_dict[peptide][1]))
-    spectrum = zip(pred_dict[peptide][0], intensities)
-    spectrum = sorted(spectrum, key=itemgetter(0))
-
-    seq = pep_dict[peptide][0]
-    prec_mass = mass.calculate_mass(sequence=seq)
-    charge = pep_dict[peptide][1]
-
-    ssl_output.write("{}\t{}\t{}\t{}\n".format(ms2_name, peptide, charge, seq))
-    ms2_output.write("S\t{}\t{}\n".format(peptide, prec_mass))
-    ms2_output.write("Z\t{}\t{}\n".format(int(charge), int(charge)*prec_mass))
-    ms2_output.write("D\tseq\t{}\n".format(seq))
-
-    for mz, inte in spectrum:
-        ms2_output.write("{}\t{}\n".format(mz, inte))
-
-ms2_output.close()
-
-ssl_output.close()
diff --git a/conversion_tools/sample_peptides_n.py b/conversion_tools/sample_peptides_n.py
deleted file mode 100644
index 05686477..00000000
--- a/conversion_tools/sample_peptides_n.py
+++ /dev/null
@@ -1,110 +0,0 @@
-#!/usr/bin/env python
-import sys
-import pandas as pd
-#import numpy as np
-#import cPickle as pickle
-
-
-def return_len(x):
-    return len(x)
-
-
-def is_mod(x):
-    if "|" in str(x):
-        tmp = x.split("|")
-        ok = True
-        for i in range(1, len(tmp), 2):
-            if (tmp[i] != "CAM") & (tmp[i] != "Oxidation"):
-                ok = False
-        if ok:
-            return 0
-        else:
-            return 1
-    return 0
-
-
-def return_charge(x):
-    return tmap[x]
-
-
-def is_tryp(x):
-    if (x[-1] == 'R') | (x[-1] == 'K'):
-        return 1
-    return 0
-
-
-def add_pepmass(x):
-    return tmap[x]
-
-
-def add_cpepmass(x):
-    return 18.010601+sum([AminoMass[a] for a in x[2]])+x[6]*57.02146
-
-
-def num_CAM(x):
-    n = 0
-    if "|" in str(x):
-        tmp = x.split("|")
-        # ok = True
-        for i in range(1, len(tmp), 2):
-            if tmp[i] == "CAM":
-                n += 1
-    return n
-
-
-def isA(x):
-    return x[0]
-
-
-tmap = {}
-
-AminoMass = {
-    'A': 71.037114, 'C': 103.009185, 'D': 115.026943, 'E': 129.042593, 'F': 147.068414, 'G': 57.021464,
-    'H': 137.058912, 'I': 113.084064, 'K': 128.094963, 'L': 113.084064, 'M': 131.040485, 'N': 114.042927,
-    'P': 97.052764, 'Q': 128.058578, 'R': 156.101111, 'S': 87.032028, 'T': 101.047679, 'V': 99.068414,
-    'W': 186.079313, 'Y': 163.063329,
-}
-
-f = open(sys.argv[2])
-title = ""
-# pepmass = 0
-charge = 0
-while True:
-    rows = f.readlines(1000000)
-    if not rows:
-        break
-    for row in rows:
-        row = row.rstrip()
-        if row == "":
-            continue
-        if row[:5] == "TITLE":
-            title = row[6:]
-        elif row[:6] == "CHARGE":
-            charge = int(row[7:9].replace("+", ""))
-        # elif row[:7] == "PEPMASS":
-            # pepmass = float(row[8:].split()[0])
-        elif row[:8] == "END IONS":
-            # tmap[title] = (float(pepmass) * (charge)) - ((charge)*1.007825035)
-            tmap[title] = charge
-
-# pickle.dump( tmap, open( "tmap.p", "wb" ) )
-# tmap = pickle.load( open( "tmap.p", "rb" ) )
-
-sys.stderr.write("done\n")
-
-sys.stderr.write('reading file\n')
-data = pd.read_csv(sys.argv[1], sep=' ')
-data['peplen'] = data['peptide'].apply(return_len)
-data['mod'] = data['modifications'].apply(is_mod)
-data['charge'] = data['spec_id'].apply(return_charge)
-
-# data['numcam'] = data['modifications'].apply(num_CAM)
-# data['pepmass'] = data['spec_id'].apply(add_pepmass)
-# data['cpepmass'] = data.apply(add_cpepmass,axis=1)
-# data['diff'] = np.abs(data['pepmass']-data['cpepmass'])
-
-data = data[data.peplen >= 8]
-data = data[data['mod'] == 0]
-
-datatmp = data.drop_duplicates(subset=['peptide', 'charge'], keep="last")
-datatmp[['spec_id', 'modifications', 'peptide', 'charge']].to_csv(sys.argv[1] + ".ms2pip", index=False, sep=" ")
diff --git a/conversion_tools/scan_mgf.py b/conversion_tools/scan_mgf.py
deleted file mode 100644
index 1dbecf92..00000000
--- a/conversion_tools/scan_mgf.py
+++ /dev/null
@@ -1,148 +0,0 @@
-"""
-Scan MGF files in a given folder for spectra present in a given PEPREC file and
-write those spectra to a new MGF file.
-"""
-
-
-__author__ = "Ralf Gabriels"
-__credits__ = ["Ralf Gabriels", "Sven Degroeve", "Lennart Martens"]
-__license__ = "Apache License, Version 2.0"
-__version__ = "0.1"
-__email__ = "Ralf.Gabriels@ugent.be"
-
-
-import argparse
-import mmap
-import os
-
-import pandas as pd
-
-try:
-    from tqdm import tqdm
-except ImportError:
-    USE_TQDM = False
-else:
-    USE_TQDM = True
-
-
-def argument_parser():
-    parser = argparse.ArgumentParser(
-        description="Scan MGF files in a given folder for spectra present in a\
-        given PEPREC file and write those spectra to a new MGF file."
-    )
-    parser.add_argument(
-        "mgf",
-        metavar="<mgf>",
-        help="Path to MGF file or directory containing MGF files. If a\
-        directory is given, the peprec file must have an additional column\
-        `mgf_filename` that contains the respective MGF filename.",
-    )
-    parser.add_argument(
-        "peprec",
-        metavar="<peprec>",
-        help="Path to PEPREC file that contains the peptides to scan for.",
-    )
-    # parser.add_argument(
-    #     "--tqdm", action="store_true", dest='use_tqdm', default=False,
-    #     help="Use tqdm to display progress bar."
-    # )
-    args = parser.parse_args()
-    return args
-
-
-def get_num_lines(file_path):
-    fp = open(file_path, "r+")
-    buf = mmap.mmap(fp.fileno(), 0)
-    lines = 0
-    while buf.readline():
-        lines += 1
-    return lines
-
-
-def scan_mgf(
-    df_in,
-    mgf_folder,
-    outname="scan_mgf_result.mgf",
-    filename_col="mgf_filename",
-    spec_title_col="spec_id",
-    use_tqdm=False,
-):
-    if df_in[filename_col].iloc[0][-4:] in [".mgf", ".MGF"]:
-        file_suffix = ""
-    else:
-        file_suffix = ".mgf"
-
-    with open(outname, "w") as out:
-        count_runs = 0
-        count = 0
-        runs = df_in[filename_col].unique()
-        print(
-            "Scanning MGF files: {} runs to do. Now working on run: ".format(len(runs)),
-            end="",
-        )
-        for run in runs:
-            count_runs += 1
-            if count_runs % 10 == 0:
-                print(str(count_runs), end="")
-            else:
-                print(".", end="")
-
-            spec_dict = dict(
-                (v, k)
-                for k, v in df_in[(df_in[filename_col] == run)][spec_title_col]
-                .to_dict()
-                .items()
-            )
-
-            found = False
-            current_mgf_file = "{}/{}{}".format(mgf_folder, str(run), file_suffix)
-            with open(current_mgf_file, "r") as f:
-                if use_tqdm:
-                    mgf_iterator = tqdm(f, total=get_num_lines(current_mgf_file))
-                else:
-                    mgf_iterator = f
-                for line in mgf_iterator:
-                    if "TITLE" in line:
-                        title = line[6:].strip()
-                        if title in spec_dict:
-                            found = True
-                            out.write("BEGIN IONS\n")
-                            # line = "TITLE=" + str(spec_dict[title]) + "\n"
-                            count += 1
-                    if "END IONS" in line:
-                        if found:
-                            out.write(line + "\n")
-                            found = False
-                    if found and line[-4:] != "0.0\n":
-                        out.write(line)
-
-    print(
-        "\n{}/{} spectra found and written to new MGF file.".format(count, len(df_in))
-    )
-
-
-def main():
-    args = argument_parser()
-
-    peprec = pd.read_csv(args.peprec, sep=" ")
-
-    if os.path.isfile(args.mgf):
-        mgf = os.path.dirname(args.mgf)
-        peprec["mgf_filename"] = os.path.basename(args.mgf)
-    elif os.path.isdir(args.mgf):
-        assert (
-            "mgf_file" in peprec.columns
-        ), "Path to MGF directory was given, but PEPREC does not contain a `mgf_file` \
-column."
-        mgf = args.mgf
-    else:
-        print(f"{args.mgf} does not exist.")
-        exit(1)
-
-    outname = os.path.splitext(args.mgf)[0] + "_scanned.mgf"
-
-    scan_mgf(peprec, mgf, outname=outname, use_tqdm=USE_TQDM)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/conversion_tools/speclib_to_mgf.py b/conversion_tools/speclib_to_mgf.py
deleted file mode 100644
index aebf7b7f..00000000
--- a/conversion_tools/speclib_to_mgf.py
+++ /dev/null
@@ -1,236 +0,0 @@
-#!/usr/bin/env python3
-"""
-Convert MSP and SPTXT spectral library files.
-
-Writes three files: mgf with the spectra; PEPREC with the peptide sequences;
-meta with additional metainformation.
-
-Arguments:
-    arg1 path to spectral library file
-    arg2 prefix for spec_id
-
-"""
-
-import re
-import sys
-import logging
-
-
-AMINO_MASSES = {
-    "A": 71.037114,
-    "C": 103.009185,
-    "D": 115.026943,
-    "E": 129.042593,
-    "F": 147.068414,
-    "G": 57.021464,
-    "H": 137.058912,
-    "I": 113.084064,
-    "K": 128.094963,
-    "L": 113.084064,
-    "M": 131.040485,
-    "N": 114.042927,
-    "P": 97.052764,
-    "Q": 128.058578,
-    "R": 156.101111,
-    "S": 87.032028,
-    "T": 101.047679,
-    "V": 99.068414,
-    "W": 186.079313,
-    "Y": 163.063329,
-}
-PROTON_MASS = 1.007825035
-WATER_MASS = 18.010601
-
-
-def setup_logging():
-    """Initiate logging."""
-    root_logger = logging.getLogger()
-    handler = logging.StreamHandler()
-    handler.setFormatter(
-        logging.Formatter("%(asctime)s %(levelname)s %(module)s %(message)s")
-    )
-    root_logger.addHandler(handler)
-    root_logger.setLevel(logging.INFO)
-
-
-def parse_peprec_mods(mods, ptm_list):
-    """Parse PEPREC modification string out of MSP Mod string."""
-    if mods.split("/")[0] != "0":
-        num_mods = mods[0]
-        mod_list = [mod.split(",") for mod in mods.split("/")[1:]]
-
-        peprec_mods = []
-        for location, aa, name in mod_list:
-            if not (location == "0" and name == "iTRAQ"):
-                location = str(int(location) + 1)
-            peprec_mods.append(location)
-            peprec_mods.append(name)
-
-            if name not in ptm_list:
-                ptm_list[name] = 1
-            else:
-                ptm_list[name] += 1
-
-        peprec_mods = "|".join(peprec_mods)
-
-    else:
-        peprec_mods = "-"
-
-    return peprec_mods
-
-
-def validate(spec_id, peptide, charge, mods, reported_mw):
-    """Validate amino acids and reported peptide mass."""
-    invalid_aas = ["B", "J", "O", "U", "X", "Z"]
-    if any(aa in invalid_aas for aa in peptide):
-        logging.warning("Peptide with non-canonical amino acid found: %s", peptide)
-
-    elif (
-        mods.split("/")[0] == "0"
-    ):  # Cannot validate mass of peptide with unknown modification
-        calculated = WATER_MASS + sum([AMINO_MASSES[x] for x in peptide])
-        reported = float(reported_mw) * float(charge) - float(charge) * PROTON_MASS
-        if abs(calculated - reported) > 0.5:
-            logging.warning(
-                "Reported MW does not match calculated mass for spectrum %s", spec_id
-            )
-
-
-def parse_speclib(speclib_filename, title_prefix, speclib_format="msp"):
-    """Parse MSP file."""
-    filename = ".".join(speclib_filename.split(".")[:-1])
-    fpip = open(filename + ".peprec", "w")
-    fpip.write("spec_id modifications peptide charge\n")
-    fmgf = open(filename + ".mgf", "w")
-    fmeta = open(filename + ".meta", "w")
-
-    with open(speclib_filename) as f:
-        mod_dict = {}
-        spec_id = 1
-        peak_sep = None
-        peptide = None
-        charge = None
-        parentmz = None
-        mods = None
-        purity = None
-        HCDenergy = None
-        read_spec = False
-        mgf = ""
-
-        for row in f:
-            if read_spec:
-                # Infer peak int/mz separator
-                if not peak_sep:
-                    if "\t" in row:
-                        peak_sep = "\t"
-                    elif " " in row:
-                        peak_sep = " "
-                    else:
-                        raise ValueError("Invalid peak separator")
-
-                line = row.rstrip().split(peak_sep)
-
-                # Read all peaks, so save to output files and set read_spec to False
-                if row[0].isdigit():
-                    # Continue reading spectrum
-                    mgf += " ".join([line[0], line[1]]) + "\n"
-                    continue
-
-                # Last peak reached, finish up spectrum
-                else:
-                    validate(spec_id, peptide, charge, mods, parentmz)
-                    peprec_mods = parse_peprec_mods(mods, mod_dict)
-                    fpip.write(
-                        "{}{} {} {} {}\n".format(
-                            title_prefix, spec_id, peprec_mods, peptide, charge
-                        )
-                    )
-                    fmeta.write(
-                        "{}{} {} {} {} {} {}\n".format(
-                            title_prefix,
-                            spec_id,
-                            charge,
-                            peptide,
-                            parentmz,
-                            purity,
-                            HCDenergy,
-                        )
-                    )
-
-                    buf = "BEGIN IONS\n"
-                    buf += "TITLE=" + title_prefix + str(spec_id) + "\n"
-                    buf += "CHARGE=" + str(charge) + "\n"
-                    buf += "PEPMASS=" + parentmz + "\n"
-                    fmgf.write("{}{}END IONS\n\n".format(buf, mgf))
-
-                    spec_id += 1
-                    read_spec = False
-                    mgf = ""
-
-            if row.startswith("Name:"):
-                line = row.rstrip().split(" ")
-                tmp = line[1].split("/")
-                peptide = tmp[0].replace("(O)", "")
-                if speclib_format == "sptxt":
-                    peptide = re.sub(r"\[\d*\]|[a-z]", "", peptide)
-                charge = tmp[1].split("_")[0]
-                continue
-
-            elif row.startswith("Comment:"):
-                line = row.rstrip().split(" ")
-                for i in range(1, len(line)):
-                    if line[i].startswith("Mods="):
-                        tmp = line[i].split("=")
-                        mods = tmp[1]
-                    if line[i].startswith("Parent="):
-                        tmp = line[i].split("=")
-                        parentmz = tmp[1]
-                    if line[i].startswith("Purity="):
-                        tmp = line[i].split("=")
-                        purity = tmp[1]
-                    if line[i].startswith("HCD="):
-                        tmp = line[i].split("=")
-                        HCDenergy = tmp[1].replace("eV", "")
-                continue
-
-            elif row.startswith("Num peaks:") or row.startswith("NumPeaks:"):
-                read_spec = True
-                continue
-
-    fmgf.close()
-    fpip.close()
-    fmeta.close()
-
-    return spec_id, mod_dict
-
-
-def main():
-    """Run CLI."""
-    # Get arguments
-    speclib_filename = sys.argv[1]
-    title_prefix = sys.argv[2]
-
-    speclib_ext = speclib_filename.split(".")[-1]
-    if speclib_ext.lower() == "sptxt":
-        speclib_format = "sptxt"
-    elif speclib_ext.lower() == "msp":
-        speclib_format = "msp"
-    else:
-        raise ValueError("Unknown spectral library format: `%s`" % speclib_ext)
-
-    logging.info("Converting %s to MGF, PEPREC and meta file", speclib_filename)
-
-    num_peptides, mod_dict = parse_speclib(
-        speclib_filename, title_prefix, speclib_format=speclib_format
-    )
-
-    logging.info(
-        "Finished!\nSpectral library contains %i peptides and the following modifications: %s",
-        num_peptides,
-        mod_dict,
-    )
-
-
-if __name__ == "__main__":
-    setup_logging()
-    main()
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 39e1974e..9020a820 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -11,6 +11,7 @@
    installation
    usage
    prediction-models
+   webserver-api
 
 
 .. toctree::
diff --git a/docs/source/usage.rst b/docs/source/usage.rst
index 119fd7d4..7b63206f 100644
--- a/docs/source/usage.rst
+++ b/docs/source/usage.rst
@@ -148,8 +148,14 @@ Any unresolvable modification will result in an error. If needed, PSM files can
 Spectrum file
 -------------
 
-In the :ref:`correlate` and :ref:`get-training-data` usage modes, an MGF or mzML file with observed
-spectra must be provided to MS²PIP.
+In the :ref:`correlate` and :ref:`get-training-data` usage modes, a spectrum file with observed
+spectra must be provided to MS²PIP. Spectrum files in mzML, MGF, Thermo raw, and Bruker raw formats
+are supported.
+
+.. note::
+
+    To read Thermo raw files, the
+    `.NET runtime <https://learn.microsoft.com/en-us/dotnet/core/install/>`_ must be installed.
 
 Make sure that the PSM file ``spectrum_id`` matches the MGF ``TITLE`` field or mzML ``nativeID``
 fields. If the values of these fields are different, but the PSM file ``spectrum_id`` is embedded
diff --git a/docs/source/webserver-api.md b/docs/source/webserver-api.md
new file mode 100644
index 00000000..67ad0f7c
--- /dev/null
+++ b/docs/source/webserver-api.md
@@ -0,0 +1,110 @@
+# MS2PIP Server API
+MS2PIP Server can be accessed through the webpage or through a RESTful API. While this page provides an example Python script for contacting the API, the Swagger-generated documentation can be found here: https://iomics.ugent.be/ms2pip/api/v2.
+
+The following Python function contacts the MS2PIP Server RESTful API:
+
+```python
+def run_ms2pip_server(peptides, frag_method, ptm_list, url='https://iomics.ugent.be/ms2pip/api/v2'):
+    # Check if all columns are present in dataframe
+    for col in ['spec_id', 'peptide', 'charge', 'modifications']:
+        if col not in peptides.columns:
+            print("{} is missing from peptides DataFrame".format(col))
+            return None
+
+    # Split-up into batches of 100 000 peptides (maximum MS2PIP Server accepts per request)
+    batch_size = 100000
+    result = pd.DataFrame()
+    for i in list(range(0, len(peptides), batch_size)):
+        print("Working on batch {}/{}".format(int(i / batch_size + 1), len(peptides) // batch_size + 1))
+        peptides_batch = peptides.iloc[i:i+batch_size, :]
+
+        # Combine data into dictionary for json post request
+        input_data = {
+            "params": {
+                "frag_method": frag_method,
+                "ptm": ptm_list
+            },
+            "peptides": peptides_batch.to_dict(orient='list')
+        }
+
+        # Post data to server and get task id
+        response = requests.post('{}/task'.format(url), json=input_data)
+        if 'task_id' not in response.json():
+            if 'error' in response.json():
+                print("Server error: {}".format(response.json()['error']))
+            else:
+                print("Something went wrong")
+            return None
+        task_id = response.json()['task_id']
+        print("Received task id: {}".format(task_id))
+
+        # Check server task status and get result when ready
+        sleep(1)
+        response = requests.get('{}/task/{}/status'.format(url, task_id))
+        state = response.json()['state']
+
+        if state != 'SUCCESS':
+            print("Check MS2PIP Server status every 5 seconds", end='')
+            state = 'PENDING'
+            pending_count = 0
+
+            while state == 'PENDING' or state == 'PROGRESS':
+                sleep(5)
+                print('.', end='')
+                response = requests.get('{}/task/{}/status'.format(url, task_id))
+                state = response.json()['state']
+
+                # Do not keep looping if task state is stuck on PENDING, it might have failed silently
+                if state == 'PENDING':
+                    pending_count += 1
+                if pending_count > 24:
+                    print("\nSomething went wrong")
+                    return None
+            print('')
+
+        if state == 'SUCCESS':
+            response = requests.get('{}/task/{}/result'.format(url, task_id))
+            result_batch = pd.DataFrame.from_dict(response.json())['ms2pip_out']
+            result_batch = result_batch[['spec_id', 'charge', 'ion', 'ionnumber', 'mz', 'prediction']]
+            result = result.append(result_batch)
+            print("Result received", end='\n\n')
+        else:
+            error_message = response.json()['status']
+            print("Something went wrong: {}".format(error_message))
+            return None
+
+    print("Finished with all batches!")
+
+    return result
+```
+
+The function takes three arguments: `peptides`, `frag_method` and `ptm_list`. `peptides` is an MS2PIP PEPREC-formatted Pandas DataFrame. `ptm_list` is a list of MS2PIP formatted PTM definitions. A detailed explanation of both data structures can be found on the [MS2PIP Server webpage](https://iomics.ugent.be/ms2pip/#howto). `frag_method` is the specific model with which you want to predict peak intensities (e.g. HCD, CID, iTRAQ...) Checkout [README.md](https://github.com/compomics/ms2pip_c/#ms2pip-models) for a list of all available models.
+
+The function also takes an argument `url`, in which you can provide a custom URL to the server. By default this is `https://iomics.ugent.be/ms2pip/api`.
+
+An example for running the function:
+```python
+# Define arguments
+frag_method = 'HCD'
+ptm_list = [
+    'Oxidation,15.994915,M',
+    'Carbamidomethyl,57.021464,C',
+    'PhosphoS,79.966331,S',
+    'PhosphoT,79.966331,T',
+    'PhosphoY,79.966331,Y',
+    'Glu->pyro-Glu,-18.010565,E',
+    'Gln->pyro-Glu,-17.026549,Q',
+    'Pyro-carbamidomethyl,39.994915,C',
+    'Deamidated,0.984016,N',
+    'iTRAQ,144.102063,N-term'
+]
+peptides = pd.DataFrame(data={
+    'spec_id': ['peptide1', 'peptide2'],
+    'peptide': ['STCINTFWLIVK', 'GRLNTFILK'],
+    'modifications': ['3|Carbamidomethyl', '-'],
+    'charge': [2, 3]
+})
+
+# Run function
+result = run_ms2pip_server(peptides, frag_method, ptm_list)
+```
diff --git a/train_scripts/train_xgboost_c.py b/train_scripts/train_xgboost_c.py
deleted file mode 100644
index 7fa84174..00000000
--- a/train_scripts/train_xgboost_c.py
+++ /dev/null
@@ -1,370 +0,0 @@
-import os
-import argparse
-import pandas as pd
-import numpy as np
-import xgboost as xgb
-from math import ceil
-from operator import itemgetter
-from itertools import product
-from scipy.stats import pearsonr
-import matplotlib.pyplot as plt
-
-
-def evalerror_pearson(preds, dtrain):
-	labels = dtrain.get_label()
-	return 'pearsonr', pearsonr(preds, labels)[0]
-
-
-def convert_model_to_c(bst, args, numf, filename):
-	bst.dump_model("{}_dump.raw.txt".format(filename))
-	num_nodes = []
-	mmax = 0
-	with open("{}_dump.raw.txt".format(filename)) as f:
-		for row in f:
-			if row.startswith('booster'):
-				if row.startswith('booster[0]'):
-					mmax = 0
-				else:
-					num_nodes.append(mmax + 1)
-					mmax = 0
-				continue
-			l = int(row.rstrip().replace(' ', '').split(':')[0])
-			if l > mmax:
-				mmax = l
-	num_nodes.append(mmax + 1)
-	forest = []
-	tree = None
-	b = 0
-	with open("{}_dump.raw.txt".format(filename)) as f:
-		for row in f:
-			if row.startswith('booster'):
-				if row.startswith('booster[0]'):
-					tree = [0] * num_nodes[b]
-					b += 1
-				else:
-					forest.append(tree)
-					tree = [0] * num_nodes[b]
-					b += 1
-				continue
-			l = row.rstrip().replace(' ', '').split(':')
-			if l[1][:4] == "leaf":
-				tmp = l[1].split('=')
-				tree[int(l[0])] = [-1, float(tmp[1]), -1, -1]  # !!!!
-			else:
-				tmp = l[1].split('yes=')
-				tmp[0] = tmp[0].replace('[Features', '')
-				tmp[0] = tmp[0].replace('[Feature', '')
-				tmp[0] = tmp[0].replace('[f', '')
-				tmp[0] = tmp[0].replace(']', '')
-				tmp2 = tmp[0].split('<')
-				if float(tmp2[1]) < 0:
-					tmp2[1] = 1
-				tmp3 = tmp[1].split(",no=")
-				tmp4 = tmp3[1].split(',')
-				tree[int(l[0])] = [int(tmp2[0]), int(ceil(float(tmp2[1]))), int(tmp3[0]), int(tmp4[0])]
-		forest.append(tree)
-
-		with open('{}.c'.format(filename), 'w') as fout:
-			fout.write("static float score_{}_{}(unsigned int* v){{\n".format(args.model_name, args.type))
-			fout.write("float s = 0.;\n")
-			for tt in range(len(forest)):
-				fout.write(tree_to_code(forest[tt], 0, 1))
-			fout.write("\nreturn s;}\n")
-
-		"""
-		with open('{}.pyx'.format(filename), 'w') as fout:
-			fout.write("cdef extern from \"{}.c\":\n".format(filename))
-			fout.write("\tfloat score_{}(short unsigned short[{}] v)\n\n".format(args.type, numf))
-			fout.write("def myscore(sv):\n")
-			fout.write("\tcdef unsigned short[{}] v = sv\n".format(numf))
-			fout.write("\treturn score_{}(v)\n".format(args.type))
-		"""
-
-	os.remove("{}_dump.raw.txt".format(filename))
-
-
-def tree_to_code(tree, pos, padding):
-	p = "\t" * padding
-	if tree[pos][0] == -1:
-		if tree[pos][1] < 0:
-			return p + "s = s {};\n".format(tree[pos][1])
-		else:
-			return p + "s = s + {};\n".format(tree[pos][1])
-	return p + "if (v[{}]<{}){{\n{}}}\n{}else{{\n{}}}".format(tree[pos][0], tree[pos][1], tree_to_code(tree, tree[pos][2], padding + 1), p, tree_to_code(tree, tree[pos][3], padding + 1))
-
-
-def load_data(vector_filename, ion_type):
-	# Read file
-	if vector_filename.split('.')[-1] == 'pkl':
-		vectors = pd.read_pickle(vector_filename)
-	elif vector_filename.split('.')[-1] == 'h5':
-		#vectors = pd.read_hdf(vector_filename, key='table', stop=1000)
-		vectors = pd.read_hdf(vector_filename, key='table')
-	else:
-		print("Unsuported feature vector format")
-		exit(1)
-	print("{} contains {} feature vectors".format(args.vectors, len(vectors)))
-
-	#vectors = vectors[vectors["ce"]==2]
-
-	# Extract targets for given ion type
-	target_names = list(vectors.columns[vectors.columns.str.contains('targets')])
-	if not 'targets_{}'.format(ion_type) in target_names:
-		print("Targets for {} could not be found in vector file.".format(ion_type))
-		print("Vector file only contains these targets: {}".format(target_names))
-		exit(1)
-	targets = vectors.pop('targets_{}'.format(ion_type))
-	target_names.remove('targets_{}'.format(ion_type))
-	vectors.drop(labels=target_names, axis=1, inplace=True)
-
-	# Get psmids
-	psmids = vectors.pop('psmid')
-	# Cast vectors to numpy float for XGBoost
-	vectors.astype(np.float32)
-
-	return(vectors, targets, psmids)
-
-
-def get_params_combinations(params):
-	keys, values = zip(*params.items())
-	combinations = [dict(zip(keys, v)) for v in product(*values)]
-	return(combinations)
-
-
-def get_best_params(df, params_grid):
-	params = {}
-	best = df[df['test-rmse-mean'] == df['test-rmse-mean'].min()]
-	for p in params_grid.keys():
-		params[p] = best[p].iloc[0]
-	# num_boost_round = best['boosting-round'].iloc[0]
-	return(params)
-
-
-def gridsearch(xtrain, params, params_grid):
-	cols = ['boosting-round', 'test-rmse-mean', 'test-rmse-std', 'train-rmse-mean', 'train-rmse-std']
-	cols.extend(sorted(params_grid.keys()))
-	result = pd.DataFrame(columns=cols)
-
-	count = 1
-	combinations = get_params_combinations(params_grid)
-
-	for param_overrides in combinations:
-		print("Working on combination {}/{}".format(count, len(combinations)))
-		count += 1
-		params.update(param_overrides)
-		tmp = xgb.cv(params, xtrain, nfold=5, num_boost_round=200, early_stopping_rounds=10, verbose_eval=10)
-		tmp['boosting-round'] = tmp.index
-		for param in param_overrides.keys():
-			tmp[param] = param_overrides[param]
-		result = result.append(tmp)
-
-	print("Grid search ready!\n")
-
-	return(result)
-
-
-def print_logo():
-	logo = """
-	 _____ _____ _____ _____ _____
-	|_   _| __  |  _  |     |   | |
-	  | | |    -|     |-   -| | | |
-	  |_| |__|__|__|__|_____|_|___|
-
-	"""
-	print(logo)
-
-
-if __name__ == "__main__":
-	print_logo()
-	print("Using XGBoost version {}".format(xgb.__version__))
-
-	parser = argparse.ArgumentParser(description='XGBoost training')
-	parser.add_argument('vectors', metavar='<_vectors.pkl>',
-						help='feature vector file')
-	parser.add_argument('model_name', metavar='<model_name>',
-						help='model name (e.g. HCD')
-	parser.add_argument('type', metavar='<type>',
-						help='model type')
-	parser.add_argument('-c', metavar='INT', action="store", dest='num_cpu', default=24,
-						help='number of CPUs to use')
-	parser.add_argument('-t', metavar='INT', action="store", dest='num_trees', default=30,
-						help='number of trees in XGBoost model')
-	parser.add_argument('-e', metavar='FILE', action="store", dest='vectorseval',
-						help='additional evaluation file')
-	parser.add_argument("-p", action="store_true", dest='make_plots', default=False,
-						help="output plots")
-	parser.add_argument("-g", action="store_true", dest='gridsearch', default=False,
-						help="perform Grid Search CV to select best parameters")
-	args = parser.parse_args()
-
-	np.random.seed(1)
-
-	filename = "{}_{}_{}".format(args.vectors.split('.')[-2], args.num_trees, args.type)
-	print("Using output filename {}".format(filename))
-
-	print("Loading train and test data...")
-	vectors, targets, psmids = load_data(args.vectors, args.type)
-	
-	
-	print("Splitting up into train and test set...")
-	upeps = psmids.unique()
-	np.random.shuffle(upeps)
-	test_psms = upeps[:int(len(upeps) * 0.2)]
-
-	train_vectors = vectors[~psmids.isin(test_psms)]
-	train_targets = targets[~psmids.isin(test_psms)]
-	train_psmids = psmids[~psmids.isin(test_psms)]
-	
-	#train_targets.hist(bins=50)
-	#plt.show()
-
-	test_vectors = vectors[psmids.isin(test_psms)]
-	test_targets = targets[psmids.isin(test_psms)]
-	test_psmids = psmids[psmids.isin(test_psms)]
-
-	"""
-	train_vectors = train_vectors[train_targets > 0]
-	train_psmids = train_psmids[train_targets > 0]
-	train_targets = np.log2(train_targets[train_targets > 0])
-	test_vectors = test_vectors[test_targets > 0]
-	test_psmids = test_psmids[test_targets > 0]
-	test_targets = np.log2(test_targets[test_targets > 0])
-	train_targets = [1 if x==0 else 0 for x in train_targets]
-	test_targets = [1 if x==0 else 0 for x in test_targets]
-	"""
-	numf = len(train_vectors.columns.values)
-
-
-	# Rename features to understand decision tree dump
-	train_vectors.columns = ['Feature' + str(i) for i in range(len(train_vectors.columns))]
-	test_vectors.columns = ['Feature' + str(i) for i in range(len(test_vectors.columns))]
-	
-	print(train_vectors.shape)
-
-	print("Creating train and test DMatrix...")
-	xtrain = xgb.DMatrix(train_vectors, label=train_targets)
-	xtest = xgb.DMatrix(test_vectors, label=test_targets)
-	evallist = [(xtest, 'test'),(xtrain, 'train')]
-
-	# If needed, repeat for evaluation vectors, and create evallist
-	if args.vectorseval:
-		print("Loading eval data...")
-		eval_vectors, eval_targets, eval_psmids = load_data(args.vectorseval, args.type)
-		eval_vectors.columns = ['Feature' + str(i) for i in range(len(eval_vectors.columns))]
-		print("Creating eval DMatrix...")
-		xeval = xgb.DMatrix(eval_vectors, label=eval_targets)
-		del eval_vectors
-
-	# Remove items to save memory
-	del vectors, targets, psmids, train_vectors, train_targets, train_psmids, test_vectors
-
-	# Set XGBoost default parameters
-	params = {
-		"nthread": int(args.num_cpu),
-		"objective": "reg:linear",
-		#"objective": "binary:logistic",
-		"eval_metric": 'mae',
-		#"eval_metric": 'aucpr',
-		"silent": 1,
-		"eta": 0.5,
-		"max_depth": 9,
-		"grow_policy":"lossguide",
-		"max_leaves":100,
-		"min_child_weight": 50,
-		"gamma": 0.1,
-		"subsample": 1,
-		#"lambda" : 2,
-		# "colsample_bytree": 1,
-		# "max_delta_step": 0,
-	}
-
-	if args.gridsearch:
-		print("Performing GridSearchCV...")
-		params_grid = {
-			'max_depth': [7, 8, 9],
-			'min_child_weight': [300, 350, 400],
-			'gamma': [0, 1],
-			'max_delta_step': [0]
-		}
-
-		gs = gridsearch(xtrain, params, params_grid)
-		gs.to_csv('{}_GridSearchCV.csv'.format(filename))
-		best_params = get_best_params(gs, params_grid)
-		params.update(best_params)
-		print("Using best parameters: {}".format(best_params))
-
-	print("Training XGBoost model...")
-	#bst = xgb.train(params, xtrain, int(args.num_trees), evallist, early_stopping_rounds=10, maximize=False)
-	bst = xgb.train(params, xtrain, int(args.num_trees), evallist, maximize=False)
-
-	bst.save_model("{}.xgboost".format(filename))
-
-	# Load previously saved model here, if necessary
-	# bst = xgb.Booster({'nthread':23}) #init model
-	# bst.load_model(filename+'.xgboost') # load data
-
-	print("Writing model to C code...")
-	convert_model_to_c(bst, args, numf, filename)
-
-	print("Analyzing newly made model...")
-	# Get feature importances
-	importance = bst.get_fscore()
-	importance = sorted(importance.items(), key=itemgetter(1))
-	importance_list = []
-	with open("{}_importance.csv".format(filename), "w") as f:
-		f.write("Name,F-score\n")
-		for feat, n in importance[:]:
-			importance_list.append(feat)
-			f.write("{},{}\n".format(feat, str(n)))
-
-	# Print feature importances
-	print_feature_importances = False
-	if print_feature_importances:
-		print('[')
-		for l in importance_list:
-			print("'{}',".format(l), end='')
-		print(']')
-
-	# Use new model to make predictions on eval/test set and write to csv
-	tmp = pd.DataFrame()
-	if args.vectorseval:
-		predictions = bst.predict(xeval)
-		tmp['target'] = list(eval_targets.values)
-		tmp['predictions'] = predictions
-		tmp['psmid'] = list(eval_psmids.values)
-	else:
-		predictions = bst.predict(xtest)
-		tmp['target'] = list(test_targets.values)
-		tmp['predictions'] = predictions
-		tmp['psmid'] = list(test_psmids.values)
-	tmp.to_csv("{}_predictions.csv".format(filename), index=False)
-	# tmp.to_pickle("{}_predictions.pkl".format(filename))
-
-	"""
-	for ch in range(8, 20):
-		print("len {}".format(ch))
-		n1 = 0
-		n2 = 0
-		tmp3 = tmp[tmp.peplen == ch]
-		for pid in tmp3.psmid.unique().values:
-			tmp2 = tmp3[tmp3.psmid == pid]
-			print(pid)
-			for (t, p) in zip(tmp2.target, tmp2.predictions):
-				print("{} {}".format(t, p))
-			# n1 += pearsonr(tmp2.target, tmp2.predictions)[0]
-			n1 += np.mean(np.abs(tmp2.target - tmp2.predictions))
-			# print(n1)
-			n2 += 1
-		print(n1 / n2)
-	"""
-
-	if args.make_plots:
-		plt.figure()
-		plt.scatter(x=test_targets, y=predictions)
-		plt.title('Test set')
-		plt.xlabel('Target')
-		plt.ylabel('Prediction')
-		plt.savefig("{}_test.png".format(filename))
-		plt.show()
-
-	print("Ready!")