From d25d4c28721b3e9d87421bafcec5c37263ce0e60 Mon Sep 17 00:00:00 2001 From: Janos Gabler Date: Mon, 5 Jun 2023 17:35:00 +0200 Subject: [PATCH] Make tranquilo an optional dependency (#464) --- .envs/testenv-linux.yml | 2 +- .envs/testenv-others.yml | 2 +- .github/workflows/main.yml | 4 +- CHANGES.md | 21 + docs/rtd_environment.yml | 1 - docs/source/conf.py | 1 + environment.yml | 2 +- pyproject.toml | 1 + setup.cfg | 1 - src/estimagic/algorithms.py | 46 + src/estimagic/benchmarking/cartis_roberts.py | 10 +- src/estimagic/benchmarking/run_benchmark.py | 2 +- src/estimagic/config.py | 16 + src/estimagic/optimization/__init__.py | 46 - src/estimagic/optimization/get_algorithm.py | 2 +- .../subsolvers/_conjugate_gradient_fast.py | 138 -- .../subsolvers/_steihaug_toint_fast.py | 207 --- .../optimization/subsolvers/_trsbox_fast.py | 658 ---------- .../optimization/subsolvers/bntr_fast.py | 1167 ----------------- .../optimization/subsolvers/gqtpar_fast.py | 668 ---------- src/estimagic/optimization/tranquilo.py | 29 + .../optimization/tranquilo/__init__.py | 0 .../tranquilo/acceptance_decision.py | 244 ---- .../tranquilo/acceptance_sample_size.py | 69 - .../optimization/tranquilo/adjust_radius.py | 42 - .../tranquilo/aggregate_models.py | 152 --- .../optimization/tranquilo/bounds.py | 28 - .../optimization/tranquilo/clustering.py | 75 -- .../tranquilo/estimate_variance.py | 61 - .../optimization/tranquilo/filter_points.py | 129 -- .../optimization/tranquilo/fit_models.py | 507 ------- .../optimization/tranquilo/geometry.py | 24 - .../optimization/tranquilo/get_component.py | 231 ---- .../optimization/tranquilo/handle_infinity.py | 49 - .../optimization/tranquilo/history.py | 261 ---- .../optimization/tranquilo/models.py | 295 ----- .../optimization/tranquilo/options.py | 210 --- .../optimization/tranquilo/poisedness.py | 211 --- .../tranquilo/process_arguments.py | 314 ----- .../optimization/tranquilo/region.py | 152 --- .../optimization/tranquilo/rho_noise.py | 87 -- .../optimization/tranquilo/sample_points.py | 466 ------- .../tranquilo/solve_subproblem.py | 200 --- .../optimization/tranquilo/tranquilo.py | 467 ------- .../optimization/tranquilo/volume.py | 81 -- .../optimization/tranquilo/weighting.py | 27 - .../optimization/tranquilo/wrap_criterion.py | 56 - .../tranquilo/wrapped_subsolvers.py | 94 -- .../visualization/visualize_tranquilo.py | 590 --------- .../optimization/subsolvers/test_bntr_fast.py | 551 -------- .../subsolvers/test_gqtpar_fast.py | 98 -- .../subsolvers/test_gqtpar_lambdas.py | 20 - .../subsolvers/test_minimize_trust_region.py | 484 ------- tests/optimization/test_history_collection.py | 2 +- tests/optimization/test_many_algorithms.py | 2 +- .../optimization/test_quadratic_subsolvers.py | 11 - .../test_with_nonlinear_constraints.py | 2 +- .../tranquilo/test_acceptance_decision.py | 140 -- .../tranquilo/test_acceptance_sample_size.py | 86 -- .../tranquilo/test_adjust_radius.py | 104 -- .../tranquilo/test_aggregate_models.py | 79 -- tests/optimization/tranquilo/test_bounds.py | 38 - .../optimization/tranquilo/test_clustering.py | 34 - .../tranquilo/test_estimate_variance.py | 44 - .../tranquilo/test_filter_points.py | 48 - .../optimization/tranquilo/test_fit_models.py | 145 -- .../tranquilo/test_get_component.py | 170 --- .../tranquilo/test_handle_infinity.py | 15 - tests/optimization/tranquilo/test_history.py | 230 ---- tests/optimization/tranquilo/test_models.py | 190 --- tests/optimization/tranquilo/test_options.py | 56 - .../optimization/tranquilo/test_poisedness.py | 
388 ------ .../tranquilo/test_process_arguments.py | 137 -- tests/optimization/tranquilo/test_region.py | 128 -- .../optimization/tranquilo/test_rho_noise.py | 78 -- .../tranquilo/test_sample_points.py | 171 --- .../tranquilo/test_solve_subproblem.py | 45 - .../optimization/tranquilo/test_tranquilo.py | 234 ---- tests/optimization/tranquilo/test_volume.py | 104 -- .../optimization/tranquilo/test_weighting.py | 7 - .../tranquilo/test_wrap_criterion.py | 61 - .../visualization/test_visualize_tranquilo.py | 40 - 82 files changed, 133 insertions(+), 11955 deletions(-) create mode 100644 src/estimagic/algorithms.py delete mode 100644 src/estimagic/optimization/subsolvers/_conjugate_gradient_fast.py delete mode 100644 src/estimagic/optimization/subsolvers/_steihaug_toint_fast.py delete mode 100644 src/estimagic/optimization/subsolvers/_trsbox_fast.py delete mode 100644 src/estimagic/optimization/subsolvers/bntr_fast.py delete mode 100644 src/estimagic/optimization/subsolvers/gqtpar_fast.py create mode 100644 src/estimagic/optimization/tranquilo.py delete mode 100644 src/estimagic/optimization/tranquilo/__init__.py delete mode 100644 src/estimagic/optimization/tranquilo/acceptance_decision.py delete mode 100644 src/estimagic/optimization/tranquilo/acceptance_sample_size.py delete mode 100644 src/estimagic/optimization/tranquilo/adjust_radius.py delete mode 100644 src/estimagic/optimization/tranquilo/aggregate_models.py delete mode 100644 src/estimagic/optimization/tranquilo/bounds.py delete mode 100644 src/estimagic/optimization/tranquilo/clustering.py delete mode 100644 src/estimagic/optimization/tranquilo/estimate_variance.py delete mode 100644 src/estimagic/optimization/tranquilo/filter_points.py delete mode 100644 src/estimagic/optimization/tranquilo/fit_models.py delete mode 100644 src/estimagic/optimization/tranquilo/geometry.py delete mode 100644 src/estimagic/optimization/tranquilo/get_component.py delete mode 100644 src/estimagic/optimization/tranquilo/handle_infinity.py delete mode 100644 src/estimagic/optimization/tranquilo/history.py delete mode 100644 src/estimagic/optimization/tranquilo/models.py delete mode 100644 src/estimagic/optimization/tranquilo/options.py delete mode 100644 src/estimagic/optimization/tranquilo/poisedness.py delete mode 100644 src/estimagic/optimization/tranquilo/process_arguments.py delete mode 100644 src/estimagic/optimization/tranquilo/region.py delete mode 100644 src/estimagic/optimization/tranquilo/rho_noise.py delete mode 100644 src/estimagic/optimization/tranquilo/sample_points.py delete mode 100644 src/estimagic/optimization/tranquilo/solve_subproblem.py delete mode 100644 src/estimagic/optimization/tranquilo/tranquilo.py delete mode 100644 src/estimagic/optimization/tranquilo/volume.py delete mode 100644 src/estimagic/optimization/tranquilo/weighting.py delete mode 100644 src/estimagic/optimization/tranquilo/wrap_criterion.py delete mode 100644 src/estimagic/optimization/tranquilo/wrapped_subsolvers.py delete mode 100644 src/estimagic/visualization/visualize_tranquilo.py delete mode 100644 tests/optimization/subsolvers/test_bntr_fast.py delete mode 100644 tests/optimization/subsolvers/test_gqtpar_fast.py delete mode 100644 tests/optimization/subsolvers/test_gqtpar_lambdas.py delete mode 100644 tests/optimization/subsolvers/test_minimize_trust_region.py delete mode 100644 tests/optimization/tranquilo/test_acceptance_decision.py delete mode 100644 tests/optimization/tranquilo/test_acceptance_sample_size.py delete mode 100644 
tests/optimization/tranquilo/test_adjust_radius.py delete mode 100644 tests/optimization/tranquilo/test_aggregate_models.py delete mode 100644 tests/optimization/tranquilo/test_bounds.py delete mode 100644 tests/optimization/tranquilo/test_clustering.py delete mode 100644 tests/optimization/tranquilo/test_estimate_variance.py delete mode 100644 tests/optimization/tranquilo/test_filter_points.py delete mode 100644 tests/optimization/tranquilo/test_fit_models.py delete mode 100644 tests/optimization/tranquilo/test_get_component.py delete mode 100644 tests/optimization/tranquilo/test_handle_infinity.py delete mode 100644 tests/optimization/tranquilo/test_history.py delete mode 100644 tests/optimization/tranquilo/test_models.py delete mode 100644 tests/optimization/tranquilo/test_options.py delete mode 100644 tests/optimization/tranquilo/test_poisedness.py delete mode 100644 tests/optimization/tranquilo/test_process_arguments.py delete mode 100644 tests/optimization/tranquilo/test_region.py delete mode 100644 tests/optimization/tranquilo/test_rho_noise.py delete mode 100644 tests/optimization/tranquilo/test_sample_points.py delete mode 100644 tests/optimization/tranquilo/test_solve_subproblem.py delete mode 100644 tests/optimization/tranquilo/test_tranquilo.py delete mode 100644 tests/optimization/tranquilo/test_volume.py delete mode 100644 tests/optimization/tranquilo/test_weighting.py delete mode 100644 tests/optimization/tranquilo/test_wrap_criterion.py delete mode 100644 tests/visualization/test_visualize_tranquilo.py diff --git a/.envs/testenv-linux.yml b/.envs/testenv-linux.yml index 0535fecb6..a4cd42372 100644 --- a/.envs/testenv-linux.yml +++ b/.envs/testenv-linux.yml @@ -16,13 +16,13 @@ dependencies: - click # run, tests - cloudpickle # run, tests - joblib # run, tests - - numba # run, tests - numpy>=1.17.0 # run, tests - pandas # run, tests - plotly # run, tests - pybaum >= 0.1.2 # run, tests - scipy>=1.2.1 # run, tests - sqlalchemy # run, tests + - tranquilo>=0.0.4 # dev, tests - pip: # dev, tests, docs - DFO-LS # dev, tests - Py-BOBYQA # dev, tests diff --git a/.envs/testenv-others.yml b/.envs/testenv-others.yml index 475f786c2..33093b602 100644 --- a/.envs/testenv-others.yml +++ b/.envs/testenv-others.yml @@ -15,13 +15,13 @@ dependencies: - click # run, tests - cloudpickle # run, tests - joblib # run, tests - - numba # run, tests - numpy>=1.17.0 # run, tests - pandas # run, tests - plotly # run, tests - pybaum >= 0.1.2 # run, tests - scipy>=1.2.1 # run, tests - sqlalchemy # run, tests + - tranquilo>=0.0.4 # dev, tests - pip: # dev, tests, docs - DFO-LS # dev, tests - Py-BOBYQA # dev, tests diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 895c6cfa5..cae534bd7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -21,9 +21,9 @@ jobs: os: - ubuntu-latest python-version: - - '3.8' - '3.9' - '3.10' + - '3.11' steps: - uses: actions/checkout@v3 - name: create build environment @@ -54,9 +54,9 @@ jobs: - macos-latest - windows-latest python-version: - - '3.8' - '3.9' - '3.10' + - '3.11' steps: - uses: actions/checkout@v3 - name: create build environment diff --git a/CHANGES.md b/CHANGES.md index e399c1891..1c6d797ba 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,27 @@ This is a record of all past estimagic releases and what went into them in rever chronological order. We follow [semantic versioning](https://semver.org/) and all releases are available on [Anaconda.org](https://anaconda.org/OpenSourceEconomics/estimagic). 
+Following the [scientific python guidelines](https://scientific-python.org/specs/spec-0000/) +we drop official support for Python 3.8. + + +## 0.4.6 + +This release drastically improves the optimizer benchmarking capabilities, especially +with noisy functions and parallel optimizers. It makes tranquilo and numba optional +dependencies and is the first version of estimagic to be compatible with Python +3.11. + + +- {gh}`464` Makes tranquilo and numba optional dependencies ({ghuser}`janosg`) +- {gh}`461` Updates docstrings for process_benchmark_results ({ghuser}`segsell`) +- {gh}`460` Fixes several bugs in the processing of benchmark results with noisy + functions ({ghuser}`janosg`) +- {gh}`459` Prepares benchmarking functionality for parallel optimizers + ({ghuser}`mpetrosian` and {ghuser}`janosg`) +- {gh}`457` Removes some unused files ({ghuser}`segsell`) +- {gh}`455` Improves a local pre-commit hook ({ghuser}`ChristianZimpelmann`) + ## 0.4.5 diff --git a/docs/rtd_environment.yml b/docs/rtd_environment.yml index 5d38e9afd..68eed648b 100644 --- a/docs/rtd_environment.yml +++ b/docs/rtd_environment.yml @@ -17,7 +17,6 @@ dependencies: - ipython_genutils - myst-nb - pydata-sphinx-theme<=0.12.0 - - numba - pybaum - matplotlib - seaborn diff --git a/docs/source/conf.py b/docs/source/conf.py index 58abca26c..ba12b8dfb 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -77,6 +77,7 @@ "petsc4py", "statsmodels", "numba", + "tranquilo", ] extlinks = { diff --git a/environment.yml b/environment.yml index 3b7b6e85a..f326dc071 100644 --- a/environment.yml +++ b/environment.yml @@ -22,7 +22,6 @@ dependencies: - click # run, tests - cloudpickle # run, tests - joblib # run, tests - - numba # run, tests - numpy>=1.17.0 # run, tests - pandas # run, tests - plotly # run, tests @@ -35,6 +34,7 @@ dependencies: - sphinx-copybutton # docs - sphinx-panels # docs - sphinxcontrib-bibtex # docs + - tranquilo>=0.0.4 # dev, tests - pip: # dev, tests, docs - DFO-LS # dev, tests - Py-BOBYQA # dev, tests diff --git a/pyproject.toml b/pyproject.toml index b5b3f2483..5a4c08015 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,6 +91,7 @@ filterwarnings = [ "ignore:Widget._widget_types is deprecated", "ignore:Widget.widget_types is deprecated", "ignore:Widget.widgets is deprecated", + "ignore:Parallelization together with", ] addopts = ["--doctest-modules"] markers = [ diff --git a/setup.cfg b/setup.cfg index ab268cd59..6c16c830a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -39,7 +39,6 @@ install_requires = click cloudpickle joblib - numba numpy>=1.17.0 pandas plotly diff --git a/src/estimagic/algorithms.py b/src/estimagic/algorithms.py new file mode 100644 index 000000000..56d787f55 --- /dev/null +++ b/src/estimagic/algorithms.py @@ -0,0 +1,46 @@ +import inspect + +from estimagic.optimization import ( + bhhh, + cyipopt_optimizers, + fides_optimizers, + nag_optimizers, + neldermead, + nlopt_optimizers, + pounders, + pygmo_optimizers, + scipy_optimizers, + simopt_optimizers, + tao_optimizers, + tranquilo, +) + +MODULES = [ + cyipopt_optimizers, + fides_optimizers, + nag_optimizers, + nlopt_optimizers, + pygmo_optimizers, + scipy_optimizers, + simopt_optimizers, + tao_optimizers, + bhhh, + neldermead, + pounders, + tranquilo, +] + +ALL_ALGORITHMS = {} +AVAILABLE_ALGORITHMS = {} +for module in MODULES: + func_dict = dict(inspect.getmembers(module, inspect.isfunction)) + for name, func in func_dict.items(): + if hasattr(func, "_algorithm_info"): + ALL_ALGORITHMS[name] = func + if
func._algorithm_info.is_available: + AVAILABLE_ALGORITHMS[name] = func + + +GLOBAL_ALGORITHMS = [ + name for name, func in ALL_ALGORITHMS.items() if func._algorithm_info.is_global +] diff --git a/src/estimagic/benchmarking/cartis_roberts.py b/src/estimagic/benchmarking/cartis_roberts.py index 8554f01f0..293ed1272 100644 --- a/src/estimagic/benchmarking/cartis_roberts.py +++ b/src/estimagic/benchmarking/cartis_roberts.py @@ -16,8 +16,16 @@ """ from functools import partial -from numba import njit import numpy as np +from estimagic.config import IS_NUMBA_INSTALLED + +if IS_NUMBA_INSTALLED: + from numba import njit +else: + + def njit(func): + return func + from estimagic.benchmarking.more_wild import ( brown_almost_linear, diff --git a/src/estimagic/benchmarking/run_benchmark.py b/src/estimagic/benchmarking/run_benchmark.py index 4b0e3bc99..e02203a95 100644 --- a/src/estimagic/benchmarking/run_benchmark.py +++ b/src/estimagic/benchmarking/run_benchmark.py @@ -10,7 +10,7 @@ import numpy as np from estimagic import batch_evaluators -from estimagic.optimization import AVAILABLE_ALGORITHMS +from estimagic.algorithms import AVAILABLE_ALGORITHMS from estimagic.optimization.optimize import minimize from pybaum import tree_just_flatten from estimagic.parameters.tree_registry import get_registry diff --git a/src/estimagic/config.py b/src/estimagic/config.py index 165cf0e4e..dd0fcde1c 100644 --- a/src/estimagic/config.py +++ b/src/estimagic/config.py @@ -87,6 +87,22 @@ IS_SIMOPT_INSTALLED = True +try: + import tranquilo # noqa: F401 +except ImportError: + IS_TRANQUILO_INSTALLED = False +else: + IS_TRANQUILO_INSTALLED = True + + +try: + import numba # noqa: F401 +except ImportError: + IS_NUMBA_INSTALLED = False +else: + IS_NUMBA_INSTALLED = True + + # ================================================================================= # Dashboard Defaults # ================================================================================= diff --git a/src/estimagic/optimization/__init__.py b/src/estimagic/optimization/__init__.py index c5e93112a..e69de29bb 100644 --- a/src/estimagic/optimization/__init__.py +++ b/src/estimagic/optimization/__init__.py @@ -1,46 +0,0 @@ -import inspect - -from estimagic.optimization import ( - bhhh, - cyipopt_optimizers, - fides_optimizers, - nag_optimizers, - neldermead, - nlopt_optimizers, - pounders, - pygmo_optimizers, - scipy_optimizers, - simopt_optimizers, - tao_optimizers, -) -from estimagic.optimization.tranquilo import tranquilo - -MODULES = [ - cyipopt_optimizers, - fides_optimizers, - nag_optimizers, - nlopt_optimizers, - pygmo_optimizers, - scipy_optimizers, - simopt_optimizers, - tao_optimizers, - bhhh, - neldermead, - pounders, - tranquilo, -] - -ALL_ALGORITHMS = {} -AVAILABLE_ALGORITHMS = {} -for module in MODULES: - func_dict = dict(inspect.getmembers(module, inspect.isfunction)) - for name, func in func_dict.items(): - if hasattr(func, "_algorithm_info"): - ALL_ALGORITHMS[name] = func - if func._algorithm_info.is_available: - AVAILABLE_ALGORITHMS[name] = func - - -GLOBAL_ALGORITHMS = [ - name for name, func in ALL_ALGORITHMS.items() if func._algorithm_info.is_global -] diff --git a/src/estimagic/optimization/get_algorithm.py b/src/estimagic/optimization/get_algorithm.py index ef07978aa..553465ba8 100644 --- a/src/estimagic/optimization/get_algorithm.py +++ b/src/estimagic/optimization/get_algorithm.py @@ -9,7 +9,7 @@ list_of_dicts_to_dict_of_lists, ) from estimagic.logging.write_to_database import update_row -from estimagic.optimization import 
ALL_ALGORITHMS +from estimagic.algorithms import ALL_ALGORITHMS from estimagic.utilities import propose_alternatives diff --git a/src/estimagic/optimization/subsolvers/_conjugate_gradient_fast.py b/src/estimagic/optimization/subsolvers/_conjugate_gradient_fast.py deleted file mode 100644 index 6e46215bb..000000000 --- a/src/estimagic/optimization/subsolvers/_conjugate_gradient_fast.py +++ /dev/null @@ -1,138 +0,0 @@ -"""Implementation of the Conjugate Gradient algorithm.""" -import numpy as np -from numba import njit - - -@njit -def minimize_trust_cg_fast( - model_gradient, model_hessian, trustregion_radius, gtol_abs, gtol_rel -): - """Minimize the quadratic subproblem via (standard) conjugate gradient. - - Solve the trust-region quadratic subproblem: - min_x g.T @ x + 0.5 * x.T @ H @ x - s.t. ||x|| <= trustregion_radius - - approximately, where g denotes the gradient and H the hessian of the quadratic - model (i.e. the linear terms and square_terms), respectively. - - Args: - model_gradient (np.ndarray): 1d array of shape (n,) containing the - gradient (i.e. linear terms) of the quadratic model. - model_hessian (np.ndarray): 2d array of shape (n, n) containing the - hessian (i.e. square terms) of the quadratic model. - trustregion_radius (float): Radius of the trust-region. - gtol_abs (float): Convergence tolerance for the absolute gradient norm. - gtol_rel (float): Convergence tolerance for the relative gradient norm. - - Returns: - np.ndarray: Solution vector of shape (n,). - - """ - n = len(model_gradient) - max_iter = n * 2 - x_candidate = np.zeros(n) - - residual = model_gradient - direction = -model_gradient - - gradient_norm = np.linalg.norm(residual) - stop_tol = max(gtol_abs, gtol_rel * gradient_norm) - - for _ in range(max_iter): - if gradient_norm <= stop_tol: - break - - square_terms = direction.T @ model_hessian @ direction - - distance_to_boundary = _get_distance_to_trustregion_boundary( - x_candidate, direction, trustregion_radius - ) - - # avoid divide by zero warning - if square_terms > 0: - step_size = (residual @ residual) / square_terms - else: - step_size = np.inf - - if square_terms <= 0 or step_size > distance_to_boundary: - x_candidate = x_candidate + distance_to_boundary * direction - break - - x_candidate, residual, direction = _update_vectors_for_next_iteration( - x_candidate, residual, direction, model_hessian, step_size - ) - gradient_norm = np.linalg.norm(residual) - - return x_candidate - - -@njit -def _update_vectors_for_next_iteration( - x_candidate, residual, direction, hessian, alpha -): - """Update candidate, residual, and direction vectors for the next iteration. - - Args: - x_candidate (np.ndarray): Candidate vector of shape (n,). - residual (np.ndarray): Array of residuals of shape (n,). The residual vector - is defined as `r = Ax - b`, where `A` denotes the hessian matrix and `b` the - gradient vector of the quadratic trust-region subproblem. - `r` is equivalent to the first derivative of the quadratic subproblem. - direction (np.ndarray): Direction vector of shape (n,). - - Returns: - x_candidate (np.ndarray): Updated candidate vector of shape (n,). - residual_new (np.ndarray): Updated array of residuals of shape (n,). - direction (np.ndarray): Updated direction vector of shape (n,).
- - """ - residual_new = np.zeros(len(residual)) - nom = 0.0 - denom = 0.0 - for i in range(len(x_candidate)): - x_candidate[i] = x_candidate[i] + alpha * direction[i] - temp = 0 - for j in range(len(x_candidate)): - temp += hessian[i, j] * direction[j] - residual_new[i] = temp * alpha + residual[i] - - nom += residual_new[i] * residual_new[i] - denom += residual[i] * residual[i] - beta = nom / denom - direction = -residual_new + beta * direction - - return x_candidate, residual_new, direction - - -@njit -def _get_distance_to_trustregion_boundary(candidate, direction, radius): - """Compute the distance of the candidate vector to trustregion boundary. - - The positive distance sigma is defined in Eculidean norm, as follows: - - || x + sigma * d || = radius - - where x denotes the candidate vector, and d the direction vector. - - Args: - candidate(np.ndarray): Candidate vector of shape (n,). - direction (np.ndarray): Direction vector of shape (n,). - radius (floar): Radius of the trust-region - - Returns: - float: The candidate vector's distance to the trustregion - boundary. - - """ - cc = 0 - cd = 0 - dd = 0 - for i in range(len(direction)): - cc += candidate[i] ** 2 - dd += direction[i] ** 2 - cd += candidate[i] * direction[i] - sigma = -cd + np.sqrt(cd * cd + dd * (radius**2 - cc)) - sigma = sigma / dd - - return sigma diff --git a/src/estimagic/optimization/subsolvers/_steihaug_toint_fast.py b/src/estimagic/optimization/subsolvers/_steihaug_toint_fast.py deleted file mode 100644 index 1e87fcfad..000000000 --- a/src/estimagic/optimization/subsolvers/_steihaug_toint_fast.py +++ /dev/null @@ -1,207 +0,0 @@ -"""Implementation of the Steihaug-Toint Conjugate Gradient algorithm.""" -import numpy as np -from numba import njit - - -@njit -def minimize_trust_stcg_fast(model_gradient, model_hessian, trustregion_radius): - """Minimize the quadratic subproblem via Steihaug-Toint conjugate gradient. - - Solve the quadratic trust-region subproblem: - - min_x g.T @ x + 0.5 * x.T @ hess @ x - s.t. ||x|| <= trustregion_radius - - approximately, where g denotes the gradient and hess the hessian of the quadratic - model (i.e. the linear terms and square_terms), respectively. - - The Steihaug-Toint conjugate gradient method is based on Steihaug - (:cite:`Steihaug1983`) and Toint (:cite:`Toint1981`). - - Args: - model_gradient (np.ndarray): 1d array of shape (n,) containing the - gradient (i.e. linear terms) of the quadratic model. - model_hessian (np.ndarray): 2d array of shape (n, n) containing the - hessian (i.e .square terms) of the quadratic model. - trustregion_radius (float): Radius of the trust-region. - - Returns: - np.ndarray: Solution vector of shape (n,). 
- - """ - abstol = 1e-50 - rtol = 1e-5 - divtol = 10_000 - - n = len(model_gradient) - radius_sq = trustregion_radius**2 - - residual = -model_gradient - rr = residual.T @ residual - - x_candidate = np.zeros(n) - - max_iter = min(n, 10_000) - - z = np.linalg.pinv(model_hessian) @ residual - rz = residual @ residual - - n_iter = 0 - diverged = False - converged = False - - norm_r = np.sqrt(rr) - norm_r0 = norm_r - if rtol * norm_r0 >= abstol: - ttol = rtol * norm_r0 - else: - ttol = abstol - - converged, diverged = _check_convergence( - norm_r, norm_r0, abstol, ttol, divtol, converged, diverged - ) - - p = model_hessian @ z - z = model_hessian @ p - n_iter += 1 - - kappa = p @ z - - dp = 0 - norm_d = 0 - norm_p = p @ p - - if kappa <= 0: - converged = True - - x_candidate, z, n_iter = _update_candidate_vector_and_iteration_number( - x_candidate, - residual, - p, - z, - model_gradient, - model_hessian, - rr, - trustregion_radius, - norm_p, - n_iter, - ) - - for _ in range(max_iter): - alpha = rz / kappa - norm_dp1 = norm_d + alpha * (2 * dp + alpha * norm_p) - - if trustregion_radius != 0 and norm_dp1 >= radius_sq: - converged = True - - if norm_p > 0: - x_candidate = _take_step_to_trustregion_boundary( - x_candidate, p, dp, radius_sq, norm_d, norm_p - ) - - break - - x_candidate = x_candidate + alpha * p - residual = residual - alpha * (model_hessian @ p) - - norm_d = x_candidate @ x_candidate - - rzm1 = rz - rz = residual @ residual - - norm_r = np.linalg.norm(residual) - - converged, diverged = _check_convergence( - norm_r, norm_r0, abstol, ttol, divtol, converged, diverged - ) - - if converged or diverged: - break - - beta = rz / rzm1 - - if abs(beta) <= 0: - diverged = True - break - - if n_iter >= max_iter: - diverged = True - break - - p = residual + beta * p - - dp = x_candidate @ p - norm_p = p @ p - - z = model_hessian @ p - kappa = p @ z - n_iter += 1 - - if kappa <= 0: - converged = True - - if trustregion_radius != 0 and norm_p > 0: - x_candidate = _take_step_to_trustregion_boundary( - x_candidate, p, dp, radius_sq, norm_d, norm_p - ) - - break - - return x_candidate - - -@njit -def _update_candidate_vector_and_iteration_number( - x_candidate, - residual, - p, - z, - model_gradient, - model_hessian, - rr, - radius, - norm_p, - n_iter, -): - """Update candidate, z vector, and iteration number.""" - radius_sq = radius**2 - - if radius != 0 and norm_p > 0: - # Take step to boundary - step = np.sqrt(radius_sq / norm_p) - x_candidate = x_candidate + step * p - - elif radius != 0: - if radius_sq >= rr: - alpha = 1.0 - else: - alpha = np.sqrt(radius_sq / rr) - - x_candidate = x_candidate + alpha * residual - z = model_gradient - 0.5 * (model_hessian @ x_candidate) - - n_iter += 1 - - return x_candidate, z, n_iter - - -@njit -def _take_step_to_trustregion_boundary(x_candidate, p, dp, radius_sq, norm_d, norm_p): - """Take step to trust-region boundary.""" - step = (np.sqrt(dp * dp + norm_p * (radius_sq - norm_d)) - dp) / norm_p - x_candidate = x_candidate + step * p - - return x_candidate - - -@njit -def _check_convergence( - rnorm, rnorm0, abstol, ttol, divtol, converged, diverged # noqa: ARG001 -): - """Check for convergence.""" - if rnorm <= ttol: - converged = True - elif rnorm >= divtol * rnorm0: - diverged = True - - return converged, diverged diff --git a/src/estimagic/optimization/subsolvers/_trsbox_fast.py b/src/estimagic/optimization/subsolvers/_trsbox_fast.py deleted file mode 100644 index c5cf22533..000000000 --- a/src/estimagic/optimization/subsolvers/_trsbox_fast.py +++ 
/dev/null @@ -1,658 +0,0 @@ -"""Implementation of the quadratic trustregion solver TRSBOX.""" -import numpy as np -from numba import njit - - -@njit -def minimize_trust_trsbox_fast( - model_gradient, - model_hessian, - trustregion_radius, - lower_bounds, - upper_bounds, -): - """Minimize a quadratic trust-region subproblem using the trsbox algorithm. - - Solve the quadratic trust-region subproblem: - min_x g.T @ x + 0.5 * x.T @ hess @ x - s.t. ||x|| <= trustregion_radius - lower_bounds <= x <= upper_bounds - - approximately, using an active-set approach, where g denotes the gradient - and hess the hessian of the quadratic model (i.e. the linear terms and - square_terms), respectively. - - The subproblem is assumed to be centered, i.e. ``x_center`` is the zero vector. - The trsbox algorithm applies a conjugate gradient step in its main loop. - - This implementation of the quadratic trsbox algorithm is based on - M. J. D. Powell (2009) "The BOBYQA algorithm for bound constrained - optimization without derivatives." (:cite:`Powell2009`). - - Some modifications to the termination conditions are taken from the - DFBOLS method by Zhang et al. (:cite:`Zhang2010`). - - Args: - model_gradient (np.ndarray): 1d array of shape (n,) containing the - gradient (i.e. linear terms) of the quadratic model. - model_hessian (np.ndarray): 2d array of shape (n, n) containing the - hessian (i.e. square terms) of the quadratic model. - lower_bounds (np.ndarray): 1d array of shape (n,) with lower bounds - for the parameter vector x. - upper_bounds (np.ndarray): 1d array of shape (n,) with upper bounds - for the parameter vector x. - trustregion_radius (float): Radius of the trust-region. - Returns: - np.ndarray: Solution vector for the quadratic trust-region subproblem - of shape (n,).
- - """ - n = len(model_gradient) - x_center = np.zeros(n) - - n_iter = 0 - n_fixed_variables = 0 - - x_bounded = np.zeros(n) - x_bounded[(x_center <= lower_bounds) & (model_gradient >= 0.0)] = -1 - x_bounded[(x_center >= upper_bounds) & (model_gradient <= 0.0)] = 1 - - x_candidate = np.zeros(n) - gradient_projected = np.zeros(n) - gradient_candidate = model_gradient - - total_reduction = np.zeros(1) - delta_sq = trustregion_radius**2 - curve_min = -1.0 - beta = 0 - - need_alt_trust_step = False - max_iter = 100 * n**2 - - # Main Conjugate Gradient loop - for _ in range(max_iter): - gradient_projected[x_bounded != 0] = 0 - if beta == 0: - gradient_projected[x_bounded == 0] = -gradient_candidate[x_bounded == 0] - else: - gradient_projected[x_bounded == 0] = ( - beta * gradient_projected[x_bounded == 0] - - gradient_candidate[x_bounded == 0] - ) - gradient_projected_sumsq = gradient_projected @ gradient_projected - - if gradient_projected_sumsq == 0: - need_alt_trust_step = False - break - - if beta == 0: - gradient_sumsq = gradient_projected_sumsq - max_iter = n_iter + n - n_fixed_variables - - if n_iter == 0: - gradient_sumsq_initial = gradient_sumsq - - if ( - gradient_sumsq <= 1.0e-6 * gradient_sumsq_initial - and gradient_sumsq <= 1.0e-18 - ) or ( - gradient_sumsq * np.array([delta_sq]) <= 1.0e-6 * total_reduction**2 - and gradient_sumsq * np.array([delta_sq]) <= 1.0e-18 - ): - need_alt_trust_step = False - break - - hess_g = model_hessian @ gradient_projected - g_x = gradient_projected[x_bounded == 0] @ x_candidate[x_bounded == 0] - g_hess_g = gradient_projected[x_bounded == 0] @ hess_g[x_bounded == 0] - raw_distance = ( - np.array([delta_sq]) - - x_candidate[x_bounded == 0] @ x_candidate[x_bounded == 0] - ) - - if raw_distance <= 0: - need_alt_trust_step = True - break - step_len, distance_to_boundary = _take_unconstrained_step_up_to_boundary( - raw_distance, gradient_sumsq, gradient_projected_sumsq, g_x, g_hess_g - ) - - if step_len <= 1.0e-30: - need_alt_trust_step = False - break - - step_len, index_bound_active = _take_constrained_step_up_to_boundary( - x_candidate, gradient_projected, step_len, lower_bounds, upper_bounds - ) - current_reduction = 0.0 - if step_len > 0: - n_iter += 1 - ( - x_candidate, - gradient_candidate, - current_reduction, - total_reduction, - curve_min, - gradient_sumsq, - gradient_sumsq_old, - ) = _update_candidate_vectors_and_reduction( - x_candidate, - x_bounded, - gradient_candidate, - gradient_projected, - step_len, - total_reduction, - curve_min, - index_bound_active, - gradient_projected_sumsq, - gradient_sumsq, - g_hess_g, - hess_g, - ) - - if index_bound_active != -1: - n_fixed_variables += 1 - if gradient_projected[index_bound_active] >= 0: - x_bounded[index_bound_active] = 1 - else: - x_bounded[index_bound_active] = -1 - - delta_sq = delta_sq - x_candidate[index_bound_active] ** 2 - if delta_sq <= 0: - need_alt_trust_step = True - break - - beta = 0 - continue - - if step_len >= distance_to_boundary: - need_alt_trust_step = True - break - - if n_iter == max_iter or current_reduction <= 1.0e-6 * total_reduction: - need_alt_trust_step = False - break - - beta = gradient_sumsq / gradient_sumsq_old - continue - - if need_alt_trust_step: - curve_min = 0 - x_candidate = _perform_alternative_trustregion_step( - x_candidate=x_candidate, - x_bounded=x_bounded, - gradient_candidate=gradient_candidate, - model_hessian=model_hessian, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - n_fixed_variables=n_fixed_variables, - 
total_reduction=total_reduction, - ) - else: - x_candidate = _apply_bounds_to_candidate_vector( - x_candidate, x_bounded, lower_bounds, upper_bounds - ) - - return x_candidate - - -@njit -def _perform_alternative_trustregion_step( - x_candidate, - x_bounded, - gradient_candidate, - model_hessian, - lower_bounds, - upper_bounds, - n_fixed_variables, - total_reduction, -): - """Perform the alternative trust-region step.""" - n = len(x_candidate) - max_iter = 100 * n**2 - - for _ in range(max_iter): - if n_fixed_variables >= n - 1: - x_candidate = _apply_bounds_to_candidate_vector( - x_candidate, x_bounded, lower_bounds, upper_bounds - ) - break - - search_direction = np.zeros(n) - search_direction[x_bounded == 0] = x_candidate[x_bounded == 0] - - x_reduced = x_candidate[x_bounded == 0] @ x_candidate[x_bounded == 0] - x_grad = x_candidate[x_bounded == 0] @ gradient_candidate[x_bounded == 0] - gradient_reduced = ( - gradient_candidate[x_bounded == 0] @ gradient_candidate[x_bounded == 0] - ) - hess_s = model_hessian @ search_direction - hessian_reduced = hess_s - - restart_alt_loop = False - - for _ in range(max_iter): - raw_reduction = gradient_reduced * x_reduced - x_grad**2 - if raw_reduction <= 1.0e-4 * total_reduction**2: - restart_alt_loop = False - break - - search_direction, s_norm = _compute_new_search_direction_and_norm( - x_candidate, - x_bounded, - x_reduced, - gradient_candidate, - x_grad, - raw_reduction, - ) - - ( - x_bounded, - index_active_bound, - n_fixed_variables, - active_bound, - bound_on_tangent, - free_variable_reached_bound, - ) = _calc_upper_bound_on_tangent( - x_candidate, - search_direction, - x_bounded, - lower_bounds, - upper_bounds, - n_fixed_variables, - ) - - if free_variable_reached_bound: - restart_alt_loop = True - break - - hess_s = model_hessian @ search_direction - - s_hess_s = np.sum(search_direction[x_bounded == 0] * hess_s[x_bounded == 0]) - x_hess_s = np.sum(x_candidate[x_bounded == 0] * hess_s[x_bounded == 0]) - x_hess_x = np.sum( - x_candidate[x_bounded == 0] * hessian_reduced[x_bounded == 0] - ) - - ( - previous_reduction, - next_reduction, - max_reduction, - tangent, - index_angle_greatest_reduction, - n_angles, - ) = _calc_greatest_criterion_reduction( - bound_on_tangent, s_hess_s, x_hess_s, x_hess_x, x_grad, s_norm - ) - - if index_angle_greatest_reduction == -1: - restart_alt_loop = False - break - - if index_angle_greatest_reduction < n_angles - 1: - tangent = _update_tangent( - index_angle_greatest_reduction, - bound_on_tangent, - n_angles, - next_reduction, - previous_reduction, - max_reduction, - ) - - cosine = (1.0 - tangent**2) / (1.0 + tangent**2) - sine = 2.0 * tangent / (1.0 + tangent**2) - current_reduction = _calc_new_reduction( - tangent, sine, s_hess_s, x_hess_x, x_hess_s, x_grad, s_norm - ) - - if current_reduction <= 0.0: - restart_alt_loop = False - break - - ( - x_candidate, - gradient_candidate, - x_grad, - gradient_reduced, - hessian_reduced, - ) = _update_candidate_vectors_and_reduction_alt_step( - x_candidate, - search_direction, - x_bounded, - gradient_candidate, - cosine, - sine, - hess_s, - hessian_reduced, - ) - - total_reduction = total_reduction + current_reduction - if ( - index_active_bound.size > 0 - and index_angle_greatest_reduction == n_angles - 1 - ): - n_fixed_variables += 1 - x_bounded[index_active_bound] = active_bound - restart_alt_loop = True - break - - if current_reduction <= 0.01 * total_reduction: - restart_alt_loop = False - break - - continue - - if restart_alt_loop: - continue - else: - break - - 
x_candidate = _apply_bounds_to_candidate_vector( - x_candidate, x_bounded, lower_bounds, upper_bounds - ) - - return x_candidate - - -@njit -def _apply_bounds_to_candidate_vector( - x_candidate, - x_bounded, - lower_bounds, - upper_bounds, -): - """Force candidate vector to lie within bounds.""" - x_candidate_new = np.zeros(len(x_candidate)) - for i in range(len(x_candidate)): - if x_candidate[i] <= lower_bounds[i]: - x_candidate_new[i] = lower_bounds[i] - elif x_candidate[i] >= upper_bounds[i]: - x_candidate_new[i] = upper_bounds[i] - else: - x_candidate_new[i] = x_candidate[i] - x_candidate_new[x_bounded == -1] = lower_bounds[x_bounded == -1] - x_candidate_new[x_bounded == 1] = upper_bounds[x_bounded == 1] - - return x_candidate_new - - -@njit -def _take_unconstrained_step_up_to_boundary( - raw_distance, gradient_sumsq, gradient_projected_sumsq, g_x, g_hess_g -): - """Take unconstrained step, ignoring bounds, up to boundary.""" - temp = np.sqrt(gradient_projected_sumsq * raw_distance + g_x**2) - if g_x >= 0: - distance_to_boundary = raw_distance / (temp + g_x) - else: - distance_to_boundary = (temp - g_x) / gradient_projected_sumsq - if g_hess_g <= 0: - step_len = distance_to_boundary[0] - else: - if distance_to_boundary <= gradient_sumsq / g_hess_g: - step_len = distance_to_boundary[0] - else: - step_len = gradient_sumsq / g_hess_g - - return step_len, distance_to_boundary - - -@njit -def _update_candidate_vectors_and_reduction( - x_candidate, - x_bounded, - gradient_candidate, - gradient_projected, - step_len, - total_reduction, - curve_min, - index_bound_active, - gradient_projected_sumsq, - gradient_sumsq, - g_hess_g, - hess_g, -): - """Update candidate vectors and the associated criterion reduction.""" - current_min = g_hess_g / gradient_projected_sumsq - - if index_bound_active == -1 and current_min > 0: - if curve_min != -1.0: - curve_min = min(curve_min, current_min) - else: - curve_min = current_min - - gradient_sumsq_old = gradient_sumsq - - gradient_candidate = gradient_candidate + step_len * hess_g - x_candidate = x_candidate + step_len * gradient_projected - - gradient_sumsq = ( - gradient_candidate[x_bounded == 0] @ gradient_candidate[x_bounded == 0] - ) - - current_reduction = max( - step_len * (gradient_sumsq_old - 0.5 * step_len * g_hess_g), 0 - ) - total_reduction = total_reduction + current_reduction - - return ( - x_candidate, - gradient_candidate, - current_reduction, - total_reduction, - curve_min, - gradient_sumsq, - gradient_sumsq_old, - ) - - -@njit -def _take_constrained_step_up_to_boundary( - x_candidate, gradient_projected, step_len, lower_bounds, upper_bounds -): - """Reduce step length, where boundary is hit, to preserve simple bounds.""" - index_bound_active = -1 - for i in range(len(x_candidate)): - if gradient_projected[i] != 0: - if gradient_projected[i] > 0: - step_len_constr = ( - upper_bounds[i] - x_candidate[i] - ) / gradient_projected[i] - else: - step_len_constr = ( - lower_bounds[i] - x_candidate[i] - ) / gradient_projected[i] - if step_len_constr < step_len: - step_len = step_len_constr - index_bound_active = i - - return step_len, index_bound_active - - -@njit -def _calc_upper_bound_on_tangent( - x_candidate, - search_direction, - x_bounded, - lower_bounds, - upper_bounds, - n_fixed_variables, -): - """Calculate upper bound on tangent of half the angle to the boundary.""" - bound_on_tangent = 1 - free_variable_reached_bound = False - - for i in range(len(x_candidate)): - if x_bounded[i] == 0: - lower_bound_centered = x_candidate[i] - 
lower_bounds[i] - upper_bound_centered = upper_bounds[i] - x_candidate[i] - - if lower_bound_centered <= 0.0: - n_fixed_variables += 1 - x_bounded[i] = -1 - free_variable_reached_bound = True - break - - elif upper_bound_centered <= 0.0: - n_fixed_variables += 1 - x_bounded[i] = 1 - free_variable_reached_bound = True - break - - ssq = x_candidate[i] ** 2 + search_direction[i] ** 2 - - ssq_lower = ssq - lower_bounds[i] ** 2 - if ssq_lower > 0.0: - ssq_lower = np.sqrt(ssq_lower) - search_direction[i] - if bound_on_tangent * ssq_lower > lower_bound_centered: - bound_on_tangent = lower_bound_centered / ssq_lower - index_active_bound = np.array([i]) - active_bound = np.array([-1]) - - ssq_upper = ssq - upper_bounds[i] ** 2 - if ssq_upper > 0.0: - ssq_upper = np.sqrt(ssq_upper) + search_direction[i] - if bound_on_tangent * ssq_upper > upper_bound_centered: - bound_on_tangent = upper_bound_centered / ssq_upper - index_active_bound = np.array([i]) - active_bound = np.array([1]) - - return ( - x_bounded, - index_active_bound, - n_fixed_variables, - active_bound, - bound_on_tangent, - free_variable_reached_bound, - ) - - -@njit -def _calc_greatest_criterion_reduction( - bound_on_tangent, s_hess_s, x_hess_s, x_hess_x, x_grad, s_norm -): - """Calculate the greatest feasible reduction in the criterion function. - - The largest reduction is found by looking at a range of equally spaced values of - ``tangent`` in the interval [0, ``bound_on_tangent``], where ``tangent`` is the - tangent of half the angle to the trust-region boundary. - - """ - previous_reduction = None - next_reduction = None - - max_reduction = 0 - index_angle_greatest_reduction = -1 - old_reduction = 0 - n_angles = int(17 * bound_on_tangent + 3.1) - - for i in range(n_angles): - tangent = bound_on_tangent * (i + 1) / n_angles - sine = 2.0 * tangent / (1.0 + tangent**2) - - new_reduction = _calc_new_reduction( - tangent, sine, s_hess_s, x_hess_x, x_hess_s, x_grad, s_norm - ) - - if new_reduction > max_reduction: - max_reduction = new_reduction - index_angle_greatest_reduction = i - previous_reduction = old_reduction - elif i == index_angle_greatest_reduction + 1: - next_reduction = new_reduction - old_reduction = new_reduction - - return ( - previous_reduction, - next_reduction, - max_reduction, - tangent, - index_angle_greatest_reduction, - n_angles, - ) - - -@njit -def _update_candidate_vectors_and_reduction_alt_step( - x_candidate, - search_direction, - x_bounded, - gradient_candidate, - cosine, - sine, - hess_s, - hessian_reduced, -): - """Update candidate vectors and the associated criterion reduction. - - If the angle of the alternative iteration is restricted by a bound on a free - variable, that variable is fixed at the bound. 
- - """ - gradient_candidate += (cosine - 1.0) * hessian_reduced + sine * hess_s - x_candidate_new = np.zeros(len(x_candidate)) - for i in range(len(x_candidate)): - if x_bounded[i] == 0: - x_candidate_new[i] = cosine * x_candidate[i] + sine * search_direction[i] - else: - x_candidate_new[i] = x_candidate[i] - x_grad = x_candidate_new[x_bounded == 0] @ gradient_candidate[x_bounded == 0] - gradient_reduced = ( - gradient_candidate[x_bounded == 0] @ gradient_candidate[x_bounded == 0] - ) - hessian_reduced = cosine * hessian_reduced + sine * hess_s - - return ( - x_candidate_new, - gradient_candidate, - x_grad, - gradient_reduced, - hessian_reduced, - ) - - -@njit -def _compute_new_search_direction_and_norm( - x_candidate, x_bounded, x_reduced, gradient_candidate, x_grad, raw_reduction -): - """Compute the new search direction and its norm.""" - raw_reduction = np.sqrt(raw_reduction) - search_direction = np.zeros(len(x_candidate)) - - search_direction[x_bounded == 0] = ( - x_grad * x_candidate[x_bounded == 0] - - x_reduced * gradient_candidate[x_bounded == 0] - ) / raw_reduction - s_norm = -raw_reduction - - return search_direction, s_norm - - -@njit -def _calc_new_reduction(tangent, sine, s_hess_s, x_hess_x, x_hess_s, x_grad, s_norm): - """Calculate the new reduction in the criterion function.""" - raw_reduction = s_hess_s + tangent * (tangent * x_hess_x - 2.0 * x_hess_s) - current_reduction = sine * (tangent * x_grad - s_norm - 0.5 * sine * raw_reduction) - - return current_reduction - - -@njit -def _update_tangent( - index_angle_greatest_reduction, - bound_on_tangent, - n_angles, - next_reduction, - previous_reduction, - max_reduction, -): - """Update the tangent of half the angle to the trust-region boundary.""" - raw_reduction = (next_reduction - previous_reduction) / ( - 2.0 * max_reduction - previous_reduction - next_reduction - ) - tangent = ( - bound_on_tangent - * ((index_angle_greatest_reduction + 1) + 0.5 * raw_reduction) - / n_angles - ) - return tangent diff --git a/src/estimagic/optimization/subsolvers/bntr_fast.py b/src/estimagic/optimization/subsolvers/bntr_fast.py deleted file mode 100644 index 50f8c4ad6..000000000 --- a/src/estimagic/optimization/subsolvers/bntr_fast.py +++ /dev/null @@ -1,1167 +0,0 @@ -"""Auxiliary functions for the quadratic BNTR trust-region subsolver.""" -import numpy as np -from estimagic.optimization.subsolvers._conjugate_gradient_fast import ( - minimize_trust_cg_fast, -) -from estimagic.optimization.subsolvers._steihaug_toint_fast import ( - minimize_trust_stcg_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - minimize_trust_trsbox_fast, -) -from numba import njit - -EPSILON = np.finfo(float).eps ** (2 / 3) - - -def bntr_fast( - model, - lower_bounds, - upper_bounds, - x_candidate, - *, - conjugate_gradient_method, - maxiter, - maxiter_gradient_descent, - gtol_abs, - gtol_rel, - gtol_scaled, - gtol_abs_conjugate_gradient, - gtol_rel_conjugate_gradient, -): - """Minimize a bounded trust-region subproblem via Newton Conjugate Gradient method. - - This function serves as a wrapper around the faster, numba-implementation of the - original BNTR algorithm. - - The BNTR (Bounded Newton Trust Rregion) algorithm uses an active-set approach - to solve the symmetric system of equations: - - hessian @ x = - gradient - - only for the inactive parameters of x that lie within the bounds. The active-set - estimation employed here is based on Bertsekas (:cite:`Bertsekas1982`). 
- - In the main loop, BNTR globalizes the Newton step using a trust-region method - based on the predicted versus actual reduction in the criterion function. - The trust-region radius is increased only if the accepted step is at the - trust-region boundary. - - - Args: - model (NamedTuple): NamedTuple containing the parameters of the - main model, i.e.: - - ``linear_terms`` (np.ndarray): 1d array of shape (n,) - - ``square_terms`` (np.ndarray): 2d array of shape (n,n). - lower_bounds (np.ndarray): 1d array of shape (n,) with lower bounds - for the parameter vector x. - upper_bounds (np.ndarray): 1d array of shape (n,) with upper bounds - for the parameter vector x. - x_candidate (np.ndarray): Initial guess for the solution of the subproblem. - conjugate_gradient_method (str): Method for computing the conjugate gradient - step. Available conjugate gradient methods are: - - "cg" - - "steihaug_toint" - - "trsbox" (default) - maxiter (int): Maximum number of iterations. If reached, terminate. - maxiter_gradient_descent (int): Maximum number of steepest descent iterations - to perform when the trust-region subsolver BNTR is used. - gtol_abs (float): Convergence tolerance for the absolute gradient norm. - gtol_rel (float): Convergence tolerance for the relative gradient norm. - gtol_scaled (float): Convergence tolerance for the scaled gradient norm. - gtol_abs_conjugate_gradient (float): Convergence tolerance for the absolute - gradient norm in the conjugate gradient step of the trust-region - subproblem ("BNTR"). - gtol_rel_conjugate_gradient (float): Convergence tolerance for the relative - gradient norm in the conjugate gradient step of the trust-region - subproblem ("BNTR"). - - Returns: - (dict): Result dictionary containing the following keys: - - ``x`` (np.ndarray): Solution vector of the subproblem of shape (n,) - - ``criterion`` (float): Minimum function value associated with the - solution. - - ``n_iterations`` (int): Number of iterations the algorithm ran before - termination. - - ``success`` (bool): Boolean indicating whether a solution has been found - before reaching maxiter. - - """ - - model_gradient = model.linear_terms - model_hessian = model.square_terms - ( - x_candidate, - f_candidate, - niter, - converged, - convergence_reason, - ) = _bntr_fast_jitted( - model_gradient=model_gradient, - model_hessian=model_hessian, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - x_candidate=x_candidate, - conjugate_gradient_method=conjugate_gradient_method, - maxiter=maxiter, - maxiter_gradient_descent=maxiter_gradient_descent, - gtol_abs=gtol_abs, - gtol_rel=gtol_rel, - gtol_scaled=gtol_scaled, - gtol_abs_conjugate_gradient=gtol_abs_conjugate_gradient, - gtol_rel_conjugate_gradient=gtol_rel_conjugate_gradient, - ) - - result = { - "x": x_candidate, - "criterion": f_candidate, - "n_iterations": niter, - "success": converged, - "message": convergence_reason, - } - - return result - - -@njit -def _bntr_fast_jitted( - model_gradient, - model_hessian, - lower_bounds, - upper_bounds, - x_candidate, - conjugate_gradient_method, - maxiter, - maxiter_gradient_descent, - gtol_abs, - gtol_rel, - gtol_scaled, - gtol_abs_conjugate_gradient, - gtol_rel_conjugate_gradient, -): - """Minimize a bounded trust-region subproblem via Newton Conjugate Gradient method. 
- - This is the faster, numba implementation of the original BNTR algorithm that - gets wrapped in ``bntr_fast``. - - The BNTR (Bounded Newton Trust-Region) algorithm uses an active-set approach - to solve the symmetric system of equations: - - hessian @ x = - gradient - - only for the inactive parameters of x that lie within the bounds. The active-set - estimation employed here is based on Bertsekas (:cite:`Bertsekas1982`). - - In the main loop, BNTR globalizes the Newton step using a trust-region method - based on the predicted versus actual reduction in the criterion function. - The trust-region radius is increased only if the accepted step is at the - trust-region boundary. - - - Args: - model_gradient (np.ndarray): 1d array of shape (n,) of the linear terms of - the surrogate model. - model_hessian (np.ndarray): 2d array of shape (n,n) of the square terms of - the surrogate model. - lower_bounds (np.ndarray): 1d array of shape (n,) with lower bounds - for the parameter vector x. - upper_bounds (np.ndarray): 1d array of shape (n,) with upper bounds - for the parameter vector x. - x_candidate (np.ndarray): Initial guess for the solution of the subproblem. - conjugate_gradient_method (str): Method for computing the conjugate gradient - step. Available conjugate gradient methods are: - - "cg" - - "steihaug_toint" - - "trsbox" (default) - maxiter (int): Maximum number of iterations. If reached, terminate. - maxiter_gradient_descent (int): Maximum number of steepest descent iterations - to perform when the trust-region subsolver BNTR is used. - gtol_abs (float): Convergence tolerance for the absolute gradient norm. - gtol_rel (float): Convergence tolerance for the relative gradient norm. - gtol_scaled (float): Convergence tolerance for the scaled gradient norm. - gtol_abs_conjugate_gradient (float): Convergence tolerance for the absolute - gradient norm in the conjugate gradient step of the trust-region - subproblem ("BNTR"). - gtol_rel_conjugate_gradient (float): Convergence tolerance for the relative - gradient norm in the conjugate gradient step of the trust-region - subproblem ("BNTR"). - - Returns: - x (np.ndarray): Solution vector of the subproblem of shape (n,) - criterion (float): Minimum function value associated with the - solution. - n_iterations (int): Number of iterations the algorithm ran before - termination. - success (bool): Boolean indicating whether a solution has been found - before reaching maxiter.
- - """ - - ( - x_candidate, - f_candidate, - gradient_unprojected, - hessian_bounds_inactive, - trustregion_radius, - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, - inactive_bounds, - converged, - convergence_reason, - ) = _take_preliminary_gradient_descent_step_and_check_for_solution( - model_gradient, - model_hessian, - lower_bounds, - upper_bounds, - x_candidate, - maxiter_gradient_descent, - gtol_abs, - gtol_rel, - gtol_scaled, - ) - - for niter in range(maxiter + 1): - if converged: - break - - x_old = x_candidate - f_old = f_candidate - accept_step = False - - while not accept_step and not converged: - gradient_bounds_inactive = gradient_unprojected[inactive_bounds] - hessian_bounds_inactive = _find_hessian_submatrix_where_bounds_inactive( - model_hessian, inactive_bounds - ) - ( - conjugate_gradient_step, - conjugate_gradient_step_inactive_bounds, - cg_step_norm, - ) = _compute_conjugate_gradient_step( - x_candidate, - gradient_bounds_inactive, - hessian_bounds_inactive, - lower_bounds, - upper_bounds, - inactive_bounds=inactive_bounds, - active_lower_bounds=active_lower_bounds, - active_upper_bounds=active_upper_bounds, - active_fixed_bounds=active_fixed_bounds, - trustregion_radius=trustregion_radius, - conjugate_gradient_method=conjugate_gradient_method, - gtol_abs_conjugate_gradient=gtol_abs_conjugate_gradient, - gtol_rel_conjugate_gradient=gtol_rel_conjugate_gradient, - default_radius=100.00, - min_radius=1e-10, - max_radius=1e10, - ) - - x_unbounded = x_candidate + conjugate_gradient_step - x_candidate = _apply_bounds_to_x_candidate( - x_unbounded, lower_bounds, upper_bounds - ) - - predicted_reduction = ( - _compute_predicted_reduction_from_conjugate_gradient_step( - conjugate_gradient_step, - conjugate_gradient_step_inactive_bounds, - gradient_unprojected, - gradient_bounds_inactive, - hessian_bounds_inactive, - inactive_bounds, - ) - ) - - f_candidate = _evaluate_model_criterion( - x_candidate, model_gradient, model_hessian - ) - actual_reduction = f_old - f_candidate - - trustregion_radius_old = trustregion_radius - ( - trustregion_radius, - accept_step, - ) = _update_trustregion_radius_conjugate_gradient( - f_candidate, - predicted_reduction, - actual_reduction, - cg_step_norm, - trustregion_radius, - min_radius=1e-10, - max_radius=1e10, - eta1=1.0e-4, - eta2=0.25, - eta3=0.50, - eta4=0.90, - alpha1=0.25, - alpha2=0.50, - alpha3=1.00, - alpha4=2.00, - alpha5=4.00, - ) - - if accept_step: - gradient_unprojected = model_gradient + model_hessian @ x_candidate - - ( - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, - inactive_bounds, - ) = _get_information_on_active_bounds( - x_candidate, - gradient_unprojected, - lower_bounds, - upper_bounds, - ) - else: - x_candidate = x_old - f_candidate = f_old - - if trustregion_radius == trustregion_radius_old: - converged = True - break - - converged, convergence_reason = _check_for_convergence( - x_candidate, - f_candidate, - gradient_unprojected, - model_gradient, - lower_bounds, - upper_bounds, - converged, - convergence_reason, - niter, - maxiter=maxiter, - gtol_abs=gtol_abs, - gtol_rel=gtol_rel, - gtol_scaled=gtol_scaled, - ) - return x_candidate, f_candidate, niter, converged, convergence_reason - - -@njit -def _take_preliminary_gradient_descent_step_and_check_for_solution( - model_gradient, - model_hessian, - lower_bounds, - upper_bounds, - x_candidate, - maxiter_gradient_descent, - gtol_abs, - gtol_rel, - gtol_scaled, -): - """Take a preliminary gradient descent step and check if we 
found a solution. - - Args: - model_gradient (np.ndarray): 1d array of shape (n,) with linear terms of the - main model. - model_hessian (np.ndarray): 2d array of shape (n,n) with square terms of - the main model - lower_bounds (np.ndarray): 1d array of shape (n,) with lower bounds - for the parameter vector x. - upper_bounds (np.ndarray): 1d array of shape (n,) with upper bounds - for the parameter vector x. - x_candidate (np.ndarray): Initial guess for the solution of the subproblem. - maxiter_gradient_descent (int): Maximum number of iterations in performing - gradient descent step - gtol_abs (float): Convergence tolerance for the absolute gradient norm. - gtol_rel (float): Convergence tolerance for the relative gradient norm. - gtol_scaled (float): Convergence tolerance for the scaled gradient norm. - - Returns: - x_candidate (np.ndarray): Candidate for solution vector of shape (n,). - criterion_candidate (float): Candidate value for solution criterion. - gradient_unprojected (np.ndarray): - - """ - - default_radius = 100.0 - min_radius = 1e-10 - max_radius = 1e10 - theta = 0.25 - mu1 = 0.35 - mu2 = 0.50 - gamma1 = 0.0625 - gamma2 = 0.5 - gamma3 = 2.0 - gamma4 = 5.0 - - converged = False - convergence_reason = 0 - - criterion_candidate = _evaluate_model_criterion( - x_candidate, model_gradient, model_hessian - ) - - ( - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, - inactive_bounds, - ) = _get_information_on_active_bounds( - x_candidate, - model_gradient, - lower_bounds, - upper_bounds, - ) - - gradient_unprojected = model_gradient + model_hessian @ x_candidate - gradient_projected = _project_gradient_onto_feasible_set( - gradient_unprojected, inactive_bounds - ) - converged, convergence_reason = _check_for_convergence( - x_candidate=x_candidate, - f_candidate=criterion_candidate, - gradient_candidate=gradient_unprojected, - model_gradient=model_gradient, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - converged=converged, - reason=convergence_reason, - niter=None, - maxiter=None, - gtol_abs=gtol_abs, - gtol_rel=gtol_rel, - gtol_scaled=gtol_scaled, - ) - - if converged: - hessian_inactive = model_hessian - trustregion_radius = default_radius - else: - hessian_inactive = _find_hessian_submatrix_where_bounds_inactive( - model_hessian, inactive_bounds - ) - - ( - x_candidate_gradient_descent, - f_min_gradient_descent, - step_size_gradient_descent, - trustregion_radius, - radius_lower_bound, - ) = _perform_gradient_descent_step( - x_candidate=x_candidate, - f_candidate_initial=criterion_candidate, - gradient_projected=gradient_projected, - hessian_inactive=hessian_inactive, - model_gradient=model_gradient, - model_hessian=model_hessian, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - inactive_bounds=inactive_bounds, - maxiter_steepest_descent=maxiter_gradient_descent, - default_radius=default_radius, - theta=theta, - mu1=mu1, - mu2=mu2, - gamma1=gamma1, - gamma2=gamma2, - gamma3=gamma3, - gamma4=gamma4, - ) - - if f_min_gradient_descent < criterion_candidate: - criterion_candidate = f_min_gradient_descent - - x_unbounded = ( - x_candidate_gradient_descent - - step_size_gradient_descent * gradient_projected - ) - x_candidate = _apply_bounds_to_x_candidate( - x_unbounded, lower_bounds, upper_bounds - ) - - gradient_unprojected = model_gradient + model_hessian @ x_candidate - ( - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, - inactive_bounds, - ) = _get_information_on_active_bounds( - x_candidate, - gradient_unprojected, - 
lower_bounds, - upper_bounds, - ) - - gradient_projected = _project_gradient_onto_feasible_set( - gradient_unprojected, inactive_bounds - ) - hessian_inactive = _find_hessian_submatrix_where_bounds_inactive( - model_hessian, inactive_bounds - ) - - converged, convergence_reason = _check_for_convergence( - x_candidate=x_candidate, - f_candidate=criterion_candidate, - gradient_candidate=gradient_projected, - model_gradient=model_gradient, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - converged=converged, - reason=convergence_reason, - niter=None, - maxiter=None, - gtol_abs=gtol_abs, - gtol_rel=gtol_rel, - gtol_scaled=gtol_scaled, - ) - - if not converged: - trustregion_radius = min( - max(min_radius, max(trustregion_radius, radius_lower_bound)), max_radius - ) - - return ( - x_candidate, - criterion_candidate, - gradient_unprojected, - hessian_inactive, - trustregion_radius, - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, - inactive_bounds, - converged, - convergence_reason, - ) - - -@njit -def _compute_conjugate_gradient_step( - x_candidate, - gradient_inactive, - hessian_inactive, - lower_bounds, - upper_bounds, - inactive_bounds, - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, - trustregion_radius, - conjugate_gradient_method, - gtol_abs_conjugate_gradient, - gtol_rel_conjugate_gradient, - default_radius, - min_radius, - max_radius, -): - """Compute the bounded Conjugate Gradient trust-region step. - - Args: - x_candidate (np.ndarray): Candidate solution vector of parameters of len n. - gradient_inactive (np.ndarray): Model gradient where parameter bounds are - inactive. The length depends on the number of inactive bounds. - hessian_inactive (np.ndarray): Model hessian where parameter bounds are - inactive. The shape depends on the number of inactive bounds. - lower_bounds (np.ndarray): 1d array of parameter lower bounds, of length n. - upper_bounds (np.ndarray): 1d array of parameter upper bounds, of length n. - inactive_bounds (np.ndarray): 1d array of indices where parameter bounds are - inactive. - active_lower_bounds (np.ndarray): 1d array of indices where lower bounds of - parameters are active. - active_upper_bounds (np.ndarray): 1d array of indices where upper bounds of - parameters are active. - active_fixed_bounds (np.ndarray): 1d array of indices where parameters are - fixed because their lower and upper bounds coincide. - trustregion_radius (float): Radius of the trust region. - conjugate_gradient_method (str): The method used in the trust region - minimization problem. - gtol_abs_conjugate_gradient (float): Convergence tolerance for the absolute - gradient norm. - gtol_rel_conjugate_gradient (float): Convergence tolerance for the relative - gradient norm. - default_radius (float): Default trust-region radius. - min_radius (float): Lower bound on the trust-region radius. - max_radius (float): Upper bound on the trust-region radius. - - Returns: - conjugate_gradient_step (np.ndarray): Conjugate gradient step, of length n, with - bounds applied to it. - step_inactive (np.ndarray): Conjugate gradient step, of length n, without bounds - applied to it. - step_norm (float): Norm of the conjugate gradient step.
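Example (added for illustration, not part of the original module): a minimal
dense-numpy sketch of the Steihaug-Toint conjugate gradient idea behind the
jitted ``minimize_trust_stcg_fast``; all names and tolerances here are
hypothetical.

    import numpy as np

    def trust_region_cg_sketch(model_gradient, model_hessian, radius, gtol=1e-8):
        # minimize g @ s + 0.5 * s @ H @ s subject to ||s|| <= radius
        s = np.zeros_like(model_gradient)
        r = model_gradient.copy()  # gradient of the model at s
        if np.linalg.norm(r) < gtol:
            return s
        d = -r
        for _ in range(2 * len(model_gradient)):
            h_d = model_hessian @ d
            curvature = d @ h_d
            if curvature <= 0:
                # negative curvature: follow d to the boundary
                return s + _positive_root(s, d, radius) * d
            alpha = (r @ r) / curvature
            if np.linalg.norm(s + alpha * d) >= radius:
                # full step leaves the region: stop at the boundary
                return s + _positive_root(s, d, radius) * d
            s = s + alpha * d
            r_new = r + alpha * h_d
            if np.linalg.norm(r_new) < gtol:
                break
            d = -r_new + ((r_new @ r_new) / (r @ r)) * d
            r = r_new
        return s

    def _positive_root(s, d, radius):
        # positive t that solves ||s + t * d|| == radius
        a, b, c = d @ d, 2 * (s @ d), s @ s - radius**2
        return (-b + np.sqrt(b**2 - 4 * a * c)) / (2 * a)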
- """ - conjugate_gradient_step = np.zeros(len(x_candidate)) - - if not inactive_bounds.any(): - # Save some computation and return an adjusted zero step - step_inactive = _apply_bounds_to_x_candidate( - x_candidate, lower_bounds, upper_bounds - ) - step_norm = np.linalg.norm(step_inactive) - - conjugate_gradient_step = _apply_bounds_to_conjugate_gradient_step( - step_inactive, - x_candidate, - lower_bounds, - upper_bounds, - inactive_bounds, - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, - ) - - else: - if conjugate_gradient_method == "cg": - step_inactive = minimize_trust_cg_fast( - gradient_inactive, - hessian_inactive, - trustregion_radius, - gtol_abs=gtol_abs_conjugate_gradient, - gtol_rel=gtol_rel_conjugate_gradient, - ) - step_norm = np.linalg.norm(step_inactive) - elif conjugate_gradient_method == "steihaug_toint": - step_inactive = minimize_trust_stcg_fast( - gradient_inactive, - hessian_inactive, - trustregion_radius, - ) - step_norm = np.linalg.norm(step_inactive) - elif conjugate_gradient_method == "trsbox": - step_inactive = minimize_trust_trsbox_fast( - gradient_inactive, - hessian_inactive, - trustregion_radius, - lower_bounds=lower_bounds[inactive_bounds], - upper_bounds=upper_bounds[inactive_bounds], - ) - step_norm = np.linalg.norm(step_inactive) - else: - raise ValueError( - "Invalid method: {conjugate_gradient_method}. " - "Must be one of cg, steihaug_toint, trsbox." - ) - - if trustregion_radius == 0: - if step_norm > 0: - # Accept - trustregion_radius = min(max(min_radius, step_norm), max_radius) - else: - # Re-solve - trustregion_radius = min(max(default_radius, min_radius), max_radius) - if conjugate_gradient_method == "cg": - step_inactive = minimize_trust_cg_fast( - gradient_inactive, - hessian_inactive, - trustregion_radius, - gtol_abs=gtol_abs_conjugate_gradient, - gtol_rel=gtol_rel_conjugate_gradient, - ) - step_norm = np.linalg.norm(step_inactive) - elif conjugate_gradient_method == "steihaug_toint": - step_inactive = minimize_trust_stcg_fast( - gradient_inactive, - hessian_inactive, - trustregion_radius, - ) - step_norm = np.linalg.norm(step_inactive) - elif conjugate_gradient_method == "trsbox": - step_inactive = minimize_trust_trsbox_fast( - gradient_inactive, - hessian_inactive, - trustregion_radius, - lower_bounds=lower_bounds[inactive_bounds], - upper_bounds=upper_bounds[inactive_bounds], - ) - step_norm = np.linalg.norm(step_inactive) - - if step_norm == 0: - raise ValueError("Initial direction is zero.") - - conjugate_gradient_step = _apply_bounds_to_conjugate_gradient_step( - step_inactive, - x_candidate, - lower_bounds, - upper_bounds, - inactive_bounds, - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, - ) - - return ( - conjugate_gradient_step, - step_inactive, - step_norm, - ) - - -@njit -def _compute_predicted_reduction_from_conjugate_gradient_step( - conjugate_gradient_step, - conjugate_gradient_step_inactive, - gradient_unprojected, - gradient_inactive, - hessian_inactive, - inactive_bounds, -): - """Compute predicted reduction induced by the Conjugate Gradient step. - - Args: - conjugate_gradient_step (np.ndarray): Conjugate gradient step,of lenght n, with - bounds applied to it. - conjugate_gradient_step_inactive (np.ndarray): Conjugate gradient step,of - length n, without bounds applied to it. - gradient_unprojected (np.ndarray): Model gradient of len n. - gradient_inactive (np.ndarray): Model gradient on indices where parameter - bounds are inactive. 
- hessian_inactive (np.ndarray): Model hessian on indices where parameter bounds - are inactive. - inactive_bounds (np.ndarray): 1d array of indices where parameter bounds - are inactive. - - Returns: - predicted_reduction (float): Predicted reduction in criterion function. - - """ - active_bounds = ~inactive_bounds - if active_bounds.any(): - # Projection changed the step, so we have to recompute the step - # and the predicted reduction. Leave the trust-region radius unchanged. - cg_step_recomp = conjugate_gradient_step[inactive_bounds] - gradient_inactive_recomp = gradient_unprojected[inactive_bounds] - - predicted_reduction = _evaluate_model_criterion( - cg_step_recomp, gradient_inactive_recomp, hessian_inactive - ) - else: - # Step did not change, so we can just recover the - # pre-computed prediction - predicted_reduction = _evaluate_model_criterion( - conjugate_gradient_step_inactive, - gradient_inactive, - hessian_inactive, - ) - predicted_reduction = -predicted_reduction - - return predicted_reduction - - -@njit -def _perform_gradient_descent_step( - x_candidate, - f_candidate_initial, - gradient_projected, - hessian_inactive, - model_gradient, - model_hessian, - lower_bounds, - upper_bounds, - inactive_bounds, - maxiter_steepest_descent, - default_radius, - theta, - mu1, - mu2, - gamma1, - gamma2, - gamma3, - gamma4, -): - """Perform gradient descent step and update trust-region radius.""" - f_min = f_candidate_initial - gradient_norm = np.linalg.norm(gradient_projected) - - trustregion_radius = default_radius - radius_lower_bound = 0 - step_size_accepted = 0 - - for _ in range(maxiter_steepest_descent): - x_old = x_candidate - - step_size_candidate = trustregion_radius / gradient_norm - x_candidate = x_old - step_size_candidate * gradient_projected - - x_candidate = _apply_bounds_to_x_candidate( - x_candidate, lower_bounds, upper_bounds - ) - f_candidate = _evaluate_model_criterion( - x_candidate, model_gradient, model_hessian - ) - - x_diff = x_candidate - x_old - - if f_candidate < f_min: - f_min = f_candidate - step_size_accepted = step_size_candidate - - x_inactive = x_diff[inactive_bounds] - square_terms = x_inactive.T @ hessian_inactive @ x_inactive - - predicted_reduction = trustregion_radius * ( - gradient_norm - - 0.5 * trustregion_radius * square_terms / (gradient_norm**2) - ) - actual_reduction = f_candidate_initial - f_candidate - - ( - trustregion_radius, - radius_lower_bound, - ) = _update_trustregion_radius_and_gradient_descent( - trustregion_radius, - radius_lower_bound, - predicted_reduction, - actual_reduction, - gradient_norm, - theta, - mu1, - mu2, - gamma1, - gamma2, - gamma3, - gamma4, - ) - - return ( - x_candidate, - f_min, - step_size_accepted, - trustregion_radius, - radius_lower_bound, - ) - - -@njit -def _update_trustregion_radius_conjugate_gradient( - f_candidate, - predicted_reduction, - actual_reduction, - x_norm_cg, - trustregion_radius, - min_radius, - max_radius, - alpha1, - alpha2, - alpha3, - alpha4, - alpha5, - eta1, - eta2, - eta3, - eta4, -): - """Update the trust-region radius based on predicted and actual reduction.""" - accept_step = False - - if predicted_reduction < 0 or ~np.isfinite(predicted_reduction): - # Reject and start over - trustregion_radius = alpha1 * min(trustregion_radius, x_norm_cg) - - else: - if ~np.isfinite(actual_reduction): - trustregion_radius = alpha1 * min(trustregion_radius, x_norm_cg) - else: - if abs(actual_reduction) <= max(1, abs(f_candidate) * EPSILON) and abs( - predicted_reduction - ) <= max(1, abs(f_candidate) *
EPSILON): - kappa = 1 - else: - kappa = actual_reduction / predicted_reduction - - if kappa < eta1: - # Reject the step - trustregion_radius = alpha1 * min(trustregion_radius, x_norm_cg) - else: - accept_step = True - - # Update the trust-region radius only if the computed step is at the - # trust-radius boundary - if x_norm_cg == trustregion_radius: - if kappa < eta2: - # Marginal bad step - trustregion_radius = alpha2 * trustregion_radius - elif kappa < eta3: - # Reasonable step - trustregion_radius = alpha3 * trustregion_radius - elif kappa < eta4: - trustregion_radius = alpha4 * trustregion_radius - else: - # Very good step - trustregion_radius = alpha5 * trustregion_radius - - trustregion_radius = min(max(trustregion_radius, min_radius), max_radius) - return trustregion_radius, accept_step - - -@njit -def _get_information_on_active_bounds( - x, - gradient_unprojected, - lower_bounds, - upper_bounds, -): - """Return boolean arrays indicating whether bounds at indices are active or not.""" - active_upper = np.zeros(len(x)).astype("bool") - active_lower = np.zeros(len(x)).astype("bool") - active_fixed = np.zeros(len(x)).astype("bool") - inactive = np.ones(len(x)).astype("bool") - for i in range(len(x)): - if (x[i] <= lower_bounds[i]) & (gradient_unprojected[i] > 0): - active_lower[i] = True - inactive[i] = False - elif (x[i] >= upper_bounds[i]) & (gradient_unprojected[i] < 0): - active_upper[i] = True - inactive[i] = False - elif lower_bounds[i] == upper_bounds[i]: - active_fixed[i] = True - inactive[i] = False - return active_lower, active_upper, active_fixed, inactive - - -@njit -def _find_hessian_submatrix_where_bounds_inactive(initial_hessian, inactive_bounds): - """Find the submatrix of the initial hessian where bounds are inactive.""" - hessian_inactive = initial_hessian[:, inactive_bounds][inactive_bounds, :] - return hessian_inactive - - -@njit -def _check_for_convergence( - x_candidate, - f_candidate, - gradient_candidate, - model_gradient, - lower_bounds, - upper_bounds, - converged, - reason, - niter, - maxiter, - gtol_abs, - gtol_rel, - gtol_scaled, -): - """Check if we have found a solution.""" - direction_fischer_burmeister = _get_fischer_burmeister_direction_vector( - x_candidate, gradient_candidate, lower_bounds, upper_bounds - ) - gradient_norm = np.linalg.norm(direction_fischer_burmeister) - gradient_norm_initial = np.linalg.norm(model_gradient) - - if gradient_norm < gtol_abs: - converged = True - reason = 1 - elif f_candidate != 0 and abs(gradient_norm / f_candidate) < gtol_rel: - converged = True - reason = 2 - elif ( - gradient_norm_initial != 0 - and gradient_norm / gradient_norm_initial < gtol_scaled - ): - converged = True - reason = 3 - elif gradient_norm_initial != 0 and gradient_norm == 0 and gtol_scaled == 0: - converged = True - reason = 4 - elif f_candidate <= -np.inf: - converged = True - reason = 5 - elif niter is not None and niter == maxiter: - reason = 6 - - return converged, reason - - -@njit -def _apply_bounds_to_x_candidate(x, lower_bounds, upper_bounds, bound_tol=0): - """Apply upper and lower bounds to the candidate vector.""" - for i in range(len(x)): - if x[i] <= lower_bounds[i] + bound_tol: - x[i] = lower_bounds[i] - elif x[i] >= upper_bounds[i] - bound_tol: - x[i] = upper_bounds[i] - return x - - -@njit -def _project_gradient_onto_feasible_set(gradient_unprojected, inactive_bounds): - """Project gradient onto feasible set, where search directions unconstrained.""" - gradient_projected = np.zeros(len(gradient_unprojected)) - 
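# Example (hypothetical values, added for illustration): with
# inactive_bounds = np.array([True, False, True]) and
# gradient_unprojected = np.array([1.0, 2.0, 3.0]), the projected
# gradient is [1.0, 0.0, 3.0]; components that sit at an active bound
# are zeroed so the search direction never pushes against a binding
# constraint.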
gradient_projected[inactive_bounds] = gradient_unprojected[inactive_bounds] - - return gradient_projected - - -@njit -def _apply_bounds_to_conjugate_gradient_step( - step_inactive, - x_candidate, - lower_bounds, - upper_bounds, - inactive_bounds, - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, -): - """Apply lower and upper bounds to the Conjugate Gradient step.""" - cg_step = np.zeros(len(x_candidate)) - cg_step[inactive_bounds] = step_inactive - - if active_lower_bounds.any(): - x_active_lower = x_candidate[active_lower_bounds] - lower_bound_active = lower_bounds[active_lower_bounds] - - cg_step[active_lower_bounds] = lower_bound_active - x_active_lower - - if active_upper_bounds.any(): - x_active_upper = x_candidate[active_upper_bounds] - upper_bound_active = upper_bounds[active_upper_bounds] - - cg_step[active_upper_bounds] = upper_bound_active - x_active_upper - - if active_fixed_bounds.any(): - cg_step[active_fixed_bounds] = 0 - - return cg_step - - -@njit -def _update_trustregion_radius_and_gradient_descent( - trustregion_radius, - radius_lower_bound, - predicted_reduction, - actual_reduction, - gradient_norm, - theta, - mu1, - mu2, - gamma1, - gamma2, - gamma3, - gamma4, -): - """Update the trust-region radius and its upper bound.""" - if np.abs(actual_reduction) <= EPSILON and np.abs(predicted_reduction) <= EPSILON: - kappa = 1 - else: - kappa = actual_reduction / predicted_reduction - - tau_1 = ( - theta - * gradient_norm - * trustregion_radius - / ( - theta * gradient_norm * trustregion_radius - + (1 - theta) * predicted_reduction - - actual_reduction - ) - ) - tau_2 = ( - theta - * gradient_norm - * trustregion_radius - / ( - theta * gradient_norm * trustregion_radius - - (1 + theta) * predicted_reduction - + actual_reduction - ) - ) - - tau_min = min(tau_1, tau_2) - tau_max = max(tau_1, tau_2) - - if np.abs(kappa - 1) <= mu1: - # Great agreement - radius_lower_bound = max(radius_lower_bound, trustregion_radius) - - if tau_max < 1: - tau = gamma3 - elif tau_max > gamma4: - tau = gamma4 - else: - tau = tau_max - - elif np.abs(kappa - 1) <= mu2: - # Good agreement - radius_lower_bound = max(radius_lower_bound, trustregion_radius) - - if tau_max < gamma2: - tau = gamma2 - elif tau_max > gamma3: - tau = gamma3 - else: - tau = tau_max - - else: - # Not good agreement - if tau_min > 1: - tau = gamma2 - elif tau_max < gamma1: - tau = gamma1 - elif (tau_min < gamma1) and (tau_max >= 1): - tau = gamma1 - elif ( - (tau_1 >= gamma1) and (tau_1 < 1.0) and ((tau_2 < gamma1) or (tau_2 >= 1.0)) - ): - tau = tau_1 - elif ( - (tau_2 >= gamma1) and (tau_2 < 1.0) and ((tau_1 < gamma1) or (tau_2 >= 1.0)) - ): - tau = tau_2 - else: - tau = tau_max - - trustregion_radius = trustregion_radius * tau - - return trustregion_radius, radius_lower_bound - - -@njit -def _get_fischer_burmeister_direction_vector(x, gradient, lower_bounds, upper_bounds): - """Compute the constrained direction vector via the Fischer-Burmeister function.""" - direction = np.zeros(len(x)) - for i, (x_, g_, l_, u_) in enumerate(zip(x, gradient, lower_bounds, upper_bounds)): - fischer_scalar = _get_fischer_burmeister_scalar(u_ - x_, -g_) - fischer_scalar = _get_fischer_burmeister_scalar(fischer_scalar, x_ - l_) - - if l_ == u_: - direction[i] = l_ - x_ - else: - direction[i] = fischer_scalar - return direction - - -@njit -def _get_fischer_burmeister_scalar(a, b): - """Get the value of the Fischer-Burmeister function for two scalar inputs. - - This method was suggested by Bob Vanderbei. 
Since the Fischer-Burmeister function - is symmetric, the order of the scalar inputs does not matter. - - Args: - a (float): First input. - b (float): Second input. - - Returns: - float: Value of the Fischer-Burmeister function for inputs a and b. - - """ - if a + b <= 0: - fischer_burmeister = np.sqrt(a**2 + b**2) - (a + b) - else: - fischer_burmeister = -2 * a * b / (np.sqrt(a**2 + b**2) + (a + b)) - - return fischer_burmeister - - -@njit -def _evaluate_model_criterion( - x, - gradient, - hessian, -): - """Evaluate the criterion function value of the main model. - - Args: - x (np.ndarray): Parameter vector of shape (n,). - gradient (np.ndarray): Gradient of shape (n,) for which the main model - shall be evaluated. - hessian (np.ndarray): Hessian of shape (n, n) for which the main model - shall be evaluated. - - Returns: - float: Criterion value of the main model. - - """ - return gradient.T @ x + 0.5 * x.T @ hessian @ x diff --git a/src/estimagic/optimization/subsolvers/gqtpar_fast.py b/src/estimagic/optimization/subsolvers/gqtpar_fast.py deleted file mode 100644 index 26e4a8da0..000000000 --- a/src/estimagic/optimization/subsolvers/gqtpar_fast.py +++ /dev/null @@ -1,668 +0,0 @@ -"""Auxiliary functions for the quadratic GQTPAR trust-region subsolver.""" -import numpy as np -from numba import njit -from scipy.linalg import cho_solve, solve_triangular -from scipy.linalg.lapack import dpotrf as compute_cholesky_factorization - - -def gqtpar_fast(model, x_candidate, *, k_easy=0.1, k_hard=0.2, maxiter=200): - """Solve the quadratic trust-region subproblem via a nearly exact iterative method. - - This subproblem solver is mainly based on Conn et al. (2000) "Trust region methods" - (:cite:`Conn2000`), pp. 169-200. - - But ideas from Nocedal and Wright (2006) "Numerical optimization" - (:cite:`Nocedal2006`), pp. 83-91, who implement a similar algorithm, - were also used. - - The original algorithm was developed by More and Sorensen (1983) (:cite:`More1983`) - and is known as "GQTPAR". - - The vector x* is a global solution to the quadratic subproblem: - - min_x f + g @ x + 0.5 * x.T @ H @ x, - - if and only if ||x|| <= trustregion_radius - and if there is a scalar lambda >= 0, such that: - - 1) (H + lambda * I(n)) x* = -g - 2) lambda (trustregion_radius - ||x*||) = 0 - 3) H + lambda * I is positive definite - - where g denotes the gradient and H the hessian of the quadratic model, - respectively. - - k_easy and k_hard are stopping criteria for the iterative subproblem solver. - See pp. 194-197 in :cite:`Conn2000` for a more detailed description. - - Args: - model (NamedTuple): NamedTuple containing the parameters of the main model, i.e. - - ``linear_terms``, a np.ndarray of shape (n,) and - - ``square_terms``, a np.ndarray of shape (n,n). - x_candidate (np.ndarray): Initial guess for the solution of the subproblem. - k_easy (float): Stopping criterion for the "easy" case. - k_hard (float): Stopping criterion for the "hard" case. - maxiter (int): Maximum number of iterations to perform. If reached, - terminate. - - Returns: - (dict): Result dictionary containing the following keys: - - ``x`` (np.ndarray): Solution vector of the subproblem of shape (n,) - - ``criterion`` (float): Minimum function value associated with the - solution. - - """ - hessian_already_factorized = False - model_gradient = model.linear_terms - model_hessian = model.square_terms - - # Small floating point number signaling that for vectors smaller - # than that backward substitution is not reliable. - # See Golub, G. H., Van Loan, C. F.
(2013), "Matrix computations", p.165. - zero_threshold = ( - model_hessian.shape[0] * np.finfo(float).eps * _norm(model_hessian, np.Inf) - ) - stopping_criteria = { - "k_easy": k_easy, - "k_hard": k_hard, - } - - gradient_norm = _norm(model_gradient, -1) - ( - lambda_candidate, - lambda_lower_bound, - lambda_upper_bound, - ) = _get_initial_guess_for_lambdas(model_gradient, model_hessian) - - converged = False - - for _niter in range(maxiter): - if hessian_already_factorized: - hessian_already_factorized = False - else: - ( - hessian_plus_lambda, - hessian_upper_triangular, - factorization_info, - ) = _add_lambda_and_factorize_hessian(model_hessian, lambda_candidate) - - if factorization_info == 0 and gradient_norm > zero_threshold: - ( - x_candidate, - hessian_plus_lambda, - hessian_already_factorized, - lambda_candidate, - lambda_lower_bound, - lambda_upper_bound, - converged, - ) = _find_new_candidate_and_update_parameters( - model_gradient, - model_hessian, - hessian_upper_triangular, - hessian_plus_lambda, - hessian_already_factorized, - lambda_candidate, - lambda_lower_bound, - lambda_upper_bound, - stopping_criteria, - converged, - ) - - elif factorization_info == 0 and gradient_norm <= zero_threshold: - ( - x_candidate, - lambda_candidate, - lambda_lower_bound, - lambda_upper_bound, - converged, - ) = _check_for_interior_convergence_and_update( - x_candidate, - hessian_upper_triangular, - lambda_candidate, - lambda_lower_bound, - lambda_upper_bound, - stopping_criteria, - converged, - ) - - else: - ( - lambda_candidate, - lambda_lower_bound, - ) = _update_lambdas_when_factorization_unsuccessful( - hessian_upper_triangular, - hessian_plus_lambda, - lambda_candidate, - lambda_lower_bound, - lambda_upper_bound, - factorization_info, - ) - - if converged: - break - - f_min = ( - model_gradient.T @ x_candidate - + 0.5 * x_candidate.T @ model_hessian @ x_candidate - ) - result = { - "x": x_candidate, - "criterion": f_min, - "n_iterations": _niter, - "success": converged, - } - - return result - - -@njit -def _get_initial_guess_for_lambdas(model_gradient, model_hessian): - """Return good initial guesses for lambda, its lower and upper bound. - - The values are chosen accordingly to the guidelines on - section 7.3.8 (p. 192) from :cite:`Conn2000`. - - Args: - model_gradient (np.ndarray): 1d array, of len n, of linear terms of the - surrogate model. - model_hessian (np.ndarray): 2d array, of shape (n,n), of square terms of the - surrogate model. - - Returns: - lambda_candidate (float): initial guess for damping factor - lambda_lower_bound (float): initial guess for the lower bound of the damping - factor. - lambda_upper_bound(float): initial guess for the upper bound of the damping - factor. 
- - """ - gradient_norm = _norm(model_gradient, -1.0) - model_hessian = model_hessian - - hessian_infinity_norm = _norm(model_hessian, np.Inf) - hessian_frobenius_norm = _norm(model_hessian, -1.0) - - hessian_gershgorin_lower, hessian_gershgorin_upper = _compute_gershgorin_bounds( - model_hessian - ) - - lambda_lower_bound = max( - 0, - -min(np.diag(model_hessian)), - gradient_norm - - min(hessian_gershgorin_upper, hessian_frobenius_norm, hessian_infinity_norm), - ) - lambda_upper_bound = max( - 0, - gradient_norm - + min(-hessian_gershgorin_lower, hessian_frobenius_norm, hessian_infinity_norm), - ) - - if lambda_lower_bound == 0: - lambda_candidate = 0 - else: - lambda_candidate = _get_new_lambda_candidate( - lower_bound=lambda_lower_bound, upper_bound=lambda_upper_bound - ) - - return lambda_candidate, lambda_lower_bound, lambda_upper_bound - - -def _add_lambda_and_factorize_hessian(model_hessian, lambda_candidate): - """Add lambda to hessian and factorize it into its upper triangular matrix. - - Args: - model_hessian (np.ndarray): 2d array, of shape (n,n), of square terms of the - surrogate model. - lambda_candidate (float): dampig factor. - Returns: - hessian_plus_lambda (np.ndarray): The square terms of the main model - plus the identity matrix times lambda. 2d array of shape (n, n). - hessian_upper_triangular (np.ndarray): Factorization of the hessian from the - main model into its upper triangular matrix. The diagonal is filled - and the lower lower triangular contains zeros. - factorization_info (int): success flag returned by scipy.dpotrf - - """ - hessian_plus_lambda = model_hessian + lambda_candidate * _identity( - model_hessian.shape[0] - ) - hessian_upper_triangular, factorization_info = compute_cholesky_factorization( - hessian_plus_lambda, - lower=False, - overwrite_a=False, - clean=True, - ) - - return hessian_plus_lambda, hessian_upper_triangular, factorization_info - - -def _find_new_candidate_and_update_parameters( - model_gradient, - model_hessian, - hessian_upper_triangular, - hessian_plus_lambda, - hessian_already_factorized, - lambda_candidate, - lambda_lower_bound, - lambda_upper_bound, - stopping_criteria, - converged, -): - """Find new candidate vector and update transformed hessian and lambdas.""" - x_candidate = cho_solve( - (hessian_upper_triangular, False), - -model_gradient, - ) - x_norm = _norm(x_candidate, -1.0) - - if x_norm <= 1 and lambda_candidate == 0: - converged = True - - w = solve_triangular( - hessian_upper_triangular, - x_candidate, - trans="T", - ) - w_norm = _norm(w, -1.0) - - newton_step = _compute_newton_step(lambda_candidate, x_norm, w_norm) - - if x_norm < 1: - ( - x_candidate, - hessian_plus_lambda, - hessian_already_factorized, - lambda_new_candidate, - lambda_new_lower_bound, - lambda_new_upper_bound, - converged, - ) = _update_candidate_and_parameters_when_candidate_within_trustregion( - x_candidate, - model_hessian, - hessian_upper_triangular, - hessian_plus_lambda, - hessian_already_factorized, - lambda_candidate, - lambda_lower_bound, - newton_step, - stopping_criteria, - converged, - ) - - else: - if abs(x_norm - 1) <= stopping_criteria["k_easy"]: - converged = True - lambda_new_candidate = newton_step - lambda_new_lower_bound = lambda_candidate - lambda_new_upper_bound = lambda_upper_bound - return ( - x_candidate, - hessian_plus_lambda, - hessian_already_factorized, - lambda_new_candidate, - lambda_new_lower_bound, - lambda_new_upper_bound, - converged, - ) - - -def _check_for_interior_convergence_and_update( - x_candidate, - 
hessian_upper_triangular, - lambda_candidate, - lambda_lower_bound, - lambda_upper_bound, - stopping_criteria, - converged, -): - """Check for interior convergence, update candidate vector and lambdas.""" - if lambda_candidate == 0: - x_candidate = np.zeros(len(x_candidate)) - converged = True - - s_min, z_min = _estimate_smallest_singular_value(hessian_upper_triangular) - step_len = 2 - - if step_len**2 * s_min**2 <= stopping_criteria["k_hard"] * lambda_candidate: - x_candidate = step_len * z_min - converged = True - - lambda_lower_bound = max(lambda_lower_bound, lambda_upper_bound - s_min**2) - lambda_new_candidate = _get_new_lambda_candidate( - lower_bound=lambda_lower_bound, upper_bound=lambda_candidate - ) - return ( - x_candidate, - lambda_new_candidate, - lambda_lower_bound, - lambda_candidate, - converged, - ) - - -def _update_lambdas_when_factorization_unsuccessful( - hessian_upper_triangular, - hessian_plus_lambda, - lambda_candidate, - lambda_lower_bound, - lambda_upper_bound, - factorization_info, -): - """Update lambdas in the case that the factorization of the hessian is not successful.""" - delta, v = _compute_terms_to_make_leading_submatrix_singular( - hessian_upper_triangular, - hessian_plus_lambda, - factorization_info, - ) - v_norm = _norm(v, -1.0) - - lambda_lower_bound = max(lambda_lower_bound, lambda_candidate + delta / v_norm**2) - lambda_new_candidate = _get_new_lambda_candidate( - lower_bound=lambda_lower_bound, upper_bound=lambda_upper_bound - ) - return lambda_new_candidate, lambda_lower_bound - - -@njit -def _get_new_lambda_candidate(lower_bound, upper_bound): - """Update current lambda so that it lies within its bounds. - - Args: - lower_bound (float): lower bound of the current candidate damping factor. - upper_bound (float): upper bound of the current candidate damping factor. - - Returns: - float: New candidate for the damping factor lambda. - - """ - lambda_new_candidate = max( - np.sqrt(max(0, lower_bound * upper_bound)), - lower_bound + 0.01 * (upper_bound - lower_bound), - ) - - return lambda_new_candidate - - -@njit -def _compute_gershgorin_bounds(model_hessian): - """Compute upper and lower Gershgorin bounds for a square matrix. - - The Gershgorin bounds are the upper and lower bounds for the - eigenvalues of the square hessian matrix (i.e. the square terms of - the main model). See :cite:`Conn2000`. - - Args: - model_hessian (np.ndarray): 2d array, of shape (n,n), with square terms of the - surrogate model. - - Returns: - lower_gershgorin (float): Lower Gershgorin bound. - upper_gershgorin (float): Upper Gershgorin bound. - - """ - hessian_diag = np.diag(model_hessian) - hessian_diag_abs = np.abs(hessian_diag) - hessian_row_sums = np.sum(np.abs(model_hessian), axis=1) - - lower_gershgorin = np.min(hessian_diag + hessian_diag_abs - hessian_row_sums) - upper_gershgorin = np.max(hessian_diag - hessian_diag_abs + hessian_row_sums) - - return lower_gershgorin, upper_gershgorin - - -@njit -def _compute_newton_step(lambda_candidate, p_norm, w_norm): - """Compute the Newton step. - - Args: - lambda_candidate (float): Damping factor. - p_norm (float): Frobenius (i.e. L2-norm) of the candidate vector. - w_norm (float): Frobenius (i.e. L2-norm) of vector w, which is the solution - to the following triangular system: U.T w = p. - - Returns: - float: Newton step computed according to formula (4.44) p.87 - from Nocedal and Wright (2006).
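Example (added for illustration, hypothetical numbers): one update of the
formula on a 2x2 problem, using a lower Cholesky factor ``L`` so that
solving ``L w = p`` matches the module's ``U.T w = p``.

    import numpy as np

    hessian = np.array([[1.0, 0.0], [0.0, 4.0]])
    gradient = np.array([3.0, 4.0])
    lam = 2.0  # current damping candidate

    shifted = hessian + lam * np.eye(2)
    factor = np.linalg.cholesky(shifted)     # lower triangular L
    p = np.linalg.solve(shifted, -gradient)  # candidate step
    w = np.linalg.solve(factor, p)           # solves L w = p

    p_norm, w_norm = np.linalg.norm(p), np.linalg.norm(w)
    lam_new = lam + (p_norm / w_norm) ** 2 * (p_norm - 1)
    # lam_new is about 2.72, already close to the root of
    # ||p(lambda)|| = 1, which lies near 2.73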
- - """ - return lambda_candidate + (p_norm / w_norm) ** 2 * (p_norm - 1) - - -def _update_candidate_and_parameters_when_candidate_within_trustregion( - x_candidate, - model_hessian, - hessian_upper_triangular, - hessian_plus_lambda, - hessian_already_factorized, - lambda_candidate, - lambda_lower_bound, - newton_step, - stopping_criteria, - converged, -): - """Update candidate vector, hessian, and lambdas when x outside trust-region.""" - - s_min, z_min = _estimate_smallest_singular_value(hessian_upper_triangular) - step_len = _compute_smallest_step_len_for_candidate_vector(x_candidate, z_min) - - quadratic_term = x_candidate.T @ hessian_plus_lambda @ x_candidate - - relative_error = (step_len**2 * s_min**2) / (quadratic_term + lambda_candidate) - if relative_error <= stopping_criteria["k_hard"]: - x_candidate = x_candidate + step_len * z_min - converged = True - - lambda_new_lower_bound = max(lambda_lower_bound, lambda_candidate - s_min**2) - - hessian_plus_lambda = model_hessian + newton_step * _identity(len(x_candidate)) - _, factorization_unsuccessful = compute_cholesky_factorization( - hessian_plus_lambda, - lower=False, - overwrite_a=False, - clean=True, - ) - - if factorization_unsuccessful == 0: - hessian_already_factorized = True - lambda_new_candidate = newton_step - else: - lambda_new_lower_bound = max(lambda_new_lower_bound, newton_step) - lambda_new_candidate = _get_new_lambda_candidate( - lower_bound=lambda_new_lower_bound, upper_bound=lambda_candidate - ) - - lambda_new_upper_bound = lambda_candidate - - return ( - x_candidate, - hessian_plus_lambda, - hessian_already_factorized, - lambda_new_candidate, - lambda_new_lower_bound, - lambda_new_upper_bound, - converged, - ) - - -@njit -def _compute_smallest_step_len_for_candidate_vector(x_candidate, z_min): - """Compute the smallest step length for the candidate vector. - - Choose step_length with the smallest magnitude. - The reason for this choice is explained at p. 6 in :cite:`More1983`, - just before the formula for tau. - - Args: - x_candidate (np.ndarray): Candidate vector of shape (n,). - z_min (float): Smallest singular value of the hessian matrix. - - Returns: - float: Step length with the smallest magnitude. - - """ - a = z_min @ z_min - b = 2 * x_candidate.T @ z_min - c = x_candidate.T @ x_candidate - 1 - ta, tb = np.roots(np.array([a, b, c])) - if abs(ta) <= abs(tb): - step_len = ta - else: - step_len = tb - return step_len - - -def _compute_terms_to_make_leading_submatrix_singular( - hessian_upper_triangular, hessian_plus_lambda, k -): - """Compute terms that make the leading submatrix of the hessian singular. - - The "hessian" here refers to the matrix - - H + lambda * I(n), - - where H is the initial hessian, lambda is the current damping factor, - I the identity matrix, and m the number of rows/columns of the symmetric - hessian matrix. - - Args: - hessian_upper_triangular (np.ndarray) Upper triangular matrix resulting of an - incomplete Cholesky decomposition of the hessian matrix. - hessian_plus_lambda (np.ndarray): Symmetric k by k hessian matrix, which is not - positive definite. - k (int): Positive integer such that the leading k by k submatrix from - hessian is the first non-positive definite leading submatrix. - - Returns: - Tuple: - - delta(float): Amount that should be added to the element (k, k) of - the leading k by k submatrix of the hessian to make it singular. - - v (np.ndarray): A vector such that ``v.T B v = 0``. Where B is the - hessian after ``delta`` is added to its element (k, k). 
- - """ - hessian_plus_lambda = hessian_plus_lambda - upper_triangular = hessian_upper_triangular - - delta = ( - np.sum(upper_triangular[: k - 1, k - 1] ** 2) - - hessian_plus_lambda[k - 1, k - 1] - ) - - v = np.zeros(len(hessian_plus_lambda)) - v[k - 1] = 1 - - if k != 1: - v[: k - 1] = solve_triangular( - upper_triangular[: k - 1, : k - 1], - -upper_triangular[: k - 1, k - 1], - ) - - return delta, v - - -@njit -def _estimate_condition(u): - """Return largest possible solution w to the system u.T w = e. - - u is an upper triangular matrix, and components of e are selected from {+1, -1}. - - Args: - u (np.ndarray): Upper triangular matrix of shape (n,n). - Returns: - w (np.ndarray): 1d array of len n. - - """ - u = np.atleast_2d(u) - - if u.shape[0] != u.shape[1]: - raise ValueError("A square triangular matrix should be provided.") - - # A vector `e` with components selected from {+1, -1} - # is selected so that the solution `w` to the system - # `U.T w = e` is as large as possible. Implementation - # based on algorithm 3.5.1, p. 142, from reference [2] - # adapted for lower triangular matrix. - m = u.shape[0] - p = np.zeros(m) - w = np.zeros(m) - - # Implemented according to: Golub, G. H., Van Loan, C. F. (2013). - # "Matrix computations". Forth Edition. JHU press. pp. 140-142. - for k in range(m): - wp = (1 - p[k]) / u.T[k, k] - wm = (-1 - p[k]) / u.T[k, k] - pp = p[k + 1 :] + u.T[k + 1 :, k] * wp - pm = p[k + 1 :] + u.T[k + 1 :, k] * wm - - if abs(wp) + _norm(pp, 1) >= abs(wm) + _norm(pm, 1): - w[k] = wp - p[k + 1 :] = pp - else: - w[k] = wm - p[k + 1 :] = pm - return w - - -def _estimate_smallest_singular_value(upper_triangular): - """Estimate the smallest singular vlue and the correspondent right singular vector. - - Given an upper triangular matrix `u`, performs in O(n**2) operations and returns - estimated values of smalles singular value and the correspondent right singular - vector. - - Based on estimate_smallest_singular_value from scipy.optimize._trustregion_exact, - jitting some calculations in a separate function and calling them here. - - Args: - upper_triangular (np.ndarray) : Square upper triangular matrix of shape (n,n) - - Returns: - s_min (float): Estimated smallest singular value of the provided matrix. - z_min (np.ndarray): Estimatied right singular vector. - - Notes: - The procedure is based on [1] and is done in two steps. First, it finds - a vector ``e`` with components selected from {+1, -1} such that the - solution ``w`` from the system ``U.T w = e`` is as large as possible. - Next it estimate ``U v = w``. The smallest singular value is close - to ``norm(w)/norm(v)`` and the right singular vector is close - to ``v/norm(v)``. - The estimation will be better more ill-conditioned is the matrix. - - References: - .. [1] Cline, A. K., Moler, C. B., Stewart, G. W., Wilkinson, J. H. - An estimate for the condition number of a matrix. 1979. - SIAM Journal on Numerical Analysis, 16(2), 368-375. - - """ - w = _estimate_condition(upper_triangular) - - # The system `U v = w` is solved using backward substitution. 
- v = solve_triangular(upper_triangular, w) - - v_norm = _norm(v, -1.0) - w_norm = _norm(w, -1.0) - - # Smallest singular value - s_min = w_norm / v_norm - - # Associated vector - z_min = v / v_norm - - return s_min, z_min - - -@njit -def _norm(a, order): - """A wrapper to jit np.linalg.norm.""" - if order == -1: - out = np.linalg.norm(a) - else: - out = np.linalg.norm(a, order) - return out - - -@njit -def _identity(dim): - """A wrapper to jit np.eye.""" - return np.eye(dim) diff --git a/src/estimagic/optimization/tranquilo.py b/src/estimagic/optimization/tranquilo.py new file mode 100644 index 000000000..6574d936c --- /dev/null +++ b/src/estimagic/optimization/tranquilo.py @@ -0,0 +1,29 @@ +from estimagic.config import IS_TRANQUILO_INSTALLED + + +if IS_TRANQUILO_INSTALLED: + from tranquilo.tranquilo import _tranquilo + from functools import partial + from estimagic.decorators import mark_minimizer + + tranquilo = mark_minimizer( + func=partial(_tranquilo, functype="scalar"), + name="tranquilo", + primary_criterion_entry="value", + needs_scaling=True, + is_available=True, + is_global=False, + ) + + tranquilo_ls = mark_minimizer( + func=partial(_tranquilo, functype="least_squares"), + primary_criterion_entry="root_contributions", + name="tranquilo_ls", + needs_scaling=True, + is_available=True, + is_global=False, + ) + + __all__ = ["tranquilo", "tranquilo_ls"] +else: + __all__ = [] diff --git a/src/estimagic/optimization/tranquilo/__init__.py b/src/estimagic/optimization/tranquilo/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/estimagic/optimization/tranquilo/acceptance_decision.py b/src/estimagic/optimization/tranquilo/acceptance_decision.py deleted file mode 100644 index d2303320b..000000000 --- a/src/estimagic/optimization/tranquilo/acceptance_decision.py +++ /dev/null @@ -1,244 +0,0 @@ -"""Functions that decide what is the next accepted point, given a candidate. - -Decision functions can simply decide whether or not the candidate is accepted but can -also do own function evaluations and decide to accept a different point. - -""" -from typing import NamedTuple - -import numpy as np - -from estimagic.optimization.tranquilo.acceptance_sample_size import ( - get_acceptance_sample_sizes, -) -from estimagic.optimization.tranquilo.get_component import get_component -from estimagic.optimization.tranquilo.options import AcceptanceOptions - - -def get_acceptance_decider(acceptance_decider, acceptance_options): - func_dict = { - "classic": _accept_classic, - "naive_noisy": accept_naive_noisy, - "noisy": accept_noisy, - } - - out = get_component( - name_or_func=acceptance_decider, - func_dict=func_dict, - component_name="acceptance_decider", - user_options=acceptance_options, - default_options=AcceptanceOptions(), - ) - - return out - - -def _accept_classic( - subproblem_solution, - state, - history, - *, - wrapped_criterion, - min_improvement, -): - """Do a classic acceptance step for a trustregion algorithm. - - Args: - subproblem_solution (SubproblemResult): Result of the subproblem solution. - state (State): Namedtuple containing the trustregion, criterion value of - previously accepted point, indices of model points, etc. - wrapped_criterion (callable): The criterion function. - min_improvement (float): Minimum improvement required to accept a point. 
- - Returns: - AcceptanceResult - - """ - out = _accept_simple( - subproblem_solution=subproblem_solution, - state=state, - history=history, - wrapped_criterion=wrapped_criterion, - min_improvement=min_improvement, - n_evals=1, - ) - return out - - -def accept_naive_noisy( - subproblem_solution, - state, - history, - *, - wrapped_criterion, - min_improvement, -): - """Do a naive noisy acceptance step, averaging over a fixed number of points.""" - out = _accept_simple( - subproblem_solution=subproblem_solution, - state=state, - history=history, - wrapped_criterion=wrapped_criterion, - min_improvement=min_improvement, - n_evals=5, - ) - return out - - -def _accept_simple( - subproblem_solution, - state, - history, - *, - wrapped_criterion, - min_improvement, - n_evals, -): - """Do a classic acceptance step for a trustregion algorithm. - - Args: - subproblem_solution (SubproblemResult): Result of the subproblem solution. - state (State): Namedtuple containing the trustregion, criterion value of - previously accepted point, indices of model points, etc. - wrapped_criterion (callable): The criterion function. - min_improvement (float): Minimum improvement required to accept a point. - - Returns: - AcceptanceResult - - """ - candidate_x = subproblem_solution.x - - candidate_index = history.add_xs(candidate_x) - - wrapped_criterion({candidate_index: n_evals}) - - candidate_fval = np.mean(history.get_fvals(candidate_index)) - - actual_improvement = -(candidate_fval - state.fval) - - rho = calculate_rho( - actual_improvement=actual_improvement, - expected_improvement=subproblem_solution.expected_improvement, - ) - - is_accepted = actual_improvement >= min_improvement - - res = _get_acceptance_result( - candidate_x=candidate_x, - candidate_fval=candidate_fval, - candidate_index=candidate_index, - rho=rho, - is_accepted=is_accepted, - old_state=state, - ) - - return res - - -def accept_noisy( - subproblem_solution, - state, - noise_variance, - history, - *, - wrapped_criterion, - min_improvement, - power_level, - confidence_level, - n_min, - n_max, -): - candidate_x = subproblem_solution.x - candidate_index = history.add_xs(candidate_x) - existing_n1 = len(history.get_fvals(state.index)) - - n_1, n_2 = get_acceptance_sample_sizes( - sigma=np.sqrt(noise_variance), - existing_n1=existing_n1, - expected_improvement=subproblem_solution.expected_improvement, - power_level=power_level, - confidence_level=confidence_level, - n_min=n_min, - n_max=n_max, - ) - - eval_info = { - state.index: n_1, - candidate_index: n_2, - } - - wrapped_criterion(eval_info) - - current_fval = history.get_fvals(state.index).mean() - candidate_fval = history.get_fvals(candidate_index).mean() - - actual_improvement = -(candidate_fval - current_fval) - - rho = calculate_rho( - actual_improvement=actual_improvement, - expected_improvement=subproblem_solution.expected_improvement, - ) - - is_accepted = actual_improvement >= min_improvement - - res = _get_acceptance_result( - candidate_x=candidate_x, - candidate_fval=candidate_fval, - candidate_index=candidate_index, - rho=rho, - is_accepted=is_accepted, - old_state=state, - ) - - return res - - -class AcceptanceResult(NamedTuple): - x: np.ndarray - fval: float - index: int - rho: float - accepted: bool - step_length: float - relative_step_length: float - candidate_index: int - candidate_x: np.ndarray - - -def _get_acceptance_result( - candidate_x, - candidate_fval, - candidate_index, - rho, - is_accepted, - old_state, -): - x = candidate_x if is_accepted else old_state.x - fval = 
candidate_fval if is_accepted else old_state.fval - index = candidate_index if is_accepted else old_state.index - step_length = np.linalg.norm(x - old_state.x, ord=2) - relative_step_length = step_length / old_state.trustregion.radius - - out = AcceptanceResult( - x=x, - fval=fval, - index=index, - rho=rho, - accepted=is_accepted, - step_length=step_length, - relative_step_length=relative_step_length, - candidate_index=candidate_index, - candidate_x=candidate_x, - ) - return out - - -def calculate_rho(actual_improvement, expected_improvement): - if expected_improvement == 0 and actual_improvement > 0: - rho = np.inf - elif expected_improvement == 0: - rho = -np.inf - else: - rho = actual_improvement / expected_improvement - return rho diff --git a/src/estimagic/optimization/tranquilo/acceptance_sample_size.py b/src/estimagic/optimization/tranquilo/acceptance_sample_size.py deleted file mode 100644 index 53f5b0208..000000000 --- a/src/estimagic/optimization/tranquilo/acceptance_sample_size.py +++ /dev/null @@ -1,69 +0,0 @@ -import numpy as np -from scipy.stats import norm - - -def get_acceptance_sample_sizes( - sigma, - existing_n1, - expected_improvement, - power_level, - confidence_level, - n_min, - n_max, -): - n1_raw, n2_raw = _get_optimal_sample_sizes( - sd_1=sigma, - sd_2=sigma, - existing_n1=existing_n1, - minimal_effect_size=np.clip(expected_improvement, 1e-8, np.inf), - power_level=power_level, - significance_level=1 - confidence_level, - ) - - n1 = int(np.ceil(np.clip(n1_raw, 0, max(0, n_max - existing_n1)))) - n2 = int(np.ceil(np.clip(n2_raw, n_min, n_max))) - return n1, n2 - - -def _get_optimal_sample_sizes( - sd_1, sd_2, existing_n1, minimal_effect_size, power_level, significance_level -): - """Return missing sample sizes. - - Args: - sd_1 (float): Standard deviation of the first group. - sd_2 (float): Standard deviation of the second group. - existing_n1 (int): Number of samples in the first group. - minimal_effect_size (float): Minimal effect size. - power_level (float): Power level. - significance_level (float): Significance level. - - Returns: - tuple: Missing sample sizes. - - """ - factor = _compute_factor(minimal_effect_size, power_level, significance_level) - - n1_optimal = (sd_1 * (sd_2 + sd_1)) * factor - n2_optimal = (sd_2 * (sd_2 + sd_1)) * factor - - if existing_n1 <= n1_optimal: - n1 = n1_optimal - existing_n1 - n2 = n2_optimal - else: - n1 = 0 - n2 = sd_2**2 * (factor ** (-1) - sd_1**2 / existing_n1) ** (-1) - - return n1, n2 - - -def _compute_factor(minimal_effect_size, power_level, significance_level): - # avoid division by zero warning; will be clipped later - if minimal_effect_size == 0: - factor = np.inf - else: - factor = ( - (norm.ppf(1 - significance_level) + norm.ppf(power_level)) - / minimal_effect_size - ) ** 2 - return factor diff --git a/src/estimagic/optimization/tranquilo/adjust_radius.py b/src/estimagic/optimization/tranquilo/adjust_radius.py deleted file mode 100644 index 5972f4fa5..000000000 --- a/src/estimagic/optimization/tranquilo/adjust_radius.py +++ /dev/null @@ -1,42 +0,0 @@ -import numpy as np - - -def adjust_radius(radius, rho, step_length, options): - """Adjust the trustregion radius based on relative improvement and stepsize. - - This is just a slight generalization of the pounders radius adjustment. With default - options it yields the same result. - - Noise handling is not built-in here. It will be achieved by calling the - function with a noise-adjusted rho. - - Args: - radius (float): The current trust-region radius. 
- rho (float): Actual over expected improvement between the last two accepted - parameter vectors. - step_length (float): Length of the step between the last two accepted - parameter vectors. - options (NamedTuple): Options for radius management. - - Returns: - float: The updated radius. - - """ - is_large_step = step_length / radius >= options.large_step - - if rho >= options.rho_increase and is_large_step: - new_radius = radius * options.expansion_factor - elif rho >= options.rho_decrease: - new_radius = radius - else: - new_radius = radius * options.shrinking_factor - - if np.isfinite(options.max_radius_to_step_ratio): - max_radius = np.min( - [options.max_radius, step_length * options.max_radius_to_step_ratio] - ) - else: - max_radius = options.max_radius - - new_radius = np.clip(new_radius, options.min_radius, max_radius) - - return new_radius diff --git a/src/estimagic/optimization/tranquilo/aggregate_models.py b/src/estimagic/optimization/tranquilo/aggregate_models.py deleted file mode 100644 index 3de7c6408..000000000 --- a/src/estimagic/optimization/tranquilo/aggregate_models.py +++ /dev/null @@ -1,152 +0,0 @@ -from functools import partial - -import numpy as np - -from estimagic.optimization.tranquilo.models import ScalarModel - - -def get_aggregator(aggregator): - """Get a function that aggregates a VectorModel into a ScalarModel. - - Args: - aggregator (str): Name of an aggregator. - - Returns: - callable: The partialled aggregator that only depends on vector_model. - - """ - built_in_aggregators = { - "identity": aggregator_identity, - "sum": aggregator_sum, - "information_equality_linear": aggregator_information_equality_linear, - "least_squares_linear": aggregator_least_squares_linear, - } - - if aggregator in built_in_aggregators: - _aggregator = built_in_aggregators[aggregator] - else: - raise ValueError( - f"Invalid aggregator: {aggregator}. Must be one of " - f"{list(built_in_aggregators)} or a callable." - ) - - out = partial(_aggregate_models_template, aggregator=_aggregator) - return out - - -def _aggregate_models_template(vector_model, aggregator): - """Aggregate a VectorModel into a ScalarModel. - - Args: - vector_model (VectorModel): The VectorModel to aggregate. - aggregator (callable): The function that does the actual aggregation. - - Returns: - ScalarModel: The aggregated model - - """ - intercept, linear_terms, square_terms = aggregator(vector_model) - scalar_model = ScalarModel( - intercept=intercept, - linear_terms=linear_terms, - square_terms=square_terms, - shift=vector_model.shift, - scale=vector_model.scale, - ) - return scalar_model - - -def aggregator_identity(vector_model): - """Aggregate quadratic VectorModel using identity function. - - This aggregation is useful if the underlying maximization problem is a scalar - problem. To get a second-order main model vector_model must be a second-order - model. - - Assumptions - ----------- - 1. functype: scalar - 2. model_type: quadratic - - """ - n_params = vector_model.linear_terms.size - intercept = float(vector_model.intercepts) - linear_terms = vector_model.linear_terms.flatten() - if vector_model.square_terms is None: - square_terms = np.zeros((n_params, n_params)) - else: - square_terms = vector_model.square_terms.reshape(n_params, n_params) - return intercept, linear_terms, square_terms - - -def aggregator_sum(vector_model): - """Aggregate quadratic VectorModel using sum function. - - This aggregation is useful if the underlying maximization problem is a likelihood - problem.
That is, the criterion is the sum of residuals, which allows us to sum - up the coefficients of the residual model to get the main model. The main model will - only be a second-order model if the residual model is a second-order model. - - Assumptions - ----------- - 1. functype: likelihood - 2. model_type: quadratic - - """ - vm_intercepts = vector_model.intercepts - intercept = vm_intercepts.sum(axis=0) - linear_terms = vector_model.linear_terms.sum(axis=0) - square_terms = vector_model.square_terms.sum(axis=0) - return intercept, linear_terms, square_terms - - -def aggregator_least_squares_linear(vector_model): - """Aggregate linear VectorModel assuming a least_squares functype. - - This aggregation is useful if the underlying maximization problem is a least-squares - problem. We can then simply plug-in a linear model for the residuals into the - least-squares formulae to get a second-order main model. - - Assumptions - ----------- - 1. functype: least_squares - 2. model_type: linear - - References - ---------- - See section 2.1 of :cite:`Cartis2018` for further information. - - """ - vm_linear_terms = vector_model.linear_terms - vm_intercepts = vector_model.intercepts - - intercept = vm_intercepts @ vm_intercepts - linear_terms = 2 * np.sum(vm_linear_terms * vm_intercepts.reshape(-1, 1), axis=0) - square_terms = 2 * vm_linear_terms.T @ vm_linear_terms - - return intercept, linear_terms, square_terms - - -def aggregator_information_equality_linear(vector_model): - """Aggregate linear VectorModel using the Fisher information equality. - - This aggregation is useful if the underlying maximization problem is a likelihood - problem. Given a linear model for the likelihood contributions we get an estimate of - the scores. Using the Fisher-Information-Equality we estimate the average Hessian - using the scores. - - Assumptions - ----------- - 1. functype: likelihood - 2. model_type: linear - - """ - vm_linear_terms = vector_model.linear_terms - vm_intercepts = vector_model.intercepts - - fisher_information = vm_linear_terms.T @ vm_linear_terms - - intercept = vm_intercepts.sum(axis=0) - linear_terms = vm_linear_terms.sum(axis=0) - square_terms = -fisher_information / 2 - - return intercept, linear_terms, square_terms diff --git a/src/estimagic/optimization/tranquilo/bounds.py b/src/estimagic/optimization/tranquilo/bounds.py deleted file mode 100644 index f73275421..000000000 --- a/src/estimagic/optimization/tranquilo/bounds.py +++ /dev/null @@ -1,28 +0,0 @@ -from dataclasses import dataclass, replace - -import numpy as np - - -@dataclass(frozen=True) -class Bounds: - """Parameter bounds.""" - - lower: np.ndarray - upper: np.ndarray - - def __post_init__(self): - # cannot use standard __setattr__ because it is frozen - super().__setattr__("has_any", _any_finite(self.lower, self.upper)) - - # make it behave like a NamedTuple - def _replace(self, **kwargs): - return replace(self, **kwargs) - - -def _any_finite(lb, ub): - out = False - if lb is not None and np.isfinite(lb).any(): - out = True - if ub is not None and np.isfinite(ub).any(): - out = True - return out diff --git a/src/estimagic/optimization/tranquilo/clustering.py b/src/estimagic/optimization/tranquilo/clustering.py deleted file mode 100644 index a697ac5c4..000000000 --- a/src/estimagic/optimization/tranquilo/clustering.py +++ /dev/null @@ -1,75 +0,0 @@ -import numpy as np -from numba import njit -from scipy.spatial.distance import pdist, squareform - - -def cluster(x, epsilon, shape="sphere"): - """Find clusters in x. 
- - A cluster is a set of points that are all within a radius - of epsilon around the central point of the cluster. - - Args: - x (np.ndarray): 2d numpy array of shape (n, d) with n points in - d-dimensional space. - epsilon (float): Proximity radius that determines the size of clusters. - shape (str): One of "sphere" or "cube". This is the shape of the clusters. - If "sphere", the distances between the points are calculated with an l2 norm. - If "cube", they are calculated with an infinity norm. - - Returns: - np.ndarray: 1d integer numpy array containing the cluster of each point. - np.ndarray: 1d integer numpy array containing the centers of each cluster. - - """ - if shape == "sphere": - dists = squareform(pdist(x)) - else: - raise NotImplementedError() - - labels, centers = _cluster(dists, epsilon) - return labels, centers - - -@njit -def _cluster(dists, epsilon): - n_points = len(dists) - labels = np.full(n_points, -1) - centers = np.full(n_points, -1) - n_labeled = 0 - cluster_counter = 0 - - while n_labeled < n_points: - # find best centerpoint among remaining points - - # provoke an index error if we forget to set this later - candidate_center = 2 * n_points - max_n_neighbors = 0 - for i in range(n_points): - if labels[i] < 0: - n_neighbors = 0 - for j in range(n_points): - if labels[j] < 0 and j != i and dists[i, j] <= epsilon: - n_neighbors += 1 - if n_neighbors == 0: - labels[i] = cluster_counter - centers[cluster_counter] = i - cluster_counter += 1 - n_labeled += 1 - elif n_neighbors > max_n_neighbors: - max_n_neighbors = n_neighbors - candidate_center = i - - # if not all points are labeled, we can be sure a cluster center - # was found - if n_labeled < n_points: - i = candidate_center - for j in range(n_points): - if labels[j] < 0 and dists[i, j] <= epsilon: - labels[j] = cluster_counter - n_labeled += 1 - - centers[cluster_counter] = i - cluster_counter += 1 - - return labels, centers[:cluster_counter] diff --git a/src/estimagic/optimization/tranquilo/estimate_variance.py b/src/estimagic/optimization/tranquilo/estimate_variance.py deleted file mode 100644 index c4503323d..000000000 --- a/src/estimagic/optimization/tranquilo/estimate_variance.py +++ /dev/null @@ -1,61 +0,0 @@ -"""Estimate the variance or covariance matrix of the noise in the objective function.""" - - -import numpy as np - -from estimagic.optimization.tranquilo.get_component import get_component -from estimagic.optimization.tranquilo.history import History -from estimagic.optimization.tranquilo.region import Region -from estimagic.optimization.tranquilo.options import VarianceEstimatorOptions - - -def get_variance_estimator(fitter, user_options): - func_dict = { - "classic": _estimate_variance_classic, - } - - out = get_component( - name_or_func=fitter, - func_dict=func_dict, - component_name="variance estimator", - user_options=user_options, - default_options=VarianceEstimatorOptions(), - ) - - return out - - -def _estimate_variance_classic( - trustregion: Region, - history: History, - model_type: str, - max_distance_factor: float, - min_n_evals: int, -): - all_indices = history.get_x_indices_in_region( - trustregion._replace(radius=trustregion.radius * max_distance_factor) - ) - - n_evals = {idx: len(history.get_fvals(idx)) for idx in all_indices} - - # make sure we keep at least one sample from which we can estimate a variance - cutoff = min(max(n_evals.values()), min_n_evals) - - valid_indices = [idx for idx in all_indices if n_evals[idx] >= cutoff] - weights = np.array([n_ for idx, n_ in n_evals.items() if idx in
valid_indices]) - weights = weights / weights.sum() - - if model_type == "scalar": - samples = list(history.get_fvals(valid_indices).values()) - out = 0.0 - for weight, sample in zip(weights, samples): - out += weight * np.var(sample, ddof=1) - else: - samples = list(history.get_fvecs(valid_indices).values()) - - dim = samples[0].shape[1] - out = np.zeros((dim, dim)) - for weight, sample in zip(weights, samples): - out += weight * np.cov(sample, rowvar=False, ddof=1) - - return out diff --git a/src/estimagic/optimization/tranquilo/filter_points.py b/src/estimagic/optimization/tranquilo/filter_points.py deleted file mode 100644 index 39658b45a..000000000 --- a/src/estimagic/optimization/tranquilo/filter_points.py +++ /dev/null @@ -1,129 +0,0 @@ -import numpy as np -import scipy - -from estimagic.optimization.tranquilo.clustering import cluster -from estimagic.optimization.tranquilo.get_component import get_component -from estimagic.optimization.tranquilo.volume import get_radius_after_volume_scaling -from estimagic.optimization.tranquilo.options import FilterOptions - - -def get_sample_filter(sample_filter="keep_all", user_options=None): - """Get filter function with partialled options. - - The filter function is applied to points inside the current trustregion before - additional points are sampled. - - The resulting function only takes an array of shape n_points, n_params as argument. - - Args: - sample_filter (str or callable): The name of a built in filter or a function - with the filter interface. - user_options (dict or namedtuple): Additional options for the filter. - - Returns: - callable: The filter - - """ - built_in_filters = { - "discard_all": discard_all, - "keep_all": keep_all, - "clustering": keep_cluster_centers, - "drop_excess": drop_excess, - } - - out = get_component( - name_or_func=sample_filter, - component_name="sample_filter", - func_dict=built_in_filters, - user_options=user_options, - default_options=FilterOptions(), - ) - - return out - - -def discard_all(state): - return state.x.reshape(1, -1), np.array([state.index]) - - -def keep_all(xs, indices): - return xs, indices - - -def drop_excess(xs, indices, state, target_size): - n_to_drop = max(0, len(xs) - target_size) - - if n_to_drop: - xs, indices = drop_worst_points(xs, indices, state, n_to_drop) - - return xs, indices - - -def drop_worst_points(xs, indices, state, n_to_drop): - """Drop the worst points from xs and indices. - - As long as there are points outside the trustregion, drop the point that is furthest - away from the trustregion center. - - If all points are inside the trustregion, find the two points that are closest to - each other. If one of them is the center, drop the other one. If none is the center, - drop the one that is closer to the center. - - This reflects that we want to have points as far out as possible as long as they are - inside the trustregion. - - The control flow is a bit complicated to avoid unnecessary or repeated computations - of distances and pairwise distances. 
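- 
-     Illustration (editor's sketch, not part of the original tests; assumes a
-     minimal ``state`` that only carries ``x``, ``index`` and
-     ``trustregion.radius``):
- 
-         import numpy as np
-         from types import SimpleNamespace
- 
-         state = SimpleNamespace(
-             x=np.zeros(2), index=0, trustregion=SimpleNamespace(radius=1.0)
-         )
-         xs = np.array([[0.0, 0.0], [0.1, 0.0], [3.0, 0.0]])
-         # the last point lies outside the radius-1 trustregion, so it is the
-         # first to be dropped; only then would the close pair be thinned out
-         new_xs, new_indices = drop_worst_points(xs, np.arange(3), state, n_to_drop=1)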
- - """ - n_dropped = 0 - - if n_dropped < n_to_drop: - dists = np.linalg.norm(xs - state.x, axis=1) - - while n_dropped < n_to_drop and (dists > state.trustregion.radius).any(): - drop_index = np.argmax(dists) - xs = np.delete(xs, drop_index, axis=0) - indices = np.delete(indices, drop_index) - dists = np.delete(dists, drop_index, axis=0) - n_dropped += 1 - - if n_dropped < n_to_drop: - pdists = scipy.spatial.distance.squareform(scipy.spatial.distance.pdist(xs)) - pdists[np.diag_indices_from(pdists)] = np.inf - - while n_dropped < n_to_drop: - i, j = np.unravel_index(np.argmin(pdists), pdists.shape) - - if indices[i] == state.index: - drop_index = j - elif indices[j] == state.index: - drop_index = i - else: - drop_index = i if dists[i] < dists[j] else j - - xs = np.delete(xs, drop_index, axis=0) - indices = np.delete(indices, drop_index) - dists = np.delete(dists, drop_index, axis=0) - pdists = np.delete(pdists, drop_index, axis=0) - pdists = np.delete(pdists, drop_index, axis=1) - n_dropped += 1 - - return xs, indices - - -def keep_cluster_centers( - xs, indices, state, target_size, strictness=1e-10, shape="sphere" -): - dim = xs.shape[1] - scaling_factor = strictness / target_size - cluster_radius = get_radius_after_volume_scaling( - radius=state.trustregion.radius, - dim=dim, - scaling_factor=scaling_factor, - ) - _, centers = cluster(x=xs, epsilon=cluster_radius, shape=shape) - - # do I need to make sure trustregion center is in there? - out = xs[centers], indices[centers] - return out diff --git a/src/estimagic/optimization/tranquilo/fit_models.py b/src/estimagic/optimization/tranquilo/fit_models.py deleted file mode 100644 index 32f0b74ce..000000000 --- a/src/estimagic/optimization/tranquilo/fit_models.py +++ /dev/null @@ -1,507 +0,0 @@ -from functools import partial - -import numpy as np -from numba import njit -from scipy.linalg import qr_multiply - -from estimagic.optimization.tranquilo.get_component import get_component -from estimagic.optimization.tranquilo.handle_infinity import get_infinity_handler -from estimagic.optimization.tranquilo.options import FitterOptions -from estimagic.optimization.tranquilo.models import ( - VectorModel, - add_models, - move_model, - n_second_order_terms, -) - - -def get_fitter( - fitter, - fitter_options=None, - model_type=None, - residualize=None, - infinity_handling=None, -): - """Get a fit-function with partialled options. - - Args: - fitter (str or callable): Name of a fit method or a fit method. Arguments need - to be, in order, - - x (np.ndarray): Data points. - - y (np.ndarray): Corresponding function evaluations at data points. - - weighs (np.ndarray): Weights for the data points. - - model_type (str): Type of model to be fitted. - - fitter_options (dict): Options for the fit method. The following are supported: - - l2_penalty_linear (float): Penalty that is applied to all linear terms. - - l2_penalty_square (float): Penalty that is applied to all square terms, - that is the quadratic and interaction terms. - - model_type (str): Type of the model that is fitted. The following are supported: - - "linear": Only linear effects and intercept. - - "quadratic": Fully quadratic model. - - residualize (bool): If True, the model is fitted to the residuals of the old - model. This introduces momentum when the coefficients are penalized. - - infinity_handling (str): How to handle infinite values in the data. Currently - supported: {"relative"}. See `handle_infinty.py`. - - Returns: - callable: The partialled fit method that only depends on x and y. 
- - """ - built_in_fitters = { - "ols": fit_ols, - "ridge": fit_ridge, - "powell": fit_powell, - "tranquilo": fit_tranquilo, - } - - mandatory_arguments = ["x", "y", "model_type"] - - _raw_fitter = get_component( - name_or_func=fitter, - component_name="fitter", - func_dict=built_in_fitters, - default_options=FitterOptions(), - user_options=fitter_options, - mandatory_signature=mandatory_arguments, - ) - - clip_infinite_values = get_infinity_handler(infinity_handling) - - fitter = partial( - _fitter_template, - fitter=_raw_fitter, - model_type=model_type, - clip_infinite_values=clip_infinite_values, - residualize=residualize, - ) - - return fitter - - -def _fitter_template( - x, - y, - region, - old_model, - weights=None, - fitter=None, - model_type=None, - clip_infinite_values=None, - residualize=False, -): - """Fit a model to data. - - Args: - x (np.ndarray): Array of shape (n_samples, n_params) of x-values, - rescaled such that the trust region becomes a hypercube from -1 to 1. - y (np.ndarray): Array of shape (n_samples, n_residuals) with function - evaluations that have been centered around the function value at the - trust region center. - fitter (callable): Fit method. The first argument of any fit method needs to be - ``x``, second ``y`` and third ``model_type``. - model_type (str): Type of the model that is fitted. The following are supported: - - "linear": Only linear effects and intercept. - - "quadratic": Fully quadratic model. - - Returns: - VectorModel or ScalarModel: Results container. - - """ - _, n_params = x.shape - n_residuals = y.shape[1] - - y_clipped = clip_infinite_values(y) - x_unit = region.map_to_unit(x) - - if residualize: - old_model_moved = move_model(old_model, region) - y_clipped = y_clipped - old_model_moved.predict(x_unit).reshape(y_clipped.shape) - - coef = fitter(x=x_unit, y=y_clipped, weights=weights, model_type=model_type) - - # results processing - intercepts, linear_terms, square_terms = np.split(coef, (1, n_params + 1), axis=1) - intercepts = intercepts.flatten() - - # construct final square terms - if model_type == "quadratic": - square_terms = _reshape_square_terms_to_hess( - square_terms, n_params, n_residuals - ) - else: - square_terms = None - - results = VectorModel( - intercepts, - linear_terms, - square_terms, - shift=region.effective_center, - scale=region.effective_radius, - ) - - if residualize: - results = add_models(results, old_model_moved) - - return results - - -def fit_ols(x, y, weights, model_type): - """Fit a linear model using ordinary least squares. - - Args: - x (np.ndarray): Array of shape (n_samples, n_params) of x-values, - rescaled such that the trust region becomes a hypercube from -1 to 1. - y (np.ndarray): Array of shape (n_samples, n_residuals) with function - evaluations that have been centered around the function value at the - trust region center. - model_type (str): Type of the model that is fitted. The following are supported: - - "linear": Only linear effects and intercept. - - "quadratic": Fully quadratic model. - - Returns: - np.ndarray: The model coefficients. - - """ - features = _build_feature_matrix(x, model_type) - features_w, y_w = _add_weighting(features, y, weights) - coef = _fit_ols(features_w, y_w) - - return coef - - -def _fit_ols(x, y): - """Fit a linear model using least-squares. - - Args: - x (np.ndarray): Array of shape (n, p) of x-values. - y (np.ndarray): Array of shape (n, k) of y-values. - - Returns: - coef (np.ndarray): Array of shape (p, k) of coefficients. 
- - """ - coef, *_ = np.linalg.lstsq(x, y, rcond=None) - coef = coef.T - - return coef - - -def fit_tranquilo(x, y, weights, model_type, p_intercept, p_linear, p_square): - """Fit a linear model using ordinary least squares. - - The difference to fit_ols is that the linear terms are penalized less strongly - when the system is underdetermined. - - Args: - x (np.ndarray): Array of shape (n_samples, n_params) of x-values, - rescaled such that the trust region becomes a hypercube from -1 to 1. - y (np.ndarray): Array of shape (n_samples, n_residuals) with function - evaluations that have been centered around the function value at the - trust region center. - model_type (str): Type of the model that is fitted. The following are supported: - - "linear": Only linear effects and intercept. - - "quadratic": Fully quadratic model. - - Returns: - np.ndarray: The model coefficients. - - """ - features = _build_feature_matrix(x, model_type) - features_w, y_w = _add_weighting(features, y, weights) - - n_params = x.shape[1] - n_features = features.shape[1] - - factor = np.array( - [1 / p_intercept] - + [1 / p_linear] * n_params - + [1 / p_square] * (n_features - 1 - n_params) - ) - - coef_raw = _fit_ols(features_w * factor, y_w) - coef = coef_raw * factor - - return coef - - -def fit_ridge( - x, - y, - weights, - model_type, - l2_penalty_linear, - l2_penalty_square, -): - """Fit a linear model using Ridge regression. - - Args: - x (np.ndarray): Array of shape (n_samples, n_params) of x-values, rescaled such - that the trust region becomes a hypercube from -1 to 1. - y (np.ndarray): Array of shape (n_samples, n_residuals) with function - evaluations that have been centered around the function value at the trust - region center. - model_type (str): Type of the model that is fitted. The following are supported: - - "linear": Only linear effects and intercept. - - "quadratic": Fully quadratic model. - l2_penalty_linear (float): Penalty that is applied to all linear terms. - l2_penalty_square (float): Penalty that is applied to all square terms, that is - the quadratic and interaction terms. - - Returns: - np.ndarray: The model coefficients. - - """ - features = _build_feature_matrix(x, model_type) - - features_w, y_w = _add_weighting(features, y, weights) - - # create penalty array - n_params = x.shape[1] - cutoffs = (1, n_params + 1) - - penalty = np.zeros(features.shape[1]) - penalty[: cutoffs[0]] = 0 - penalty[cutoffs[0] : cutoffs[1]] = l2_penalty_linear - penalty[cutoffs[1] :] = l2_penalty_square - - coef = _fit_ridge(features_w, y_w, penalty) - - return coef - - -def _fit_ridge(x, y, penalty): - """Fit a linear model using ridge regression. - - Args: - x (np.ndarray): Array of shape (n, p) of x-values. - y (np.ndarray): Array of shape (n, k) of y-values. - penalty (np.ndarray): Array of shape (p, ) of penalty values. - - Returns: - np.ndarray: Array of shape (p, k) of coefficients. - - """ - a = x.T @ x - b = x.T @ y - - coef, *_ = np.linalg.lstsq(a + np.diag(penalty), b, rcond=None) - coef = coef.T - - return coef - - -def fit_powell(x, y, model_type): - """Fit a model, switching between penalized and unpenalized fitting. - - For: - - n + 1 points: Fit ols with linear feature matrix. - - n + 2 <= n + 0.5 * n * (n + 1) points, i.e. until one less than a - just identified quadratic model: Fit pounders. - - else: Fit ols with quadratic feature matrix. - - - Args: - x (np.ndarray): Array of shape (n_samples, n_params) of x-values, - rescaled such that the trust region becomes a hypercube from -1 to 1. 
- y (np.ndarray): Array of shape (n_samples, n_residuals) with function - evaluations that have been centered around the function value at the - trust region center. - model_type (str): Type of the model that is fitted. The following are supported: - - "linear": Only linear effects and intercept. - - "quadratic": Fully quadratic model. - - Returns: - np.ndarray: The model coefficients. - - """ - n_samples, n_params = x.shape - - _switch_to_linear = n_samples <= n_params + 1 - - _n_just_identified = n_params + 1 - if model_type == "quadratic": - _n_just_identified += n_second_order_terms(n_params) - - if _switch_to_linear: - coef = fit_ols(x, y, weights=None, model_type="linear") - n_resid, n_present = coef.shape - padding = np.zeros((n_resid, _n_just_identified - n_present)) - coef = np.hstack([coef, padding]) - elif n_samples >= _n_just_identified: - coef = fit_ols(x, y, weights=None, model_type=model_type) - else: - coef = _fit_minimal_frobenius_norm_of_hessian(x, y) - - return coef - - -def _fit_minimal_frobenius_norm_of_hessian(x, y): - """Fit a quadratic model using the Powell fitting method. - - The solution represents the quadratic whose Hessian matrix is of - minimum Frobenius norm. This has been popularized by Powell and is used in - many optimizers, e.g. bobyqa and pounders. - - For a mathematical exposition, see :cite:`Wild2008`, p. 3-5. - - This method should only be called if the number of samples is larger than what - is needed to identify the parameters of a linear model but smaller than what - is needed to identify the parameters of a quadratic model. Most of the time, - the sample size is 2n + 1. - - Args: - x (np.ndarray): Array of shape (n_samples, n_params) of x-values, - rescaled such that the trust region becomes a hypercube from -1 to 1. - y (np.ndarray): Array of shape (n_samples, n_residuals) with function - evaluations that have been centered around the function value at the - trust region center. - - Returns: - np.ndarray: The model coefficients.
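- 
-     Example (editor's note on the sample-size window in which this fitter
-     applies, here for n_params = 3):
- 
-         n_params = 3
-         n_linear = n_params + 1  # 4 samples: linear model just identified
-         n_quadratic = n_linear + n_params * (n_params + 1) // 2  # 10: quadratic
-         # this fitter is used for 5 to 9 samples, e.g. 2 * n_params + 1 = 7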
- - """ - n_samples, n_params = x.shape - - _n_too_few = n_params + 1 - _n_too_many = n_params + n_params * (n_params + 1) // 2 + 1 - - if n_samples <= _n_too_few: - raise ValueError("Too few points for minimum frobenius fitting.") - if n_samples >= _n_too_many: - raise ValueError("Too may points for minimum frobenius fitting") - - n_poly_features = n_second_order_terms(n_params) - - ( - m_mat, - n_mat, - z_mat, - n_z_mat, - ) = _get_feature_matrices_minimal_frobenius_norm_of_hessian(x) - - coef = _get_current_fit_minimal_frobenius_norm_of_hessian( - y=y, - m_mat=m_mat, - n_mat=n_mat, - z_mat=z_mat, - n_z_mat=n_z_mat, - n_params=n_params, - n_poly_features=n_poly_features, - ) - - return coef - - -def _get_current_fit_minimal_frobenius_norm_of_hessian( - y, - m_mat, - n_mat, - z_mat, - n_z_mat, - n_params, - n_poly_features, -): - n_residuals = y.shape[1] - offset = 0 - - coeffs_linear = np.empty((n_residuals, 1 + n_params)) - coeffs_square = np.empty((n_residuals, n_poly_features)) - - n_z_mat_square = n_z_mat.T @ n_z_mat - - for k in range(n_residuals): - z_y_vec = np.dot(z_mat.T, y[:, k]) - coeffs_first_stage, *_ = np.linalg.lstsq( - np.atleast_2d(n_z_mat_square), np.atleast_1d(z_y_vec), rcond=None - ) - - coeffs_second_stage = np.atleast_2d(n_z_mat) @ coeffs_first_stage - - rhs = y[:, k] - n_mat @ coeffs_second_stage - - alpha, *_ = np.linalg.lstsq(m_mat, rhs[: n_params + 1], rcond=None) - coeffs_linear[k, :] = alpha[offset : (n_params + 1)] - - coeffs_square[k] = coeffs_second_stage - - coef = np.concatenate((coeffs_linear, coeffs_square), axis=1) - - return np.atleast_2d(coef) - - -def _get_feature_matrices_minimal_frobenius_norm_of_hessian(x): - n_samples, n_params = x.shape - - intercept = np.ones((n_samples, 1)) - features = np.concatenate((intercept, _quadratic_features(x)), axis=1) - m_mat, n_mat = np.split(features, (n_params + 1,), axis=1) - - m_mat_pad = np.zeros((n_samples, n_samples)) - m_mat_pad[:, : n_params + 1] = m_mat - - n_z_mat, _ = qr_multiply( - m_mat_pad, - n_mat.T, - ) - - z_mat, _ = qr_multiply( - m_mat_pad, - np.eye(n_samples), - ) - - return ( - m_mat[: n_params + 1, : n_params + 1], - n_mat, - z_mat[:, n_params + 1 : n_samples], - n_z_mat[:, n_params + 1 : n_samples], - ) - - -def _build_feature_matrix(x, model_type): - raw = x if model_type == "linear" else _quadratic_features(x) - intercept = np.ones((len(x), 1)) - features = np.concatenate((intercept, raw), axis=1) - return features - - -def _reshape_square_terms_to_hess(square_terms, n_params, n_residuals): - idx1, idx2 = np.triu_indices(n_params) - hess = np.zeros((n_residuals, n_params, n_params), dtype=np.float64) - hess[:, idx1, idx2] = square_terms - hess = hess + np.triu(hess).transpose(0, 2, 1) - - return hess - - -@njit -def _quadratic_features(x): - # Create fully quadratic features without intercept - n_samples, n_params = x.shape - n_poly_terms = n_second_order_terms(n_params) - - poly_terms = np.empty((n_poly_terms, n_samples), np.float64) - xt = x.T - - idx = 0 - for i in range(n_params): - j_start = i - for j in range(j_start, n_params): - poly_terms[idx] = xt[i] * xt[j] - idx += 1 - out = np.concatenate((xt, poly_terms), axis=0) - return out.T - - -def _add_weighting(x, y, weights=None): - # weight the data in order to get weighted fitting from fitters that do not support - # weights. 
Inspired by: https://stackoverflow.com/a/52452833 - n_samples = len(x) - if weights is not None: - _root_weights = np.sqrt(weights).reshape(n_samples, 1) - y = y * _root_weights - x = x * _root_weights - return x, y diff --git a/src/estimagic/optimization/tranquilo/geometry.py b/src/estimagic/optimization/tranquilo/geometry.py deleted file mode 100644 index de1d6aa94..000000000 --- a/src/estimagic/optimization/tranquilo/geometry.py +++ /dev/null @@ -1,24 +0,0 @@ -import numpy as np - - -def log_d_quality_calculator(sample, trustregion): - """Logarithm of the d-optimality criterion. - - For a data sample x the log_d_criterion is defined as log(det(x.T @ x)). If the - determinant is zero the function returns -np.inf. Before computation the sample is - mapped into unit space. - - Args: - sample (np.ndarray): The data sample, shape = (n, p). - trustregion (Region): Trustregion. See module region.py. - - Returns: - float: The criterion value. - - """ - points = trustregion.map_to_unit(sample) - n_samples, n_params = points.shape - xtx = points.T @ points - det = np.linalg.det(xtx / n_samples) - out = n_params * np.log(n_samples) + np.log(det) - return out diff --git a/src/estimagic/optimization/tranquilo/get_component.py b/src/estimagic/optimization/tranquilo/get_component.py deleted file mode 100644 index 54bdbe850..000000000 --- a/src/estimagic/optimization/tranquilo/get_component.py +++ /dev/null @@ -1,231 +0,0 @@ -import functools -import inspect -import warnings -from functools import partial - -from estimagic.utilities import propose_alternatives -from estimagic.optimization.tranquilo.options import update_option_bundle - - -def get_component( - name_or_func, - component_name, - func_dict=None, - default_options=None, - user_options=None, - redundant_option_handling="ignore", - redundant_argument_handling="ignore", - mandatory_signature=None, -): - """Process a function that represents an interchangeable component of tranquilo. - - The function is either a built in function or a user provided function. In all - cases we run some checks that the signature of the function is correct and then - partial all static options into the function. - - Args: - name_or_func (str or callable): Name of a function or function. - component_name (str): Name of the component. Used in error messages. Examples - would be "subsolver" or "model". - func_dict (dict): Dict with function names as keys and functions as values. - default_options (NamedTuple): Default options as a dict or NamedTuple. The - default options will be updated by the user options. - user_options (NamedTuple, Dict or None): User options as a dict or NamedTuple. - The default options will be updated by the user options. - redundant_option_handling (str): How to handle redundant options. Can be - "warn", "raise" or "ignore". Default "ignore". - redundant_argument_handling (str): How to handle redundant arguments passed - to the processed function at runtime. Can be "warn", "raise" or "ignore". - Default "ignore". - mandatory_signature (list): List or tuple of arguments that must be in the - signature of all functions in `func_dict`. These can be options or - arguments. Otherwise, a ValueError is raised. - - Returns: - callable: The processed function.
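- 
-     Example (editor's sketch with a hypothetical user-provided component):
- 
-         from typing import NamedTuple
- 
-         class Options(NamedTuple):
-             stepsize: float = 0.5
- 
-         def my_solver(x, stepsize):
-             return x - stepsize
- 
-         solve = get_component(
-             name_or_func=my_solver,
-             component_name="subsolver",
-             default_options=Options(),
-             user_options={"stepsize": 0.1},
-         )
-         solve(x=1.0)  # returns 0.9; stepsize was partialled in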
- - """ - - _func, _name = _get_function_and_name( - name_or_func=name_or_func, - component_name=component_name, - func_dict=func_dict, - ) - - _all_arguments = list(inspect.signature(_func).parameters) - - _valid_options = _get_valid_options( - default_options=default_options, - user_options=user_options, - signature=_all_arguments, - name=_name, - component_name=component_name, - redundant_option_handling=redundant_option_handling, - ) - - _fail_if_mandatory_argument_is_missing( - mandatory_arguments=mandatory_signature, - signature=_all_arguments, - name=_name, - component_name=component_name, - ) - - _partialled = partial(_func, **_valid_options) - - if redundant_argument_handling == "raise": - out = _partialled - else: - out = _add_redundant_argument_handling( - func=_partialled, - signature=_all_arguments, - warn=redundant_argument_handling == "warn", - ) - - return out - - -def _get_function_and_name(name_or_func, component_name, func_dict): - """Get the function and its name. - - Args: - name_or_func (str or callable): Name of a function or function. - component_name (str): Name of the component. Used in error messages. Examples - would be "subsolver" or "model". - func_dict (dict): Dict with function names as keys and functions as values. - - Returns: - tuple: The function and its name. - - """ - func_dict = {} if func_dict is None else func_dict - if isinstance(name_or_func, str): - if name_or_func in func_dict: - _func = func_dict[name_or_func] - _name = name_or_func - else: - _proposal = propose_alternatives(name_or_func, list(func_dict)) - msg = ( - f"If {component_name} is a string, it must be one of the built in " - f"{component_name}s. Did you mean: {_proposal}?" - ) - raise ValueError(msg) - elif callable(name_or_func): - _func = name_or_func - _name = _func.__name__ - else: - raise TypeError("name_or_func must be a string or a callable.") - - return _func, _name - - -def _get_valid_options( - default_options, - user_options, - signature, - name, - component_name, - redundant_option_handling, -): - """Get the options that are valid for the function. - - Args: - default_options (NamedTuple): Default options as a dict or NamedTuple. The - default options will be updated by the user options. - user_options (NamedTuple, Dict or None): User options as a dict or NamedTuple. - The default options will be updated by the user options. - signature (list): List of arguments that are present in the signature. - name (str): Name of the function. - component_name (str): Name of the component. Used in error messages. Examples - would be "subsolver" or "model". - redundant_option_handling (str): How to handle redundant options. Can be - - Returns: - dict: Valid options. - - """ - _options = update_option_bundle(default_options, user_options=user_options) - _options = _options._asdict() - - _valid_options = {k: v for k, v in _options.items() if k in signature} - _redundant_options = {k: v for k, v in _options.items() if k not in signature} - - if redundant_option_handling == "warn" and _redundant_options: - msg = ( - f"The following options are not supported by the {component_name} {name} " - f"and will be ignored: {list(_redundant_options)}." - ) - warnings.warn(msg) - - elif redundant_option_handling == "raise" and _redundant_options: - msg = ( - f"The following options are not supported by the {component_name} {name}: " - f"{list(_redundant_options)}." 
- ) - raise ValueError(msg) - - return _valid_options - - -def _fail_if_mandatory_argument_is_missing( - mandatory_arguments, signature, name, component_name -): - """Check if any mandatory arguments are missing in the signature of the function. - - Args: - mandatory_arguments (list): List of mandatory arguments. - signature (list): List of arguments that are present in the signature. - name (str): Name of the function. - component_name (str): Name of the component. Used in error messages. Examples - would be "subsolver" or "model". - - Returns: - None - - Raises: - ValueError: If any mandatory arguments are missing in the signature of the - function. - - """ - mandatory_arguments = [] if mandatory_arguments is None else mandatory_arguments - - _missing = [arg for arg in mandatory_arguments if arg not in signature] - - if _missing: - msg = ( - f"The following mandatory arguments are missing in the signature of the " - f"{component_name} {name}: {_missing}." - ) - raise ValueError(msg) - - -def _add_redundant_argument_handling(func, signature, warn): - """Allow func to be called with arguments that are not in the signature. - - Args: - func (callable): The function to be wrapped. - signature (list): List of arguments that are supported by func. - warn (bool): Whether to warn about redundant arguments. - - Returns: - callable: The wrapped function. - - """ - - @functools.wraps(func) - def _wrapper_add_redundant_argument_handling(*args, **kwargs): - _kwargs = {**dict(zip(signature[: len(args)], args)), **kwargs} - - _redundant = {k: v for k, v in _kwargs.items() if k not in signature} - _valid = {k: v for k, v in _kwargs.items() if k in signature} - - if warn and _redundant: - msg = ( - f"The following arguments are not supported by the function " - f"{func.__name__} and will be ignored: {_redundant}." - ) - warnings.warn(msg) - - out = func(**_valid) - return out - - return _wrapper_add_redundant_argument_handling diff --git a/src/estimagic/optimization/tranquilo/handle_infinity.py b/src/estimagic/optimization/tranquilo/handle_infinity.py deleted file mode 100644 index c250b64f2..000000000 --- a/src/estimagic/optimization/tranquilo/handle_infinity.py +++ /dev/null @@ -1,49 +0,0 @@ -import numpy as np - - -def get_infinity_handler(infinity_handler): - if isinstance(infinity_handler, str): - built_in_handlers = {"relative": clip_relative} - infinity_handler = built_in_handlers[infinity_handler] - elif not callable(infinity_handler): - raise TypeError("infinity_handler must be a string or callable.") - - return infinity_handler - - -def clip_relative(fvecs): - """Clip infinities at a value that is relative to worst finite value. - - Args: - fvecs (np.ndarray): 2d numpy array of shape n_samples, n_residuals. - - - Returns: - np.ndarray: Array of same shape as fvecs with finite values. - - """ - _mask = np.isfinite(fvecs) - - _mins = np.min(fvecs, axis=0, where=_mask, initial=1e300) - _maxs = np.max(fvecs, axis=0, where=_mask, initial=-1e300) - - # abs is necessary because if all values are infinite, the diffs can switch sign - # due to the initial value in the masked min and max - _diff = _maxs - _mins - - # Due to the initial value of the masked min and max, the sign of the diff can - # be negative if all values are infinite. In that case we want to switch the - # signe of _diff, _mins and _maxs. 
- _signs = np.sign(_diff) - _diff *= _signs - _maxs *= _signs - _mins *= _signs - - _pos_penalty = _maxs + 2 * _diff + 1 - _neg_penalty = _mins - 2 * _diff - 1 - - out = np.nan_to_num( - fvecs, nan=_pos_penalty, posinf=_pos_penalty, neginf=_neg_penalty - ) - - return out diff --git a/src/estimagic/optimization/tranquilo/history.py b/src/estimagic/optimization/tranquilo/history.py deleted file mode 100644 index b2144df1f..000000000 --- a/src/estimagic/optimization/tranquilo/history.py +++ /dev/null @@ -1,261 +0,0 @@ -import numpy as np - - -class History: - """Container to save and retrieve history entries. - - These entries are: xs, fvecs and fvals. - - fvals don't need to be added explicitly, as they are computed internally whenever - new entries are added. - - """ - - def __init__(self, functype): - self.xs = None - self.fvecs = None - self.fvals = None - self.n_xs = 0 - self.n_fun = 0 - self.index_mapper = {} - - self.functype = functype - - if functype == "scalar": - self.aggregate = lambda x: x.flatten() - elif functype == "likelihood": - self.aggregate = lambda x: x.sum(axis=-1) - elif functype == "least_squares": - self.aggregate = lambda x: (x**2).sum(axis=-1) - else: - raise ValueError( - "functype must be 'scalar', 'likelihood' or 'least_squares'." - ) - - def add_xs(self, xs): - """Add new parameter vectors to the history and return their indices. - - Args: - xs (np.ndarray or list): 1d or 2d array or list of 1d arrays with - parameter vectors. - - Returns: - np.ndarray: 1d array with indices of the added xs. - - """ - is_single = np.ndim(xs) == 1 - - xs = np.atleast_2d(xs) - - n_new_points = len(xs) if xs.size != 0 else 0 - - if n_new_points == 0: - return [] - - self.xs = _add_entries_to_array(self.xs, xs, self.n_xs) - - x_indices = np.arange(self.n_xs, self.n_xs + n_new_points) - - for x_index in x_indices: - self.index_mapper[x_index] = [] - - self.n_xs += n_new_points - - if is_single: - x_indices = x_indices[0] - - return x_indices - - def add_evals(self, x_indices, evals): - """Add new function evaluations to the history. - - Args: - x_indices (int, list or np.ndarray): Indices of the xs at which the function - was evaluated. - evals (np.ndarray or list): 1d or 2d array or list of 1d arrays with - least-squares fvecs. - - """ - x_indices = np.atleast_1d(x_indices) - - if not (x_indices < self.n_xs).all(): - raise ValueError( - "You requested to store a function evaluation for an x vector that is " - "not in the history." - ) - - n_new_points = len(x_indices) - - if n_new_points == 0: - return - - if self.functype == "scalar": - fvecs = np.reshape(evals, (-1, 1)) - else: - fvecs = np.atleast_2d(evals) - - fvals = np.atleast_1d(self.aggregate(fvecs)) - - if n_new_points != len(fvecs): - raise ValueError() - - self.fvecs = _add_entries_to_array(self.fvecs, fvecs, self.n_fun) - self.fvals = _add_entries_to_array(self.fvals, fvals, self.n_fun) - - f_indices = np.arange(self.n_fun, self.n_fun + n_new_points) - - for x_index, f_index in zip(x_indices, f_indices): - self.index_mapper[x_index].append(f_index) - - self.n_fun += n_new_points - - def get_xs(self, x_indices=None): - """Retrieve xs from history. - - Args: - x_indices (int, slice or sequence): Specifies the subset of rows that will - be returned. Anything that can be used to index into a 1d numpy array - is allowed.
- - Returns: - np.ndarray: 1d or 2d array with parameter vectors - - """ - if isinstance(x_indices, np.ndarray): - x_indices = x_indices.astype(int) - - out = self.xs[: self.n_xs] - out = out[x_indices] if x_indices is not None else out - - return out - - def get_fvecs(self, x_indices): - """Retrieve fvecs from history. - - Args: - x_indices (int, slice or sequence): Specifies the subset of rows that will - be returned. Anything that can be used to index into a 1d numpy array - is allowed. - - Returns: - np.ndarray or dict: If x_indices is a scalar, a single array is returned. - Otherwise, a dictionary with x_indices as keys and arrays as values is - returned. - - """ - out = _extract_from_indices( - arr=self.fvecs[: self.n_fun], - mapper=self.index_mapper, - x_indices=x_indices, - n_xs=self.n_xs, - ) - return out - - def get_fvals(self, x_indices): - """Retrieve fvals from history. - - Args: - x_indices (int, slice or sequence): Specifies the subset of rows that will - be returned. Anything that can be used to index into a 1d numpy array - is allowed. - - Returns: - np.ndarray or dict: If x_indices is a scalar, a single array is returned. - Otherwise, a dictionary with x_indices as keys and arrays as values is - returned. - - """ - out = _extract_from_indices( - arr=self.fvals[: self.n_fun], - mapper=self.index_mapper, - x_indices=x_indices, - n_xs=self.n_xs, - ) - return out - - def get_model_data(self, x_indices, average=True): - if np.isscalar(x_indices): - x_indices = [x_indices] - - raw_xs = self.get_xs(x_indices) - raw_fvecs = self.get_fvecs(x_indices) - - if average: - fvecs = np.array([np.mean(fvec, axis=0) for fvec in raw_fvecs.values()]) - xs = raw_xs - else: - fvecs = np.vstack(list(raw_fvecs.values())) - n_obs = np.array([len(fvec) for fvec in raw_fvecs.values()]) - xs = np.repeat(raw_xs, n_obs, axis=0) - - return xs, fvecs - - def get_n_fun(self): - return self.n_fun - - def get_n_xs(self): - return self.n_xs - - def get_x_indices_in_region(self, region): - # early return if there are no entries - if self.get_n_fun() == 0: - return np.array([]) - xs = self.get_xs() - mask = np.linalg.norm(xs - region.center, axis=1) <= region.radius - out = np.arange(len(mask))[mask] - return out - - def __repr__(self): - return f"History for {self.functype} function with {self.n_fun} entries." - - -def _add_entries_to_array(arr, new, position): - if arr is None: - shape = 1_000 if new.ndim == 1 else (1_000, new.shape[1]) - arr = np.full(shape, np.nan) - - n_new_points = len(new) if new.size != 0 else 0 - - if len(arr) - position - n_new_points < 0: - n_extend = max(len(arr), n_new_points) - if arr.ndim == 2: - extension_shape = (n_extend, arr.shape[1]) - arr = np.vstack([arr, np.full(extension_shape, np.nan)]) - else: - arr = np.hstack([arr, np.full(n_extend, np.nan)]) - - arr[position : position + n_new_points] = new - - return arr - - -def _extract_from_indices(arr, mapper, x_indices, n_xs): - """Retrieve fvecs or fvals from history. - - Args: - arr (np.ndarray): 1d or 2d Array with function values. - mapper (dict): Maps x indices to f indices. - x_indices (None, int or np.ndarray): Specifies the subset of parameter - vectors for which the function values will be returned. - - Returns: - dict or np.ndarray: If x_indices is a scalar, a single array is returned. - Otherwise, a dictionary with x_indices as keys and arrays as values is - returned. 
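- 
-         Example (editor's sketch):
- 
-             import numpy as np
- 
-             history = History(functype="scalar")
-             indices = history.add_xs(np.arange(6).reshape(3, 2))
-             history.add_evals(indices, [1.0, 2.0, 3.0])
-             history.get_fvals(0)  # array([1.])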
- - """ - if isinstance(x_indices, np.ndarray): - x_indices = x_indices.astype(int) - - is_single = np.isscalar(x_indices) - if is_single: - x_indices = [x_indices] - - indices = np.arange(n_xs)[x_indices].tolist() - - out = {i: arr[mapper[i]] for i in indices} - - if is_single: - out = out[x_indices[0]] - - return out diff --git a/src/estimagic/optimization/tranquilo/models.py b/src/estimagic/optimization/tranquilo/models.py deleted file mode 100644 index 50b63da80..000000000 --- a/src/estimagic/optimization/tranquilo/models.py +++ /dev/null @@ -1,295 +0,0 @@ -from dataclasses import dataclass, replace -from typing import Union - -import numpy as np -from numba import njit - - -@dataclass(frozen=True) -class VectorModel: - intercepts: np.ndarray # shape (n_residuals,) - linear_terms: np.ndarray # shape (n_residuals, n_params) - square_terms: Union[ - np.ndarray, None - ] = None # shape (n_residuals, n_params, n_params) - - # scale and shift correspond to effective_radius and effective_center of the region - # on which the model was fitted - scale: Union[float, np.ndarray] = None - shift: np.ndarray = None - - def predict(self, x: np.ndarray) -> np.ndarray: - return _predict_vector(self, x) - - # make it behave like a NamedTuple - def _replace(self, **kwargs): - return replace(self, **kwargs) - - -@dataclass(frozen=True) -class ScalarModel: - intercept: float - linear_terms: np.ndarray # shape (n_params,) - square_terms: Union[np.ndarray, None] = None # shape (n_params, n_params) - - # scale and shift correspond to effective_radius and effective_center of the region - # on which the model was fitted - scale: Union[float, np.ndarray] = None - shift: np.ndarray = None - - def predict(self, x: np.ndarray) -> np.ndarray: - return _predict_scalar(self, x) - - # make it behave like a NamedTuple - def _replace(self, **kwargs): - return replace(self, **kwargs) - - -def _predict_vector(model: VectorModel, x_unit: np.ndarray) -> np.ndarray: - """Evaluate a VectorModel at x_unit. - - We utilize that a quadratic model can be written in the form: - - Equation 1: f(x) = a + x.T @ g + 0.5 * x.T @ H @ x, - - with symmetric H. Note that H = f''(x), while g = f'(x) - H @ x. If we consider a - polynomial expansion around x = 0, we therefore get g = f'(x). Hence, g, H can be - thought of as the gradient and Hessian. Note that here we consider the case of - f(x) being vector-valued. In this case the above equation holds for each entry of - f seperately. - - Args: - model (VectorModel): The aggregated model. Has entries: - - 'intercepts': corresponds to 'a' in the above equation - - 'linear_terms': corresponds to 'g' in the above equation - - 'square_terms': corresponds to 'H' in the above equation - x_unit (np.ndarray): New data. Has shape (n_params,) or (n_samples, n_params). - - Returns: - np.ndarray: Model evaluations, has shape (n_samples, n_residuals) if x is 2d - and (n_residuals,) if x is 1d. - - """ - is_flat_x = x_unit.ndim == 1 - - x = np.atleast_2d(x_unit) - - y = model.linear_terms @ x.T + model.intercepts.reshape(-1, 1) - - if model.square_terms is not None: - y += np.sum((x @ model.square_terms) * x, axis=2) / 2 - - if is_flat_x: - out = y.flatten() - else: - out = y.T.reshape(len(x_unit), -1) - - return out - - -def add_models(model1, model2): - """Add two models. - - Args: - model1 (Union[ScalarModel, VectorModel]): The first model. - model2 (Union[ScalarModel, VectorModel]): The second model. - - Returns: - Union[ScalarModel, VectorModel]: The sum of the two models. 
- - """ - if type(model1) != type(model2): - raise TypeError("Models must be of the same type.") - - if not np.allclose(model1.shift, model2.shift): - raise ValueError("Models must have the same shift.") - - if not np.allclose(model1.scale, model2.scale): - raise ValueError("Models must have the same scale.") - - new = {} - if isinstance(model1, ScalarModel): - new["intercept"] = model1.intercept + model2.intercept - else: - new["intercepts"] = model1.intercepts + model2.intercepts - - new["linear_terms"] = model1.linear_terms + model2.linear_terms - - if model1.square_terms is not None: - assert model2.square_terms is not None - new["square_terms"] = model1.square_terms + model2.square_terms - - out = replace(model1, **new) - return out - - -def move_model(model, new_region): - """Move a model to a new region. - - Args: - model (Union[ScalarModel, VectorModel]): The model to move. - new_region (Region): The new region. - - Returns: - Union[ScalarModel, VectorModel]: The moved model. - - """ - # undo old scaling - out = _scale_model(model, factor=1 / model.scale) - - # shift the center - shift = new_region.effective_center - model.shift - if isinstance(model, ScalarModel): - out = _shift_scalar_model(out, shift=shift) - else: - out = _shift_vector_model(out, shift=shift) - - # apply new scaling - new_scale = new_region.effective_radius - out = _scale_model(out, factor=new_scale) - return out - - -def _scale_model(model, factor): - """Scale a scalar or vector model to a new radius. - - Args: - model (Union[ScalarModel, VectorModel]): The model to scale. - factor (Union[float, np.ndarray]): The scaling factor. - - Returns: - Union[ScalarModel, VectorModel]: The scaled model. - - """ - new_g = model.linear_terms * factor - new_h = None if model.square_terms is None else model.square_terms * factor**2 - - out = model._replace( - linear_terms=new_g, - square_terms=new_h, - scale=model.scale * factor, - ) - return out - - -def _shift_scalar_model(model, shift): - """Shift a scalar model to a new center. - - Args: - model (ScalarModel): The model to shift. - shift (np.ndarray): The shift. - - Returns: - ScalarModel: The shifted model. - - """ - new_c = model.predict(shift) - new_g = model.linear_terms + model.square_terms @ shift - - out = model._replace( - intercept=new_c, - linear_terms=new_g, - shift=model.shift + shift, - ) - return out - - -def _shift_vector_model(model, shift): - """Shift a vector model to a new center. - - Args: - model (VectorModel): The model to shift. - shift (np.ndarray): The shift. - - Returns: - VectorModel: The shifted model. - - """ - new_c = model.predict(shift) - - new_g = model.linear_terms - - if model.square_terms is not None: - new_g += shift @ model.square_terms - - out = model._replace( - intercepts=new_c, - linear_terms=new_g, - shift=model.shift + shift, - ) - return out - - -def _predict_scalar(model: ScalarModel, x_unit: np.ndarray) -> np.ndarray: - """Evaluate a ScalarModel at x_unit. - - We utilize that a quadratic model can be written in the form: - - Equation 1: f(x) = a + x.T @ g + 0.5 * x.T @ H @ x, - - with symmetric H. Note that H = f''(x), while g = f'(x) - H @ x. If we consider a - polynomial expansion around x = 0, we therefore get g = f'(x). Hence, g, H can be - thought of as the gradient and Hessian. - - Args: - model (ScalarModel): The aggregated model. 
Has entries: - - 'intercept': corresponds to 'a' in the above equation - - 'linear_terms': corresponds to 'g' in the above equation - - 'square_terms': corresponds to 'H' in the above equation - x_unit (np.ndarray): New data. Has shape (n_params,) or (n_samples, - n_params). - - Returns: - np.ndarray or float: Model evaluations, an array with shape (n_samples,) if x - is 2d and a float otherwise. - - """ - is_flat_x = x_unit.ndim == 1 - - x = np.atleast_2d(x_unit) - - y = x @ model.linear_terms + model.intercept - - if model.square_terms is not None: - y += np.sum((x @ model.square_terms) * x, axis=1) / 2 - - if is_flat_x: - out = y.flatten()[0] - else: - out = y.flatten() - - return out - - -def n_free_params(dim, model_type): - """Number of free parameters in a model specified by name or model_info.""" - out = dim + 1 - if model_type in ("linear", "quadratic"): - if model_type == "quadratic": - out += n_second_order_terms(dim) - else: - raise ValueError() - return out - - -@njit -def n_second_order_terms(dim): - """Number of free second order terms in a quadratic model.""" - return dim * (dim + 1) // 2 - - -@njit -def n_interactions(dim): - """Number of free interaction terms in a quadratic model.""" - return dim * (dim - 1) // 2 - - -def is_second_order_model(model_or_info): - """Check if a model has any second order terms.""" - if isinstance(model_or_info, str): - out = model_or_info == "quadratic" - elif isinstance(model_or_info, (ScalarModel, VectorModel)): - out = model_or_info.square_terms is not None - else: - raise TypeError() - return out diff --git a/src/estimagic/optimization/tranquilo/options.py b/src/estimagic/optimization/tranquilo/options.py deleted file mode 100644 index 651f19100..000000000 --- a/src/estimagic/optimization/tranquilo/options.py +++ /dev/null @@ -1,210 +0,0 @@ -from typing import NamedTuple -from estimagic.optimization.tranquilo.models import n_free_params - -import numpy as np - - -def get_default_radius_options(x): - return RadiusOptions(initial_radius=0.1 * np.max(np.abs(x))) - - -def get_default_batch_size(n_cores): - return n_cores - - -def get_default_acceptance_decider(noisy): - return "noisy" if noisy else "classic" - - -def get_default_sample_size(model_type, x): - if model_type == "quadratic": - out = 2 * len(x) + 1 - else: - out = len(x) + 1 - - return out - - -def get_default_model_fitter(model_type, sample_size, x): - n_params = n_free_params(dim=len(x), model_type=model_type) - if model_type == "linear" or sample_size >= n_params: - fitter = "ols" - else: - fitter = "tranquilo" - return fitter - - -def get_default_residualize(model_fitter): - return model_fitter == "tranquilo" - - -def get_default_subsolver(bounds, cube_subsolver, sphere_subsolver): - return cube_subsolver if bounds.has_any else sphere_subsolver - - -def get_default_search_radius_factor(functype): - return 4.25 if functype == "scalar" else 5.0 - - -def get_default_model_type(functype): - return "quadratic" if functype == "scalar" else "linear" - - -def get_default_aggregator(functype, model_type): - if functype == "scalar" and model_type == "quadratic": - aggregator = "identity" - elif functype == "least_squares" and model_type == "linear": - aggregator = "least_squares_linear" - elif functype == "likelihood" and model_type == "linear": - aggregator = "information_equality_linear" - else: - allowed_combinations = { - "scalar": "quadratic", - "least_squares": "linear", - "likelihood": "linear", - } - raise NotImplementedError( - "The requested combination of functype and 
model_type is not supported. " - f"Allowed combinations are: {list(allowed_combinations.items())}." - ) - - return aggregator - - -def get_default_n_evals_at_start(noisy): - return 5 if noisy else 1 - - -class StopOptions(NamedTuple): - """Criteria for stopping without successful convergence.""" - - max_iter: int - max_eval: int - max_time: float - - -class ConvOptions(NamedTuple): - """Criteria for successful convergence.""" - - disable: bool - ftol_abs: float - gtol_abs: float - xtol_abs: float - ftol_rel: float - gtol_rel: float - xtol_rel: float - min_radius: float - - -class RadiusOptions(NamedTuple): - """Options for trust-region radius management.""" - - initial_radius: float - min_radius: float = 1e-6 - max_radius: float = 1e6 - rho_decrease: float = 0.1 - rho_increase: float = 0.1 - shrinking_factor: float = 0.5 - expansion_factor: float = 2.0 - large_step: float = 0.5 - max_radius_to_step_ratio: float = np.inf - - -class AcceptanceOptions(NamedTuple): - confidence_level: float = 0.8 - power_level: float = 0.8 - n_initial: int = 5 - n_min: int = 5 - n_max: int = 100 - min_improvement: float = 0.0 - - -class StagnationOptions(NamedTuple): - min_relative_step_keep: float = 0.125 - min_relative_step: float = 0.05 - sample_increment: int = 1 - max_trials: int = 1 - drop: bool = True - - -class SubsolverOptions(NamedTuple): - maxiter: int = 20 - maxiter_gradient_descent: int = 5 - conjugate_gradient_method: str = "cg" - gtol_abs: float = 1e-8 - gtol_rel: float = 1e-8 - gtol_scaled: float = 0.0 - gtol_abs_conjugate_gradient: float = 1e-8 - gtol_rel_conjugate_gradient: float = 1e-6 - k_easy: float = 0.1 - k_hard: float = 0.2 - - -class FitterOptions(NamedTuple): - l2_penalty_linear: float = 0.0 - l2_penalty_square: float = 0.1 - p_intercept: float = 0.05 - p_linear: float = 0.4 - p_square: float = 1.0 - - -class VarianceEstimatorOptions(NamedTuple): - max_distance_factor: float = 3.0 - min_n_evals: int = 3 - - -class FilterOptions(NamedTuple): - strictness: float = 1e-10 - shape: str = "sphere" - - -class SamplerOptions(NamedTuple): - distribution: str = None - hardness: float = 1 - algorithm: str = "scipy_lbfgsb" - algo_options: dict = None - criterion: str = None - n_points_randomsearch: int = 1 - return_info: bool = False - - -def update_option_bundle(default_options, user_options=None): - """Update default options with user options. - - The user option is converted to the type of the default option if possible. - - Args: - default_options (NamedTuple): Options that behave like a `typing.NamedTuple`, - i.e. have _fields as well as _asdict and _replace methods. - user_options (NamedTuple, Dict or None): User options as a dict or NamedTuple. - The default options will be updated by the user options. - - """ - if user_options is None: - return default_options - - # convert user options to dict - if not isinstance(user_options, dict): - user_options = user_options._asdict() - - # check that all user options are valid - invalid_fields = set(user_options) - set(default_options._fields) - if invalid_fields: - raise ValueError( - f"The following user options are not valid: {invalid_fields}. " - f"Valid options are {default_options._fields}." 
- ) - - # convert types if possible - typed = {} - for k, v in user_options.items(): - target_type = type(getattr(default_options, k)) - if isinstance(v, target_type): - typed[k] = v - else: - typed[k] = target_type(v) - - # update default options - out = default_options._replace(**typed) - - return out diff --git a/src/estimagic/optimization/tranquilo/poisedness.py b/src/estimagic/optimization/tranquilo/poisedness.py deleted file mode 100644 index 1359e0ddc..000000000 --- a/src/estimagic/optimization/tranquilo/poisedness.py +++ /dev/null @@ -1,211 +0,0 @@ -from functools import partial - -import numpy as np -from scipy.optimize import Bounds, NonlinearConstraint, minimize - - -def get_poisedness_constant(sample, shape="sphere"): - """Calculate the lambda poisedness constant of a sample. - - Note that the sample space is a trust-region with center 0 and radius 1. - It may be a (hyper-) sphere or cube. - - The implementation is based on :cite:`Conn2009`, Chapters 3 and 4. - - In general, if the sample is lambda-poised with a small lambda, where lambda >= 1, - the sample is said to have "good" geometry or to "span" the trust-region well. - As lambda grows, the system represented by these points becomes increasingly - linearly dependent. - - Formal definition: - A sample Y is said to be lambda-poised on a region of interest if it is linearly - independent and the Lagrange polynomials L(i) of points 1 through N in Y satisfy: - - lambda >= max_i max_x | L(i) | (1) - - i.e. for each point i in the sample, we maximize the absolute criterion value - of its Lagrange polynomial L(i); we then take the maximum over all these - criterion values as the lambda constant. - - When we compare different samples on the same trust-region, we are usually - interested in keeping the sample with the least lambda, so that (1) holds. - - - Args: - sample (np.ndarray): Array of shape (n_samples, n_params) containing the scaled - sample of points that lie within a trust-region with center 0 and radius 1. - shape (str): Geometric shape of the sample space. One of "sphere", "cube". - Default is "sphere". - - Returns: - tuple: - - lambda (float): The lambda poisedness constant. - - argmax (np.ndarray): 1d array of shape (n_params,) containing the - parameter vector that maximizes lambda. - - idx_max (int): Index relating to the position of the argmax in the sample. - - """ - n_params = sample.shape[1] - options = _get_minimize_options(shape, n_params) - - center = np.zeros(n_params) - lagrange_mat = _lagrange_poly_matrix(sample) - - lambda_ = 0 - idx_max = None - - for idx, poly in enumerate(lagrange_mat): - intercept = poly[0] - linear_terms = poly[1 : n_params + 1] - _coef_square_terms = poly[n_params + 1 :] - square_terms = _reshape_coef_to_square_terms(_coef_square_terms, n_params) - - neg_criterion = partial( - _eval_neg_absolute_value, - intercept=intercept, - linear_terms=linear_terms, - square_terms=square_terms, - ) - - result_max = minimize(fun=neg_criterion, x0=center, **options) - - critval = _eval_absolute_value( - result_max.x, intercept, linear_terms, square_terms - ) - - if critval > lambda_: - lambda_ = critval - argmax = result_max.x - idx_max = idx - - return lambda_, argmax, idx_max - - -def improve_poisedness(sample, shape="sphere", maxiter=5): - """Improve the lambda poisedness of the sample. - - The poisedness of the sample is improved in an incremental manner; replacing - one point at a time and reducing the upper bound on the absolute value of - the Lagrange polynomial.
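- 
-     Example (editor's sketch; the inner optimization in
-     ``get_poisedness_constant`` makes this comparatively slow, so only a tiny
-     sample is used):
- 
-         import numpy as np
- 
-         sample = np.array([[0.0, 0.0], [0.05, 0.0], [0.0, 1.0]])  # two close points
-         improved, lambdas = improve_poisedness(sample, maxiter=2)
-         # lambdas records the poisedness constant at each iteration and should
-         # decrease as badly placed points are replaced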
- - The implementation is based on algorithm 6.3 in :cite:`Conn2009`, - Chapter 6, p. 95 ff. - - Args: - sample (np.ndarray): Array of shape (n_samples, n_params). - shape (str): Geometric shape of the sample space. One of "sphere", "cube". - Default is "sphere". - maxiter (int): Maximum number of replacement iterations. Default is 5. - - Returns: - tuple: - - sample_improved (np.ndarray): Sample with improved poisedness. - - lambdas (list): History of lambdas. - - """ - sample_improved = sample.copy() - - lambdas = [] - - for _ in range(maxiter): - lambda_, argmax, idx_max = get_poisedness_constant( - sample=sample_improved, shape=shape - ) - - lambdas += [lambda_] - sample_improved[idx_max] = argmax - - return sample_improved, lambdas - - -def _lagrange_poly_matrix(sample): - """Construct matrix of Lagrange polynomials. - - See :cite:`Conn2009`, Chapter 4.2, p. 60. - - Args: - sample (np.ndarray): Array of shape (n_samples, n_params). - - Returns: - np.ndarray: Matrix of Lagrange polynomials of shape - (n_samples, 1 + n_params + n_params * (n_params + 1) // 2). - - """ - basis_mat = _scaled_polynomial_features(sample) - lagrange_mat = basis_mat @ np.linalg.pinv(basis_mat.T @ basis_mat) - - return lagrange_mat - - -def _scaled_polynomial_features(x): - """Construct intercept, linear terms, interactions, and scaled square terms. - - The square terms are scaled by 1 / 2. - - Args: - x (np.ndarray): Array of shape (n_samples, n_params). - - Returns: - np.ndarray: Intercept, linear terms, interactions and scaled square terms. - Has shape (n_samples, 1 + n_params + n_params * (n_params + 1) // 2). - - """ - n_samples, n_params = np.atleast_2d(x).shape - n_poly_terms = n_params * (n_params + 1) // 2 - - poly_terms = np.empty((n_poly_terms, n_samples), np.float64) - xt = x.T - - idx = 0 - for i in range(n_params): - poly_terms[idx] = 0.5 * xt[i] ** 2 - idx += 1 - - for j in range(i + 1, n_params): - poly_terms[idx] = xt[i] * xt[j] - idx += 1 - - intercept = np.ones((1, n_samples), x.dtype) - out = np.concatenate((intercept, xt, poly_terms), axis=0) - - return out.T - - -def _reshape_coef_to_square_terms(coef, n_params): - """Reshape square coefficients to matrix of square terms.""" - mat = np.empty((n_params, n_params)) - idx = -1 - - for j in range(n_params): - for i in range(j + 1): - idx += 1 - mat[i, j] = coef[idx] - mat[j, i] = coef[idx] - - return mat - - -def _get_minimize_options(shape, n_params): - """Get the minimizer options.""" - if shape == "sphere": - nonlinear_constraint = NonlinearConstraint(lambda x: np.linalg.norm(x), 0, 1) - options = {"method": "trust-constr", "constraints": [nonlinear_constraint]} - - elif shape == "cube": - bound_constraints = Bounds(-np.ones(n_params), np.ones(n_params)) - options = {"method": "trust-constr", "bounds": bound_constraints} - - else: - raise ValueError( - f"Invalid shape argument: {shape}. Must be one of sphere, cube."
- ) - - return options - - -def _eval_absolute_value(x, intercept, linear_terms, square_terms): - return np.abs(intercept + linear_terms.T @ x + 0.5 * x.T @ square_terms @ x) - - -def _eval_neg_absolute_value(x, intercept, linear_terms, square_terms): - return -_eval_absolute_value(x, intercept, linear_terms, square_terms) diff --git a/src/estimagic/optimization/tranquilo/process_arguments.py b/src/estimagic/optimization/tranquilo/process_arguments.py deleted file mode 100644 index 0c75d8cd9..000000000 --- a/src/estimagic/optimization/tranquilo/process_arguments.py +++ /dev/null @@ -1,314 +0,0 @@ -import numpy as np - -from estimagic.optimization.algo_options import ( - CONVERGENCE_RELATIVE_CRITERION_TOLERANCE, - CONVERGENCE_RELATIVE_GRADIENT_TOLERANCE, -) -from estimagic.optimization.tranquilo.acceptance_decision import get_acceptance_decider -from estimagic.optimization.tranquilo.aggregate_models import get_aggregator -from estimagic.optimization.tranquilo.bounds import Bounds -from estimagic.optimization.tranquilo.estimate_variance import get_variance_estimator -from estimagic.optimization.tranquilo.filter_points import get_sample_filter -from estimagic.optimization.tranquilo.fit_models import get_fitter -from estimagic.optimization.tranquilo.history import History -from estimagic.optimization.tranquilo.options import ( - ConvOptions, - StagnationOptions, - StopOptions, - get_default_acceptance_decider, - get_default_aggregator, - get_default_batch_size, - get_default_model_fitter, - get_default_residualize, - get_default_model_type, - get_default_n_evals_at_start, - get_default_radius_options, - get_default_sample_size, - get_default_search_radius_factor, - update_option_bundle, -) -from estimagic.optimization.tranquilo.region import Region -from estimagic.optimization.tranquilo.sample_points import get_sampler -from estimagic.optimization.tranquilo.solve_subproblem import get_subsolver -from estimagic.optimization.tranquilo.wrap_criterion import get_wrapped_criterion - - -def process_arguments( - # functype, will be partialled out - functype, - # problem description - criterion, - x, - lower_bounds=None, - upper_bounds=None, - *, - # basic options - noisy=False, - # convergence options - disable_convergence=False, - convergence_absolute_criterion_tolerance=0.0, - convergence_absolute_gradient_tolerance=0.0, - convergence_absolute_params_tolerance=0.0, - convergence_relative_criterion_tolerance=CONVERGENCE_RELATIVE_CRITERION_TOLERANCE, - convergence_relative_gradient_tolerance=CONVERGENCE_RELATIVE_GRADIENT_TOLERANCE, - convergence_relative_params_tolerance=1e-8, - convergence_min_trust_region_radius=0.0, - # stopping options - stopping_max_criterion_evaluations=2_000, - stopping_max_iterations=200, - stopping_max_time=np.inf, - # single advanced options - batch_evaluator="joblib", - n_cores=1, - batch_size=None, - sample_size=None, - model_type=None, - search_radius_factor=None, - n_evals_per_point=1, - n_evals_at_start=None, - seed=925408, - # bundled advanced options - radius_options=None, - stagnation_options=None, - # component names and related options - sampler="optimal_hull", - sampler_options=None, - sample_filter="keep_all", - sample_filter_options=None, - model_fitter=None, - model_fitter_options=None, - cube_subsolver="bntr_fast", - sphere_subsolver="gqtpar_fast", - subsolver_options=None, - acceptance_decider=None, - acceptance_decider_options=None, - variance_estimator="classic", - variance_estimator_options=None, - infinity_handler="relative", - residualize=None, -): - # 
process convergence options - conv_options = ConvOptions( - disable=bool(disable_convergence), - ftol_abs=float(convergence_absolute_criterion_tolerance), - gtol_abs=float(convergence_absolute_gradient_tolerance), - xtol_abs=float(convergence_absolute_params_tolerance), - ftol_rel=float(convergence_relative_criterion_tolerance), - gtol_rel=float(convergence_relative_gradient_tolerance), - xtol_rel=float(convergence_relative_params_tolerance), - min_radius=float(convergence_min_trust_region_radius), - ) - - # process stopping options - stop_options = StopOptions( - max_iter=int(stopping_max_iterations), - max_eval=int(stopping_max_criterion_evaluations), - max_time=float(stopping_max_time), - ) - - # process simple options with static defaults - x = _process_x(x) - noisy = _process_noisy(noisy) - n_cores = _process_n_cores(n_cores) - stagnation_options = update_option_bundle(StagnationOptions(), stagnation_options) - n_evals_per_point = int(n_evals_per_point) - sampling_rng = _process_seed(seed) - n_evals_at_start = _process_n_evals_at_start( - n_evals_at_start, - noisy, - ) - - # process options that depend on arguments with static defaults - search_radius_factor = _process_search_radius_factor(search_radius_factor, functype) - batch_size = _process_batch_size(batch_size, n_cores) - radius_options = update_option_bundle(get_default_radius_options(x), radius_options) - model_type = _process_model_type(model_type, functype) - acceptance_decider = _process_acceptance_decider(acceptance_decider, noisy) - - # process options that depend on arguments with dependent defaults - target_sample_size = _process_sample_size( - sample_size=sample_size, - model_type=model_type, - x=x, - ) - model_fitter = _process_model_fitter( - model_fitter, model_type=model_type, sample_size=target_sample_size, x=x - ) - residualize = _process_residualize(residualize, model_fitter=model_fitter) - - # initialize components - history = History(functype=functype) - history.add_xs(x) - evaluate_criterion = get_wrapped_criterion( - criterion=criterion, - batch_evaluator=batch_evaluator, - n_cores=n_cores, - history=history, - ) - _bounds = Bounds(lower_bounds, upper_bounds) - trustregion = Region( - center=x, - radius=radius_options.initial_radius, - bounds=_bounds, - ) - - sample_points = get_sampler(sampler, sampler_options) - - solve_subproblem = get_subsolver( - cube_solver=cube_subsolver, - sphere_solver=sphere_subsolver, - user_options=subsolver_options, - ) - - filter_points = get_sample_filter( - sample_filter=sample_filter, - user_options=sample_filter_options, - ) - - fit_model = get_fitter( - fitter=model_fitter, - fitter_options=model_fitter_options, - model_type=model_type, - infinity_handling=infinity_handler, - residualize=residualize, - ) - - aggregate_model = get_aggregator( - aggregator=get_default_aggregator(functype=functype, model_type=model_type), - ) - - estimate_variance = get_variance_estimator( - variance_estimator, - variance_estimator_options, - ) - - accept_candidate = get_acceptance_decider( - acceptance_decider, - acceptance_decider_options, - ) - - # put everything in a dict - out = { - "evaluate_criterion": evaluate_criterion, - "x": x, - "noisy": noisy, - "conv_options": conv_options, - "stop_options": stop_options, - "radius_options": radius_options, - "batch_size": batch_size, - "target_sample_size": target_sample_size, - "stagnation_options": stagnation_options, - "search_radius_factor": search_radius_factor, - "n_evals_per_point": n_evals_per_point, - "n_evals_at_start": 
n_evals_at_start, - "trustregion": trustregion, - "sampling_rng": sampling_rng, - "history": history, - "sample_points": sample_points, - "solve_subproblem": solve_subproblem, - "filter_points": filter_points, - "fit_model": fit_model, - "aggregate_model": aggregate_model, - "estimate_variance": estimate_variance, - "accept_candidate": accept_candidate, - } - - return out - - -def _process_x(x): - return np.asarray(x, dtype=np.float64) - - -def _process_noisy(noisy): - return bool(noisy) - - -def _process_n_cores(n_cores): - return int(n_cores) - - -def _process_batch_size(batch_size, n_cores): - if batch_size is None: - batch_size = get_default_batch_size(n_cores) - - elif batch_size < n_cores: - raise ValueError("batch_size must be at least as large as n_cores.") - - return int(batch_size) - - -def _process_sample_size(sample_size, model_type, x): - if sample_size is None: - out = get_default_sample_size(model_type=model_type, x=x) - elif callable(sample_size): - out = sample_size(x=x, model_type=model_type) - else: - out = int(sample_size) - return out - - -def _process_model_type(model_type, functype): - out = get_default_model_type(functype) if model_type is None else model_type - - if out not in ["linear", "quadratic"]: - raise ValueError("model_type must be either 'linear' or 'quadratic'.") - - return out - - -def _process_search_radius_factor(search_radius_factor, functype): - if search_radius_factor is None: - out = get_default_search_radius_factor(functype) - else: - out = float(search_radius_factor) - - if out <= 0: - raise ValueError("search_radius_factor must be positive.") - - return out - - -def _process_seed(seed): - return np.random.default_rng(seed) - - -def _process_acceptance_decider(acceptance_decider, noisy): - if acceptance_decider is None: - out = get_default_acceptance_decider(noisy) - else: - out = acceptance_decider - - return out - - -def _process_model_fitter(model_fitter, model_type, sample_size, x): - if model_fitter is None: - out = get_default_model_fitter(model_type, sample_size=sample_size, x=x) - else: - out = model_fitter - - return out - - -def _process_residualize(residualize, model_fitter): - if residualize is None: - out = get_default_residualize(model_fitter) - else: - if not isinstance(residualize, bool): - raise ValueError("residualize must be a boolean.") - out = residualize - - return out - - -def _process_n_evals_at_start(n_evals, noisy): - if n_evals is None: - out = get_default_n_evals_at_start(noisy) - else: - out = int(n_evals) - - if out < 1: - raise ValueError("n_initial_acceptance_evals must be non-negative.") - - return out diff --git a/src/estimagic/optimization/tranquilo/region.py b/src/estimagic/optimization/tranquilo/region.py deleted file mode 100644 index 48abe2bcb..000000000 --- a/src/estimagic/optimization/tranquilo/region.py +++ /dev/null @@ -1,152 +0,0 @@ -from dataclasses import dataclass, replace - -import numpy as np - -from estimagic.optimization.tranquilo.bounds import Bounds -from estimagic.optimization.tranquilo.volume import ( - get_radius_of_cube_with_volume_of_sphere, -) - - -@dataclass(frozen=True) -class Region: - """Trust region.""" - - center: np.ndarray - radius: float - bounds: Bounds = None - - def __post_init__(self): - shape = _get_shape(self.center, self.radius, self.bounds) - cube_bounds = _get_cube_bounds(self.center, self.radius, self.bounds, shape) - cube_center = _get_cube_center(cube_bounds) - effective_center = _get_effective_center(shape, self.center, cube_center) - effective_radius = 
_get_effective_radius(shape, self.radius, cube_bounds) - - # cannot use standard __setattr__ because it is frozen - super().__setattr__("shape", shape) - super().__setattr__("_cube_bounds", cube_bounds) - super().__setattr__("_cube_center", cube_center) - super().__setattr__("effective_center", effective_center) - super().__setattr__("effective_radius", effective_radius) - - @property - def cube_bounds(self) -> Bounds: - if self.shape == "sphere": - raise AttributeError( - "The trustregion is a sphere, and thus has no cube bounds." - ) - return self._cube_bounds - - @property - def cube_center(self) -> np.ndarray: - if self.shape == "sphere": - raise AttributeError( - "The trustregion is a sphere, and thus has no cube center." - ) - return self._cube_center - - def map_to_unit(self, x: np.ndarray) -> np.ndarray: - """Map points from the trustregion to the unit sphere or cube.""" - if self.shape == "sphere": - out = _map_to_unit_sphere(x, center=self.center, radius=self.radius) - else: - out = _map_to_unit_cube(x, cube_bounds=self.cube_bounds) - return out - - def map_from_unit(self, x: np.ndarray) -> np.ndarray: - """Map points from the unit sphere or cube to the trustregion.""" - if self.shape == "sphere": - out = _map_from_unit_sphere(x, center=self.center, radius=self.radius) - else: - cube_bounds = self.cube_bounds - out = _map_from_unit_cube(x, cube_bounds=cube_bounds) - # Bounds may not be satisfied exactly due to numerical inaccuracies. - out = np.clip(out, cube_bounds.lower, cube_bounds.upper) - return out - - # make it behave like a NamedTuple - def _replace(self, **kwargs): - return replace(self, **kwargs) - - -def _map_to_unit_cube(x, cube_bounds): - """Map points from the trustregion to the unit cube.""" - out = 2 * (x - cube_bounds.lower) / (cube_bounds.upper - cube_bounds.lower) - 1 - return out - - -def _map_to_unit_sphere(x, center, radius): - """Map points from the trustregion to the unit sphere.""" - out = (x - center) / radius - return out - - -def _map_from_unit_cube(x, cube_bounds): - """Map points from the unit cube to the trustregion.""" - out = (cube_bounds.upper - cube_bounds.lower) * (x + 1) / 2 + cube_bounds.lower - return out - - -def _map_from_unit_sphere(x, center, radius): - """Map points from the unit sphere to the trustregion.""" - out = x * radius + center - return out - - -def _get_shape(center, radius, bounds): - any_bounds_binding = _any_bounds_binding( - bounds=bounds, center=center, radius=radius - ) - return "cube" if any_bounds_binding else "sphere" - - -def _get_cube_bounds(center, radius, bounds, shape): - if shape == "cube": - radius = get_radius_of_cube_with_volume_of_sphere(radius, len(center)) - cube_bounds = _create_cube_bounds(center=center, radius=radius, bounds=bounds) - return cube_bounds - - -def _get_cube_center(cube_bounds): - cube_center = (cube_bounds.lower + cube_bounds.upper) / 2 - return cube_center - - -def _get_effective_center(shape, center, cube_center): - effective_center = center if shape == "sphere" else cube_center - return effective_center - - -def _get_effective_radius(shape, radius, cube_bounds): - if shape == "sphere": - effective_radius = radius - else: - effective_radius = (cube_bounds.upper - cube_bounds.lower) / 2 - return effective_radius - - -def _create_cube_bounds(center, radius, bounds): - """Get new bounds that define the intersection of the trustregion and the bounds.""" - lower_bounds = center - radius - upper_bounds = center + radius - - if bounds is not None and bounds.lower is not None: - lower_bounds = 
np.clip(lower_bounds, bounds.lower, np.inf) - - if bounds is not None and bounds.upper is not None: - upper_bounds = np.clip(upper_bounds, -np.inf, bounds.upper) - - return Bounds(lower=lower_bounds, upper=upper_bounds) - - -def _any_bounds_binding(bounds, center, radius): - """Check if any bound is binding, i.e. inside the trustregion.""" - out = False - if bounds is not None and bounds.has_any: - if bounds.lower is not None: - lower_binding = np.min(center - bounds.lower) <= radius - if bounds.upper is not None: - upper_binding = np.min(bounds.upper - center) <= radius - out = np.any(lower_binding) or np.any(upper_binding) - return out diff --git a/src/estimagic/optimization/tranquilo/rho_noise.py b/src/estimagic/optimization/tranquilo/rho_noise.py deleted file mode 100644 index 9df77335e..000000000 --- a/src/estimagic/optimization/tranquilo/rho_noise.py +++ /dev/null @@ -1,87 +0,0 @@ -import numpy as np - -from estimagic.optimization.tranquilo.acceptance_decision import calculate_rho - - -def simulate_rho_noise( - xs, - vector_model, - trustregion, - noise_cov, - model_fitter, - model_aggregator, - subsolver, - rng, - n_draws=100, - ignore_corelation=True, -): - """Simulate a rho that would obtain on average if there is no approximation error. - - This can be used to adjust the sample size in the presence of noise. - - Throughout this function the prefix true refers to what is considered as ground - truth for the purpose of the simulation. The prefix sim refers to the simulated - quantities. - - Args: - xs (np.ndarray): Sample of points on which surrogate models will be - fitted during the simulation. This sample is not scaled to the trustregion. - vector_model (VectorModel): A vector surrogate model that is taken as true model - for the simulation. In many cases this model was fitted on xs but this is - not a requirement. - trustregion (Region): The trustregion in which the optimization is performed. - noise_cov(np.ndarray): Covariance matrix of the noise. The noise is assumed to - be drawn from a multivariate normal distribution with mean zero and this - covariance matrix. - model_fitter (callable): A function that fits a model. - model_aggregator (callable): A function that aggregates a vector model to a - scalar model. - subsolver (callable): A function that solves the subproblem. - rng (np.random.Generator): Random number generator. - n_draws (int): Number of draws used to estimate the rho noise. - ignore_corelation (bool): If True, the noise is assumed to be uncorrelated and - only the diagonal entries of the covariance matrix are used. 
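    Returns:
        np.ndarray: The simulated rho values, one per noise draw; has shape
            (n_draws,) under the assumption that ``calculate_rho`` returns a
            scalar for each draw.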
- - """ - n_samples, n_params = xs.shape - n_residuals = len(noise_cov) - - x_unit = trustregion.map_to_unit(xs) - - true_fvecs = vector_model.predict(x_unit) - - true_scalar_model = model_aggregator(vector_model=vector_model) - - true_current_fval = true_scalar_model.predict(np.zeros(n_params)) - - if ignore_corelation: - noise_cov = np.diag(np.diag(noise_cov)) - - noise = rng.multivariate_normal( - mean=np.zeros(n_residuals), cov=noise_cov, size=n_draws * n_samples - ).reshape(n_draws, n_samples, n_residuals) - - rhos = [] - for draw in noise: - sim_fvecs = true_fvecs + draw - sim_vector_model = model_fitter( - xs, - sim_fvecs, - weights=None, - region=trustregion, - old_model=None, - ) - sim_scalar_model = model_aggregator(vector_model=sim_vector_model) - sim_sub_sol = subsolver(sim_scalar_model, trustregion) - - sim_candidate_fval = true_scalar_model.predict(sim_sub_sol.x_unit) - sim_actual_improvement = -(sim_candidate_fval - true_current_fval) - - sim_rho = calculate_rho( - actual_improvement=sim_actual_improvement, - expected_improvement=sim_sub_sol.expected_improvement, - ) - - rhos.append(sim_rho) - - return np.array(rhos) diff --git a/src/estimagic/optimization/tranquilo/sample_points.py b/src/estimagic/optimization/tranquilo/sample_points.py deleted file mode 100644 index b4d7a500c..000000000 --- a/src/estimagic/optimization/tranquilo/sample_points.py +++ /dev/null @@ -1,466 +0,0 @@ -from functools import partial - -import numpy as np -from scipy.spatial.distance import pdist -from scipy.special import gammainc, logsumexp - -import estimagic as em -from estimagic.optimization.tranquilo.get_component import get_component -from estimagic.optimization.tranquilo.options import SamplerOptions - - -def get_sampler(sampler, user_options=None): - """Get sampling function partialled options. - - Args: - sampler (str or callable): Name of a sampling method or sampling function. - The arguments of sampling functions need to be: ``trustregion``, - ``n_points``, ``rng``, ``existing_xs`` and ``bounds``. - Sampling functions need to return a dictionary with the entry "points" - (and arbitrary additional information). See ``reference_sampler`` for - details. - user_options (dict): Additional keyword arguments for the sampler. Options that - are not used by the sampler are ignored with a warning. If sampler is - 'hull_sampler' or 'optimal_hull_sampler' the user options must contain the - argument 'order', which is a positive integer. - - Returns: - callable: Function that depends on trustregion, n_points, existing_xs and - returns a new sample. - - """ - built_in_samplers = { - "random_interior": _interior_sampler, - "random_hull": _hull_sampler, - "optimal_hull": _optimal_hull_sampler, - } - - mandatory_args = [ - "trustregion", - "n_points", - "existing_xs", - "rng", - ] - - out = get_component( - name_or_func=sampler, - component_name="sampler", - func_dict=built_in_samplers, - user_options=user_options, - default_options=SamplerOptions(), - mandatory_signature=mandatory_args, - ) - - return out - - -def _interior_sampler( - trustregion, - n_points, - rng, - existing_xs=None, # noqa: ARG001 -): - """Random generation of trustregion points inside a ball or box. - - Args: - trustregion (Region): Trustregion. See module region.py. - n_points (int): how many new points to sample - rng (numpy.random.Generator): Random number generator. 
- existing_xs (np.ndarray or None): 2d numpy array in which each row is an - x vector at which the criterion function has already been evaluated, that - satisfies lower_bounds <= existing_xs <= upper_bounds. - - """ - if trustregion.shape == "sphere": - _sampler = _ball_sampler - else: - _sampler = _box_sampler - - out = _sampler( - trustregion=trustregion, - n_points=n_points, - rng=rng, - ) - return out - - -def _box_sampler( - trustregion, - n_points, - rng, -): - """Naive random generation of trustregion points inside a box. - - Args: - trustregion (Region): Trustregion. See module region.py. - n_points (int): how many new points to sample - rng (numpy.random.Generator): Random number generator. - existing_xs (np.ndarray or None): 2d numpy array in which each row is an - x vector at which the criterion function has already been evaluated, that - satisfies lower_bounds <= existing_xs <= upper_bounds. - - """ - n_params = len(trustregion.center) - bounds = trustregion.cube_bounds - points = rng.uniform( - low=bounds.lower, - high=bounds.upper, - size=(n_points, n_params), - ) - return points - - -def _ball_sampler( - trustregion, - n_points, - rng, -): - """Naive random generation of trustregion points inside a ball. - - Code is adapted from https://tinyurl.com/y3p2dz6b. - - Args: - trustregion (Region): Trustregion. See module region.py. - n_points (int): how many new points to sample - rng (numpy.random.Generator): Random number generator. - existing_xs (np.ndarray or None): 2d numpy array in which each row is an - x vector at which the criterion function has already been evaluated, that - satisfies lower_bounds <= existing_xs <= upper_bounds. - - """ - n_params = len(trustregion.center) - raw = rng.normal(size=(n_points, n_params)) - norm = np.linalg.norm(raw, axis=1, ord=2) - scale = gammainc(n_params / 2, norm**2 / 2) ** (1 / n_params) / norm - points = raw * scale.reshape(-1, 1) - out = trustregion.map_from_unit(points) - return out - - -def _hull_sampler( - trustregion, - n_points, - rng, - distribution, - existing_xs=None, # noqa: ARG001 -): - """Random generation of trustregion points on the hull of general sphere / cube. - - Points are sampled randomly on a hull of a sphere or cube. These points are then - mapped into the feasible region, which is defined by the intersection of the - trustregion and the bounds. - - Args: - trustregion (Region): Trustregion. See module region.py. - n_points (int): how many new points to sample - rng (numpy.random.Generator): Random number generator. - distribution (str): Distribution to use for initial sample before points are - projected onto unit hull. Must be in {'normal', 'uniform'}. - existing_xs (np.ndarray or None): 2d numpy array in which each row is an - x vector at which the criterion function has already been evaluated, that - satisfies lower_bounds <= existing_xs <= upper_bounds. - - """ - n_params = len(trustregion.center) - - if distribution is None: - distribution = "normal" if trustregion.shape == "sphere" else "uniform" - raw = _draw_from_distribution(distribution, rng=rng, size=(n_points, n_params)) - points = _project_onto_unit_hull(raw, trustregion_shape=trustregion.shape) - out = trustregion.map_from_unit(points) - return out - - -def _optimal_hull_sampler( - trustregion, - n_points, - rng, - distribution, - hardness, - algorithm, - algo_options, - criterion, - n_points_randomsearch, - return_info, - existing_xs=None, -): - """Optimal generation of trustregion points on the hull of general sphere / cube. 
- - Points are sampled optimally on a hull of a sphere or cube, where the criterion that - is maximized is the minimum distance of all pairs of points, except for pairs of - existing points. These points are then mapped into the feasible region, which is - defined by the intersection of the trustregion and the bounds. Instead of using a - hard minimum we return the soft minimum, whose accuracy we govern by the hardness - factor. For more information on the soft-minimum, see: - https://tinyurl.com/mrythbk4. - - Args: - trustregion (Region): Trustregion. See module region.py. - n_points (int): how many new points to sample - rng (numpy.random.Generator): Random number generator. - distribution (str): Distribution to use for initial sample before points are - projected onto unit hull. Must be in {'normal', 'uniform'}. - hardness (float): Positive scaling factor. As hardness tends to infinity the - soft minimum (logsumexp) approaches the hard minimum. Default is 1. A - detailed explanation is given in the docstring. - algorithm (str): Optimization algorithm. - algo_options (dict): Algorithm specific configuration of the optimization. See - :ref:`list_of_algorithms` for supported options of each algorithm. Default - sets ``stopping_max_iterations=2 * n_params + 5``. - criterion (str or None): "distance", "determinant" or None. - "distance": maximize the minimal distance between points, excluding - distances between existing points. This is a fast and relatively simple - optimization problem and yields the same points as "determinant" in - many circumstances. - "determinant": maximize the determinant of x'x, where x is the matrix - of points. This is known as d-optimality in the optimal design literature - and as Fekete points in the function approximation literature. This - criterion has the best theoretical properties but is very hard to - optimize. Thus the practical performance can be bad. - None: Use the "determinant" criterion if only one point is added and the - "distance" criterion if multiple points are added. - n_points_randomsearch (int): Number of random points from which to select - the best in terms of the Fekete criterion before starting the optimization. - Default is 1. - return_info (bool): Whether to also return a dict with diagnostic information - about the optimization. - existing_xs (np.ndarray or None): 2d numpy array in which each row is an - x vector at which the criterion function has already been evaluated, that - satisfies lower_bounds <= existing_xs <= upper_bounds. - - Returns: - - np.ndarray: Generated points. Has shape (n_points, len(trustregion.center)). - - dict: Information about the optimization. Only returned if ``return_info`` is - True. - - """ - n_params = len(trustregion.center) - - if n_points <= 0: - return np.array([]) - - if criterion is None: - criterion = "determinant" if n_points == 1 else "distance" - - algo_options = {} if algo_options is None else algo_options - if "stopping_max_iterations" not in algo_options: - algo_options["stopping_max_iterations"] = 2 * n_params + 5 - - if existing_xs is not None: - # map existing points into unit space for easier optimization - - existing_xs_unit = trustregion.map_to_unit(existing_xs) - - if criterion == "distance": - dist_to_center = np.linalg.norm(existing_xs_unit, axis=1) - not_centric = dist_to_center >= 0.1 - if not_centric.any(): - existing_xs_unit = existing_xs_unit[not_centric] - else: - existing_xs_unit = None - - else: - existing_xs_unit = None - - # Define criterion functions. "determinant" is the Fekete criterion and "distance" - # corresponds to an approximation of the Fekete criterion.
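    # Added note (not in the original code): for points stacked in a matrix X of
    # shape (n, d), the "determinant" criterion below is det(X.T @ X / n), while
    # the "distance" criterion is a logsumexp-smoothed version of
    # min_{i<j} ||x_i - x_j||**2. Maximizing either spreads the points over the hull.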
- criterion_kwargs = { - "existing_xs": existing_xs_unit, - "trustregion_shape": trustregion.shape, - "n_params": n_params, - } - - func_dict = { - "determinant": partial(_determinant_on_hull, **criterion_kwargs), - "distance": partial( - _minimal_pairwise_distance_on_hull, - **criterion_kwargs, - hardness=hardness, - ), - } - - # Select start params through random search - if distribution is None: - distribution = "normal" if trustregion.shape == "sphere" else "uniform" - - candidates = _draw_from_distribution( - distribution, rng=rng, size=(n_points_randomsearch, n_points, n_params) - ) - candidates = [ - _project_onto_unit_hull(_x, trustregion_shape=trustregion.shape) - for _x in candidates - ] - - if n_points_randomsearch == 1: - x0 = candidates[0] - else: - _fekete_criterion = [func_dict["determinant"](_x) for _x in candidates] - x0 = candidates[np.argmax(_fekete_criterion)] - - x0 = x0.flatten()  # flatten so that em.maximize uses fast path - - # With no existing points and a single new point there are zero pairs, so the - # pairwise distance criterion would raise an error; skip the optimization - if existing_xs_unit is None and n_points == 1: - opt_params = x0 - else: - res = em.maximize( - criterion=func_dict[criterion], - params=x0, - algorithm=algorithm, - lower_bounds=-np.ones_like(x0), - upper_bounds=np.ones_like(x0), - algo_options=algo_options, - ) - opt_params = res.params - - # Make sure the optimal sampling is actually better than the initial one with - # respect to the Fekete criterion. This could be violated if the surrogate - # criterion is not a good approximation or if the optimization fails. - start_fekete = func_dict["determinant"](x0) - end_fekete = func_dict["determinant"](opt_params) - - if start_fekete >= end_fekete: - opt_params = x0 - - points = _project_onto_unit_hull( - opt_params.reshape(-1, n_params), trustregion_shape=trustregion.shape - ) - points = trustregion.map_from_unit(points) - - # Collect additional information. Mostly used for testing. - info = { - "start_params": x0, - "opt_params": opt_params, - "start_fekete": start_fekete, - "opt_fekete": end_fekete, - } - - out = (points, info) if return_info else points - return out - - -# ====================================================================================== -# Helper functions -# ====================================================================================== - - -def _minimal_pairwise_distance_on_hull( - x, existing_xs, trustregion_shape, hardness, n_params -): - """Compute minimal pairwise distance of new and existing points. - - Instead of optimizing the distance of points in the feasible trustregion, this - criterion function leads to the maximization of the minimum distance of the points - in the unit space. These can then be mapped into the feasible trustregion. We do not - consider the distances between existing points. Instead of using a hard minimum we - return the soft minimum, whose accuracy we govern by the hardness factor. For more - information on the soft-minimum, see: https://tinyurl.com/mrythbk4. - - Args: - x (np.ndarray): Flattened 1d array of internal points. Each value is in [-1, 1]. - existing_xs (np.ndarray or None): 2d numpy array in which each row is an - x vector at which the criterion function has already been evaluated, that - satisfies -1 <= existing_xs <= 1. - trustregion_shape (str): Shape of the trustregion. Either "cube" or "sphere". - hardness (float): Positive scaling factor. As hardness tends to infinity the - soft minimum (logsumexp) approaches the hard minimum. Default is 1.
A - detailed explanation is given in the docstring. - n_params (int): Dimensionality of the problem. - - Returns: - float: The criterion value. - - """ - x = x.reshape(-1, n_params) - x = _project_onto_unit_hull(x, trustregion_shape=trustregion_shape) - - if existing_xs is not None: - sample = np.row_stack([x, existing_xs]) - n_existing_pairs = len(existing_xs) * (len(existing_xs) - 1) // 2 - slc = slice(0, -n_existing_pairs) if n_existing_pairs else slice(None) - else: - sample = x - slc = slice(None) - - dist = pdist(sample) ** 2 - - # drop distances between existing points. They could introduce flat spots. - dist = dist[slc] - - # soft minimum - crit_value = -logsumexp(-hardness * dist) - return crit_value - - -def _determinant_on_hull(x, existing_xs, trustregion_shape, n_params): - """Compute d-optimality criterion of new and existing points. - - Instead of optimizing the distance of points in the feasible trustregion, this - criterion function maximizes the determinant of x'x of the points in the unit - space, i.e. the d-optimality criterion. The optimal points can then be mapped - into the feasible trustregion. - - Args: - x (np.ndarray): Flattened 1d array of internal points. Each value is in [-1, 1]. - existing_xs (np.ndarray or None): 2d numpy array in which each row is an - x vector at which the criterion function has already been evaluated, that - satisfies -1 <= existing_xs <= 1. - trustregion_shape (str): Shape of the trustregion. Either "cube" or "sphere". - n_params (int): Dimensionality of the problem. - - Returns: - float: The criterion value. - - """ - x = x.reshape(-1, n_params) - n_samples = len(x) - - x = _project_onto_unit_hull(x, trustregion_shape=trustregion_shape) - - if existing_xs is not None: - sample = np.row_stack([x, existing_xs]) - else: - sample = x - - crit_value = np.linalg.det(sample.T @ sample / n_samples) - - return crit_value - - -def _draw_from_distribution(distribution, rng, size): - """Draw points from distribution. - - Args: - distribution (str): Distribution to use for initial sample before points are - projected onto unit hull. Must be in {'normal', 'uniform'}. - rng (np.random.Generator): Random number generator. - size (Union[int, tuple[int]]): Output shape. - - Returns: - np.ndarray: Randomly drawn points. - - """ - if distribution == "normal": - draw = rng.normal(size=size) - elif distribution == "uniform": - draw = rng.uniform(-1, 1, size=size) - else: - raise ValueError( - f"distribution is {distribution}, but needs to be in ('normal', 'uniform')." - ) - return draw - - -def _project_onto_unit_hull(x, trustregion_shape): - """Project points from the unit space onto the hull of a geometric figure. - - Args: - x (np.ndarray): 2d array of points to be projected. Each value is in [-1, 1]. - trustregion_shape (str): Shape of the trustregion: {'sphere', 'cube'}. - - Returns: - np.ndarray: The projected points.
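    Example (a minimal illustrative sketch, not part of the original docstring):

        import numpy as np

        x = np.array([[3.0, 4.0]])
        _project_onto_unit_hull(x, trustregion_shape="sphere")  # -> [[0.6, 0.8]]
        _project_onto_unit_hull(x, trustregion_shape="cube")    # -> [[0.75, 1.0]]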
- - """ - order = 2 if trustregion_shape == "sphere" else np.inf - norm = np.linalg.norm(x, axis=1, ord=order).reshape(-1, 1) - projected = x / norm - return projected diff --git a/src/estimagic/optimization/tranquilo/solve_subproblem.py b/src/estimagic/optimization/tranquilo/solve_subproblem.py deleted file mode 100644 index 58839a683..000000000 --- a/src/estimagic/optimization/tranquilo/solve_subproblem.py +++ /dev/null @@ -1,200 +0,0 @@ -from functools import partial -from typing import NamedTuple - -import numpy as np - -from estimagic.optimization.tranquilo.get_component import get_component -from estimagic.optimization.subsolvers.bntr import ( - bntr, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - bntr_fast, -) -from estimagic.optimization.subsolvers.gqtpar import ( - gqtpar, -) -from estimagic.optimization.subsolvers.gqtpar_fast import gqtpar_fast -from estimagic.optimization.tranquilo.wrapped_subsolvers import ( - slsqp_sphere, - solve_multistart, -) -from estimagic.optimization.tranquilo.options import SubsolverOptions - - -def get_subsolver(sphere_solver, cube_solver, user_options=None): - """Get an algorithm-function with partialled options. - - Args: - sphere_solver (str or callable): Name of a subproblem solver or a subproblem - solver, designed to solve the problem in the unit sphere. The first argument - of any subsolver needs to be ``model``. The second argument needs to be - ``x_candidate``, an initial guess for the solution in the unit space. - Moreover, subsolvers can have any number of additional keyword arguments. - cube_solver (str or callable): Name of a subproblem solver or a subproblem - solver, designed to solve the problem in the unit box. The first argument - of any subsolver needs to be ``model``. The second and third arguments have - to be ``lower_bounds`` and ``upper_bounds``. The fourth argument needs to be - ``x_candidate``, an initial guess for the solution in the unit space. - Moreover, subsolvers can have any number of additional keyword arguments. - user_options (dict): - Options for the subproblem solver. The following are supported: - - maxiter (int): Maximum number of iterations to perform when solving the - trust-region subproblem ("bntr" and "gqtpar"). - - maxiter_gradient_descent (int): Maximum number of gradient descent - iterations to perform ("bntr"). - - conjugate_gradient_method (str): Method for computing the conjugate - gradient step ("bntr"). - Available conjugate gradient methods are: - - "cg" - - "steihaug_toint" - - "trsbox" (default) - - gtol_abs (float): Convergence tolerance for the absolute gradient norm - in the trust-region subproblem ("bntr"). - - gtol_rel (float): Convergence tolerance for the relative gradient norm - in the trust-region subproblem ("bntr"). - - gtol_scaled (float): Convergence tolerance for the scaled gradient norm - in the trust-region subproblem ("bntr"). - - gtol_abs_conjugate_gradient (float): Convergence tolerance for the - absolute gradient norm in the conjugate gradient step ("bntr"). - - gtol_rel_conjugate_gradient (float): Convergence tolerance for the - relative gradient norm in the conjugate gradient step ("bntr"). - - k_easy (float): topping criterion for the "easy" case in the trust-region - subproblem ("gqtpar"). - - k_hard (float): Stopping criterion for the "hard" case in the trust-region - subproblem ("gqtpar"). - - Returns: - callable: The subsolver. 
- - """ - built_in_sphere_solvers = { - "gqtpar": gqtpar, - "gqtpar_fast": gqtpar_fast, - "slsqp_sphere": slsqp_sphere, - } - - built_in_cube_solvers = { - "bntr": bntr, - "bntr_fast": bntr_fast, - "multistart": solve_multistart, - } - - _sphere_subsolver = get_component( - name_or_func=sphere_solver, - component_name="sphere_solver", - func_dict=built_in_sphere_solvers, - default_options=SubsolverOptions(), - user_options=user_options, - mandatory_signature=["model", "x_candidate"], - ) - - _cube_subsolver = get_component( - name_or_func=cube_solver, - component_name="cube_solver", - func_dict=built_in_cube_solvers, - default_options=SubsolverOptions(), - user_options=user_options, - mandatory_signature=["model", "x_candidate", "lower_bounds", "upper_bounds"], - ) - - solver = partial( - _solve_subproblem_template, - sphere_solver=_sphere_subsolver, - cube_solver=_cube_subsolver, - ) - - return solver - - -def _solve_subproblem_template( - model, - trustregion, - sphere_solver, - cube_solver, -): - """Solve the quadratic subproblem. - - Args: - model (ScalarModel): The fitted model of which we want to find the minimum. - trustregion (Region): The trustregion on which the model was fitted. - sphere_solver (callable): Spherical subproblem solver, designed to solve the - problem in the unit sphere. The first argument of any subsolver needs to be - ``model``. The second argument needs to be ``x_candidate``, an initial guess - for the solution in the unit space. Moreover, subsolvers can have any number - of additional keyword arguments. - cube_solver (callable): Cubical subproblem solver, designed to solve the problem - in the unit box. The first argument of any subsolver needs to be ``model``. - The second and third arguments have to be ``lower_bounds`` and - ``upper_bounds``. The fourth argument needs to be ``x_candidate``, an - initial guess for the solution in the unit space. Moreover, subsolvers can - have any number of additional keyword arguments. - - - Returns: - SubproblemResult: Namedtuple with the following entries: - - "x" (np.ndarray): The optimal x in terms of the original parameter space. - - "expected_improvement" (float): The expected improvement at the solution. - The sign has already been flipped, i.e. large means more improvement. - - "n_iterations" (int): Number of iterations performed before termination. - - "success" (bool): Boolean indicating whether a solution has been found - before reaching maxiter. - - "x_unit" (np.ndarray): The optimal x in terms of the unit space. - - "shape" (str): Whether the trustregion was a sphere or a cube, which in - turn determines whether the sphere or cube solver was used. - - """ - old_x_unit = trustregion.map_to_unit(trustregion.center) - - solver = sphere_solver if trustregion.shape == "sphere" else cube_solver - - raw_result = solver( - model=model, - x_candidate=old_x_unit, - # bounds can be passed to both solvers because the functions returned by - # `get_component` ignore redundant arguments. - lower_bounds=-np.ones_like(old_x_unit), - upper_bounds=np.ones_like(old_x_unit), - ) - - if trustregion.shape == "cube": - raw_result["x"] = np.clip(raw_result["x"], -1.0, 1.0) - - # make sure expected improvement is calculated accurately in case of clipping and - # does not depend on whether the subsolver ignores intercepts or not. 
- fval_old = model.predict(old_x_unit) - fval_candidate = model.predict(raw_result["x"]) - - expected_improvement = -(fval_candidate - fval_old) - - # in case of negative expected improvement, we return the old point - if expected_improvement >= 0: - success = raw_result["success"] - x_unit = raw_result["x"] - x = trustregion.map_from_unit(raw_result["x"]) - else: - success = False - x_unit = old_x_unit - x = trustregion.center - expected_improvement = 0.0 - - result = SubproblemResult( - x=x, - expected_improvement=expected_improvement, - n_iterations=raw_result["n_iterations"], - success=success, - x_unit=x_unit, - shape=trustregion.shape, - ) - - return result - - -class SubproblemResult(NamedTuple): - """Result of the subproblem solver.""" - - x: np.ndarray - expected_improvement: float - n_iterations: int - success: bool - x_unit: np.ndarray - shape: str diff --git a/src/estimagic/optimization/tranquilo/tranquilo.py b/src/estimagic/optimization/tranquilo/tranquilo.py deleted file mode 100644 index 83844b1bf..000000000 --- a/src/estimagic/optimization/tranquilo/tranquilo.py +++ /dev/null @@ -1,467 +0,0 @@ -import functools -from functools import partial -from typing import NamedTuple - -import numpy as np - -from estimagic.decorators import mark_minimizer -from estimagic.optimization.tranquilo.adjust_radius import adjust_radius -from estimagic.optimization.tranquilo.filter_points import ( - drop_worst_points, -) -from estimagic.optimization.tranquilo.models import ( - ScalarModel, - VectorModel, -) -from estimagic.optimization.tranquilo.process_arguments import process_arguments -from estimagic.optimization.tranquilo.region import Region - - -# wrapping gives us the signature and docstring of process arguments -@functools.wraps(process_arguments) -def _tranquilo(*args, **kwargs): - internal_kwargs = process_arguments(*args, **kwargs) - return _internal_tranquilo(**internal_kwargs) - - -def _internal_tranquilo( - evaluate_criterion, - x, - noisy, - conv_options, - stop_options, - radius_options, - batch_size, - target_sample_size, - stagnation_options, - search_radius_factor, - n_evals_per_point, - n_evals_at_start, - trustregion, - sampling_rng, - history, - sample_points, - solve_subproblem, - filter_points, - fit_model, - aggregate_model, - estimate_variance, - accept_candidate, -): - eval_info = {0: n_evals_at_start} - evaluate_criterion(eval_info) - - _init_fvec = history.get_fvecs(0).mean(axis=0) - - _init_vector_model = VectorModel( - intercepts=_init_fvec, - linear_terms=np.zeros((len(_init_fvec), len(x))), - square_terms=np.zeros((len(_init_fvec), len(x), len(x))), - shift=trustregion.center, - scale=trustregion.radius, - ) - - _init_model = aggregate_model(_init_vector_model) - - state = State( - trustregion=trustregion, - model_indices=[0], - model=_init_model, - vector_model=_init_vector_model, - index=0, - x=x, - fval=np.mean(history.get_fvals(0)), - rho=np.nan, - accepted=True, - new_indices=[0], - old_indices_discarded=[], - old_indices_used=[], - candidate_index=0, - candidate_x=x, - ) - - states = [state] - - # ================================================================================== - # main optimization loop - # ================================================================================== - converged, msg = False, None - for _ in range(stop_options.max_iter): - # ============================================================================== - # find, filter and count points - # 
============================================================================== - - search_region = state.trustregion._replace( - radius=search_radius_factor * state.trustregion.radius - ) - - old_indices = history.get_x_indices_in_region(search_region) - - old_xs = history.get_xs(old_indices) - - model_xs, model_indices = filter_points( - xs=old_xs, - indices=old_indices, - state=state, - target_size=target_sample_size, - ) - - # ========================================================================== - # sample points if necessary and do simple iteration - # ========================================================================== - new_xs = sample_points( - trustregion=state.trustregion, - n_points=max(0, target_sample_size - len(model_xs)), - existing_xs=model_xs, - rng=sampling_rng, - ) - - new_indices = history.add_xs(new_xs) - - eval_info = {i: n_evals_per_point for i in new_indices} - - evaluate_criterion(eval_info) - - model_indices = _concatenate_indices(model_indices, new_indices) - - model_xs = history.get_xs(model_indices) - model_data = history.get_model_data( - x_indices=model_indices, - average=True, - ) - - vector_model = fit_model( - *model_data, - region=state.trustregion, - old_model=state.vector_model, - weights=None, - ) - - scalar_model = aggregate_model( - vector_model=vector_model, - ) - - sub_sol = solve_subproblem(model=scalar_model, trustregion=state.trustregion) - - _relative_step_length = ( - np.linalg.norm(sub_sol.x - state.x) / state.trustregion.radius - ) - - # ========================================================================== - # If we have enough points, drop points until the relative step length - # becomes large enough - # ========================================================================== - - if len(model_xs) > target_sample_size: - while ( - _relative_step_length < stagnation_options.min_relative_step_keep - and len(model_xs) > target_sample_size - ): - model_xs, model_indices = drop_worst_points( - xs=model_xs, - indices=model_indices, - state=state, - n_to_drop=1, - ) - - model_data = history.get_model_data( - x_indices=model_indices, - average=True, - ) - - vector_model = fit_model( - *model_data, - region=state.trustregion, - old_model=state.vector_model, - weights=None, - ) - - scalar_model = aggregate_model( - vector_model=vector_model, - ) - - sub_sol = solve_subproblem( - model=scalar_model, trustregion=state.trustregion - ) - - _relative_step_length = ( - np.linalg.norm(sub_sol.x - state.x) / state.trustregion.radius - ) - - # ========================================================================== - # If step length is still too small, replace the worst point with a new one - # ========================================================================== - - sample_counter = 0 - while _relative_step_length < stagnation_options.min_relative_step: - if stagnation_options.drop: - model_xs, model_indices = drop_worst_points( - xs=model_xs, - indices=model_indices, - state=state, - n_to_drop=stagnation_options.sample_increment, - ) - - new_xs = sample_points( - trustregion=state.trustregion, - n_points=stagnation_options.sample_increment, - existing_xs=model_xs, - rng=sampling_rng, - ) - - new_indices = history.add_xs(new_xs) - - eval_info = {i: n_evals_per_point for i in new_indices} - - evaluate_criterion(eval_info) - - model_indices = _concatenate_indices(model_indices, new_indices) - model_xs = history.get_xs(model_indices) - model_data = history.get_model_data( - x_indices=model_indices, - average=True, - ) - - vector_model = 
fit_model( - *model_data, - region=state.trustregion, - old_model=state.vector_model, - weights=None, - ) - - scalar_model = aggregate_model( - vector_model=vector_model, - ) - - sub_sol = solve_subproblem( - model=scalar_model, trustregion=state.trustregion - ) - - _relative_step_length = ( - np.linalg.norm(sub_sol.x - state.x) / state.trustregion.radius - ) - - sample_counter += 1 - if sample_counter >= stagnation_options.max_trials: - break - - # ============================================================================== - # fit noise model based on previous acceptance samples - # ============================================================================== - - if noisy: - scalar_noise_variance = estimate_variance( - trustregion=state.trustregion, - history=history, - model_type="scalar", - ) - else: - scalar_noise_variance = None - - # ============================================================================== - # acceptance decision - # ============================================================================== - - acceptance_result = accept_candidate( - subproblem_solution=sub_sol, - state=state, - wrapped_criterion=evaluate_criterion, - noise_variance=scalar_noise_variance, - history=history, - ) - - # ============================================================================== - # update state with information on this iteration - # ============================================================================== - - state = state._replace( - model_indices=model_indices, - model=scalar_model, - new_indices=np.setdiff1d(model_indices, old_indices), - old_indices_used=np.intersect1d(model_indices, old_indices), - old_indices_discarded=np.setdiff1d(old_indices, model_indices), - **acceptance_result._asdict(), - ) - - states.append(state) - - # ============================================================================== - # update state for beginning of next iteration - # ============================================================================== - - new_radius = adjust_radius( - radius=state.trustregion.radius, - rho=acceptance_result.rho, - step_length=acceptance_result.step_length, - options=radius_options, - ) - - new_trustregion = state.trustregion._replace( - center=acceptance_result.x, radius=new_radius - ) - - state = state._replace(trustregion=new_trustregion) - - # ============================================================================== - # convergence check - # ============================================================================== - - if acceptance_result.accepted and not conv_options.disable: - converged, msg = _is_converged(states=states, options=conv_options) - if converged: - break - - if history.get_n_fun() >= stop_options.max_eval: - converged = False - msg = "Maximum number of criterion evaluations reached." - break - - # ================================================================================== - # results processing - # ================================================================================== - res = { - "solution_x": state.x, - "solution_criterion": state.fval, - "states": states, - "message": msg, - "tranquilo_history": history, - } - - return res - - -class State(NamedTuple): - trustregion: Region - """The trustregion at the beginning of the iteration.""" - - # Information about the model used to make the acceptance decision in the iteration - model_indices: np.ndarray - """The indices of points used to build the current surrogate model `State.model`. 
- - The points can be retrieved through calling `history.get_xs(model_indices)`. - - """ - - model: ScalarModel - """The current surrogate model. - - The solution to the subproblem with this model as the criterion is stored as - `State.candidate_x`. - - """ - - vector_model: VectorModel - - # candidate information - candidate_index: int - """The index of the candidate point in the history. - - This corresponds to the index of the point in the history that solved the - subproblem. - - """ - - candidate_x: np.ndarray - """The candidate point. - - Is the same as `history.get_xs(candidate_index)`. - - """ - - # accepted parameters and function values at the end of the iteration - index: int - """The index of the accepted point in the history.""" - - x: np.ndarray - """The accepted point. - - Is the same as `history.get_xs(index)`. - - """ - - fval: np.ndarray # this is an estimate for noisy functions - """The function value at the accepted point. - - If `noisy=False` this is the same as `history.get_fval(index)`. Otherwise, this is - an average. - - """ - - # success information - rho: float - """The calculated rho in the current iteration.""" - - accepted: bool - """Whether the candidate point was accepted.""" - - # information on existing and new points - new_indices: np.ndarray - """The indices of new points generated for the model fitting in this iteration.""" - - old_indices_used: np.ndarray - """The indices of existing points used to build the model in this iteration.""" - - old_indices_discarded: np.ndarray - """The indices of existing points not used to build the model in this iteration.""" - - # information on step length - step_length: float = None - """The euclidian distance between `State.x` and `State.trustregion.center`.""" - - relative_step_length: float = None - """The step_length divided by the radius of the trustregion.""" - - -def _is_converged(states, options): - old, new = states[-2:] - - f_change_abs = np.abs(old.fval - new.fval) - f_change_rel = f_change_abs / max(np.abs(old.fval), 1) - x_change_abs = np.linalg.norm(old.x - new.x) - x_change_rel = np.linalg.norm((old.x - new.x) / np.clip(np.abs(old.x), 1, np.inf)) - g_norm_abs = np.linalg.norm(new.model.linear_terms) - g_norm_rel = g_norm_abs / max(g_norm_abs, 1) - - converged = True - if g_norm_rel <= options.gtol_rel: - msg = "Relative gradient norm smaller than tolerance." - elif g_norm_abs <= options.gtol_abs: - msg = "Absolute gradient norm smaller than tolerance." - elif f_change_rel <= options.ftol_rel: - msg = "Relative criterion change smaller than tolerance." - elif f_change_abs <= options.ftol_abs: - msg = "Absolute criterion change smaller than tolerance." - elif x_change_rel <= options.xtol_rel: - msg = "Relative params change smaller than tolerance." - elif x_change_abs <= options.xtol_abs: - msg = "Absolute params change smaller than tolerance." 
- else: - converged = False - msg = None - - return converged, msg - - -tranquilo = mark_minimizer( - func=partial(_tranquilo, functype="scalar"), - name="tranquilo", - primary_criterion_entry="value", - needs_scaling=True, - is_available=True, - is_global=False, -) - -tranquilo_ls = mark_minimizer( - func=partial(_tranquilo, functype="least_squares"), - primary_criterion_entry="root_contributions", - name="tranquilo_ls", - needs_scaling=True, - is_available=True, - is_global=False, -) - - -def _concatenate_indices(first, second): - first = np.atleast_1d(first).astype(int) - second = np.atleast_1d(second).astype(int) - return np.hstack((first, second)) diff --git a/src/estimagic/optimization/tranquilo/volume.py b/src/estimagic/optimization/tranquilo/volume.py deleted file mode 100644 index 1c092f84a..000000000 --- a/src/estimagic/optimization/tranquilo/volume.py +++ /dev/null @@ -1,81 +0,0 @@ -"""Functions to calculate volumes of hyperspheres and hypercubes. - -Hypercubes can be seen as hyperspheres when the distance from the center is calculated -in the infinity norm rather than the Euclidean norm. - -This is why we characterize hypercubes by their radius (half the side length). - -""" -import numpy as np -from scipy.special import gamma, loggamma - - -def get_radius_after_volume_scaling(radius, dim, scaling_factor): - out = radius * scaling_factor ** (1 / dim) - return out - - -def get_radius_of_sphere_with_volume_of_cube(cube_radius, dim, scaling_factor=None): - log_radius = ( - loggamma(dim / 2 + 1) / dim - - np.log(np.pi) / 2 - + np.log(2) - + np.log(cube_radius) - ) - if scaling_factor is not None: - log_radius += np.log(scaling_factor) / dim - out = np.exp(log_radius) - return out - - -def get_radius_of_cube_with_volume_of_sphere(sphere_radius, dim, scaling_factor=None): - log_radius = ( - np.log(np.pi) / 2 - + np.log(sphere_radius) - - np.log(2) - - loggamma(dim / 2 + 1) / dim - ) - if scaling_factor is not None: - log_radius += np.log(scaling_factor) / dim - out = np.exp(log_radius) - return out - - -def get_volume(radius, dim, shape): - if shape == "sphere": - out = _sphere_volume(radius, dim) - elif shape == "cube": - out = _cube_volume(radius, dim) - else: - raise ValueError(f"shape must be 'sphere' or 'cube', not: {shape}") - return out - - -def get_radius(volume, dim, shape): - if shape == "sphere": - out = _sphere_radius(volume, dim) - elif shape == "cube": - out = _cube_radius(volume, dim) - else: - raise ValueError(f"shape must be 'sphere' or 'cube', not: {shape}") - return out - - -def _sphere_volume(radius, dim): - vol = np.pi ** (dim / 2) * radius**dim / gamma(dim / 2 + 1) - return vol - - -def _cube_volume(radius, dim): - vol = (radius * 2) ** dim - return vol - - -def _sphere_radius(volume, dim): - radius = ((volume * gamma(dim / 2 + 1)) / (np.pi ** (dim / 2))) ** (1 / dim) - return radius - - -def _cube_radius(volume, dim): - radius = 0.5 * volume ** (1 / dim) - return radius diff --git a/src/estimagic/optimization/tranquilo/weighting.py b/src/estimagic/optimization/tranquilo/weighting.py deleted file mode 100644 index 1f655ff83..000000000 --- a/src/estimagic/optimization/tranquilo/weighting.py +++ /dev/null @@ -1,27 +0,0 @@ -from functools import partial - - -def get_sample_weighter(weighter, bounds): - """Get a function that calculates weights for points in a sample. - - The resulting function takes the following arguments: - - xs (np.ndarray): A 2d numpy array containing a sample. - - trustregion (Region): Trustregion. See module region.py.
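    The resulting function returns the weights for the points, or ``None`` (as
    the built-in ``no_weights`` does) to signal unweighted fitting.

    Example of a custom weighter (an illustrative sketch, not shipped with the
    package):

        import numpy as np

        def inverse_distance_weights(xs, trustregion, bounds):  # noqa: ARG001
            # weight points close to the trustregion center more heavily
            dists = np.linalg.norm(xs - trustregion.center, axis=1)
            return 1 / (1 + dists)

        weigh_points = get_sample_weighter(inverse_distance_weights, bounds)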
- - Args: - weighter (str) - bounds (Bounds) - - """ - if isinstance(weighter, str): - built_in_weighters = {"no_weights": no_weights} - weighter = built_in_weighters[weighter] - elif not callable(weighter): - raise TypeError("weighter must be a string or callable.") - - out = partial(weighter, bounds=bounds) - return out - - -def no_weights(xs, trustregion, bounds): # noqa: ARG001 - return None diff --git a/src/estimagic/optimization/tranquilo/wrap_criterion.py b/src/estimagic/optimization/tranquilo/wrap_criterion.py deleted file mode 100644 index 848571382..000000000 --- a/src/estimagic/optimization/tranquilo/wrap_criterion.py +++ /dev/null @@ -1,56 +0,0 @@ -import functools - -import numpy as np - -from estimagic.batch_evaluators import process_batch_evaluator - - -def get_wrapped_criterion(criterion, batch_evaluator, n_cores, history): - """Wrap the criterion function to do get parallelization and history handling. - - The wrapped criterion function takes a dict mapping x_indices to required numbers of - evaluations as only argument. It evaluates the criterion function in parallel and - saves the resulting function evaluations in the history. - - The wrapped criterion function does not return anything. - - """ - batch_evaluator = process_batch_evaluator(batch_evaluator) - - @functools.wraps(criterion) - def wrapper_criterion(eval_info): - if not isinstance(eval_info, dict): - raise ValueError("eval_info must be a dict.") - - if len(eval_info) == 0: - return - - x_indices = list(eval_info) - repetitions = list(eval_info.values()) - - xs = history.get_xs(x_indices) - xs = np.repeat(xs, repetitions, axis=0) - - arguments = list(xs) - - effective_n_cores = min(n_cores, len(arguments)) - - raw_evals = batch_evaluator( - criterion, - arguments=arguments, - n_cores=effective_n_cores, - ) - - # replace NaNs but keep infinite values. 
NaNs would be problematic in many - # places, infs are only a problem in model fitting and will be handled there - clipped_evals = [ - np.nan_to_num(critval, nan=np.inf, posinf=np.inf, neginf=-np.inf) - for critval in raw_evals - ] - - history.add_evals( - x_indices=np.repeat(x_indices, repetitions), - evals=clipped_evals, - ) - - return wrapper_criterion diff --git a/src/estimagic/optimization/tranquilo/wrapped_subsolvers.py b/src/estimagic/optimization/tranquilo/wrapped_subsolvers.py deleted file mode 100644 index b025bccd5..000000000 --- a/src/estimagic/optimization/tranquilo/wrapped_subsolvers.py +++ /dev/null @@ -1,94 +0,0 @@ -from functools import partial - -import numpy as np -from scipy.optimize import Bounds, NonlinearConstraint, minimize - -from estimagic.optimization.tiktak import draw_exploration_sample - - -def solve_multistart(model, x_candidate, lower_bounds, upper_bounds): - np.random.seed(12345) - start_values = draw_exploration_sample( - x=x_candidate, - lower=lower_bounds, - upper=upper_bounds, - n_samples=100, - sampling_distribution="uniform", - sampling_method="sobol", - seed=1234, - ) - - def crit(x): - return model.predict(x) - - bounds = Bounds(lower_bounds, upper_bounds) - - best_crit = np.inf - accepted_x = None - critvals = [] - for x in start_values: - res = minimize( - crit, - x, - method="L-BFGS-B", - bounds=bounds, - ) - if res.fun <= best_crit: - accepted_x = res.x - critvals.append(res.fun) - - return { - "x": accepted_x, - "std": np.std(critvals), - "n_iterations": None, - "success": None, - } - - -def slsqp_sphere(model, x_candidate): - crit, grad = get_crit_and_grad(model) - constraints = get_constraints() - - res = minimize( - crit, - x_candidate, - method="slsqp", - jac=grad, - constraints=constraints, - ) - - return { - "x": res.x, - "success": res.success, - "n_iterations": res.nit, - } - - -def get_crit_and_grad(model): - def _crit(x, c, g, h): - return c + x @ g + 0.5 * x @ h @ x - - def _grad(x, g, h): - return g + x @ h - - crit = partial(_crit, c=model.intercept, g=model.linear_terms, h=model.square_terms) - grad = partial(_grad, g=model.linear_terms, h=model.square_terms) - - return crit, grad - - -def get_constraints(): - def _constr_fun(x): - return x @ x - - def _constr_jac(x): - return 2 * x - - constr = NonlinearConstraint( - fun=_constr_fun, - lb=-np.inf, - ub=1, - jac=_constr_jac, - ) - - return (constr,) diff --git a/src/estimagic/visualization/visualize_tranquilo.py b/src/estimagic/visualization/visualize_tranquilo.py deleted file mode 100644 index 9b1a8f1c7..000000000 --- a/src/estimagic/visualization/visualize_tranquilo.py +++ /dev/null @@ -1,590 +0,0 @@ -from copy import deepcopy - -import numpy as np -import pandas as pd -import plotly.express as px -from numba import njit -from plotly import figure_factory as ff -from plotly import graph_objects as go -from plotly.subplots import make_subplots - -from estimagic.optimization.optimize_result import OptimizeResult -from estimagic.optimization.tranquilo.clustering import cluster -from estimagic.optimization.tranquilo.geometry import log_d_quality_calculator -from estimagic.optimization.tranquilo.volume import get_radius_after_volume_scaling - - -def visualize_tranquilo(results, iterations): - """Plot diagnostic information of optimization result in given iteration(s). 
- - Generates plots with sample points (trustregion or heatmap), criterion evaluations, - trustregion radii and other diagnostic information to compare different algorithms - at an iteration or different iterations for a given algorithm. - - Currently works for the following algorithms: `tranquilo`, `tranquilo_ls`, - `nag_pybobyqa` and `nag_dfols`. - - Args: - results (dict or OptimizeResult): An estimagic optimization result or a - dictionary with different estimagic optimization results. - iterations (int, list, tuple or dict): The iterations to compare the results - at. Can be an integer if we want to compare different results at the same - iteration, a list or tuple if we want to compare different iterations of - the same optimization result, or dictionary with the same keys as results - and with integer values if we want to compare different iterations of - different results. - Returns: - fig (plotly.Figure): Plotly figure that combines the following plots: - - sample points: plot with model points at current iteration and the - trust region, if number of parameters is not larger than 2, or - a heatmap of (absolute) correlations of sample points for higher - dimensional parameter spaces. - - distance plot: L2 and infinity norm-distances of model points from - the trustregion center. - - criterion plot: function evaluations with sample points and current - accepted point highlighted. - - rho plots: the ratio of expected and actual improvement in function - values at each iteration. - - radius plots: trustregion radii at each iteration. - - cluster plots: number of clusters relative to number of sample points - at each iteration. - - fekete criterion plots: the value of the fekete criterion at each - iteration. - - """ - results = deepcopy(results) - if isinstance(iterations, int): - iterations = {case: iterations for case in results} - results = {case: _process_results(results[case]) for case in results} - elif isinstance(results, OptimizeResult): - results = _process_results(results) - results = {f"iteration {i}": results for i in iterations} - iterations = {f"iteration {iteration}": iteration for iteration in iterations} - - cases = results.keys() - nrows = 8 - ncols = len(cases) - fig = make_subplots( - rows=nrows, - cols=ncols, - subplot_titles=list(cases), - horizontal_spacing=1 / (ncols * 6), - vertical_spacing=(1 / (nrows - 1)) / 4, - shared_yaxes=True, - ) - color_dict = { - "existing": "rgb(0,0,255)", - "new": "rgb(230,0,0)", - "discarded": "rgb(0,0,0)", - } - xl = [] - xu = [] - for i, case in enumerate(cases): - result = results[case] - iteration = iterations[case] - state = result.algorithm_output["states"][iteration] - params_history = np.array(result.history["params"]) - criterion_history = np.array(result.history["criterion"]) - fig = _plot_sample_points( - params_history, state, color_dict, fig, row=1, col=i + 1 - ) - fig = _plot_distances_from_center( - params_history, state, fig, rows=[2, 3], col=i + 1 - ) - xl.append(fig.get_subplot(row=2, col=i + 1).xaxis.range[0]) - xu.append(fig.get_subplot(row=2, col=i + 1).xaxis.range[1]) - fig = _plot_criterion( - criterion_history, state, color_dict, fig, row=4, col=i + 1 - ) - fig = _plot_rhos(result, fig, iteration=iteration, row=5, col=i + 1) - fig = _plot_radii(result, fig, iteration=iteration, row=6, col=i + 1) - fig = _plot_clusters_points_ratio(result, iteration, fig, row=7, col=i + 1) - fig = _plot_fekete_criterion(result, fig, iteration=iteration, row=8, col=i + 1) - fig.layout.annotations[i].update(y=1.015) - for r 
in [2, 3]: - for c in range(1, ncols + 1): - fig.update_xaxes(range=[min(xl) - 0.25, max(xu) + 0.25], row=r, col=c) - fig = _clean_legend_duplicates(fig) - fig.update_layout(height=400 * nrows, width=460 * ncols, template="plotly_white") - fig.update_yaxes( - showgrid=False, showline=True, linewidth=1, linecolor="black", zeroline=False - ) - fig.update_xaxes( - showgrid=False, showline=True, linewidth=1, linecolor="black", zeroline=False - ) - fig.update_layout(hovermode="x unified") - - return fig - - -def _plot_criterion(history, state, color_dict, fig, row, col): - fig.add_trace( - go.Scatter( - y=history, - x=np.arange(len(history)), - showlegend=False, - line_color="#C0C0C0", - name="Criterion", - mode="lines", - ), - row=row, - col=col, - ) - - fig.add_trace( - go.Scatter( - y=history[state.old_indices_used], - x=state.old_indices_used, - mode="markers", - marker_size=10, - name="existing ", - showlegend=False, - marker_color=color_dict["existing"], - opacity=0.6, - ), - col=col, - row=row, - ) - fig.add_trace( - go.Scatter( - y=history[state.new_indices], - x=state.new_indices, - mode="markers", - marker_size=10, - name="new ", - showlegend=False, - marker_color=color_dict["new"], - opacity=0.6, - ), - col=col, - row=row, - ) - fig.add_trace( - go.Scatter( - y=history[ - getattr(state, "old_indices_discarded", np.array([], dtype="int")) - ], - x=getattr(state, "old_indices_discarded", np.array([], dtype="int")), - mode="markers", - marker_size=10, - name="discarded ", - showlegend=False, - marker_color=color_dict["discarded"], - opacity=0.6, - ), - col=col, - row=row, - ) - fig.add_trace( - go.Scatter( - y=[history[state.index]], - x=[state.index], - mode="markers", - marker_size=12, - name="current index", - showlegend=False, - marker_color="red", - marker_symbol="star", - marker_line_color="black", - marker_line_width=1, - opacity=0.6, - ), - col=col, - row=row, - ) - fig.update_xaxes(title_text="Function evaluations", row=row, col=col) - if col == 1: - fig.update_yaxes(title_text="Criterion value", row=row, col=col) - return fig - - -def _plot_sample_points(history, state, color_dict, fig, row, col): - sample_points = _get_sample_points(state, history) - if state.x.shape[0] <= 2: - trustregion = state.trustregion - radius = trustregion.radius - center = trustregion.center - fig.add_shape( - type="circle", - xref="x", - yref="y", - x0=center[0] - radius, - y0=center[1] - radius, - x1=center[0] + radius, - y1=center[1] + radius, - line_width=0.5, - col=col, - row=row, - line_color="grey", - ) - - fig.add_traces( - px.scatter( - sample_points, - x=0, - y=1, - color="case", - color_discrete_map=color_dict, - opacity=0.7, - ).data, - cols=col, - rows=row, - ) - fig.update_traces( - marker_size=10, - marker_line_color="black", - marker_line_width=2, - col=col, - row=row, - ) - fig.update_yaxes(scaleanchor="x", scaleratio=1, col=col, row=row) - fig.update_xaxes(scaleanchor="y", scaleratio=1, col=col, row=row) - else: - params = [col for col in sample_points.columns if col != "case"] - corr = sample_points[params].corr().abs() - mask = np.zeros_like(corr, dtype=bool) - mask[np.tril_indices_from(mask, k=-1)] = True - corr = corr.where(mask) - fig.add_trace( - go.Heatmap( - z=corr, - x=corr.columns.values, - y=corr.index.values, - showscale=False, - colorscale="Magenta", - zmin=0, - zmax=1, - text=corr.to_numpy().round(2).tolist(), # xxxx, - texttemplate="%{text}", - ), - row=row, - col=col, - ) - fig.update_layout(yaxis_autorange="reversed") - fig.update_xaxes(tickmode="array", 
tickvals=corr.index.values, col=col, row=row) - fig.update_yaxes( - tickmode="array", tickvals=corr.columns.values, col=col, row=row - ) - return fig - - -def _plot_radii(res, fig, row, col, iteration): - radii = [state.trustregion.radius for state in res.algorithm_output["states"]] - traces = plot_line_with_lighlighted_point( - x=np.arange(len(radii)), y=radii, highlighted_point=iteration, name="Radius" - ) - fig.add_traces( - traces, - rows=row, - cols=col, - ) - fig.update_xaxes(title_text="Iteration", row=row, col=col) - if col == 1: - fig.update_yaxes(title_text="Radius", row=row, col=col) - return fig - - -def _plot_rhos(res, fig, row, col, iteration): - rhos = np.array([state.rho for state in res.algorithm_output["states"]]) - rhos[~pd.isna(rhos)] = np.clip(rhos[~pd.isna(rhos)], -1, 3) - traces = plot_line_with_lighlighted_point( - x=np.arange(len(rhos)), y=rhos, highlighted_point=iteration, name="Rho" - ) - fig.add_traces( - traces, - rows=row, - cols=col, - ) - fig.update_xaxes(title_text="Iteration", row=row, col=col) - if col == 1: - fig.update_yaxes(title_text="Rho", row=row, col=col) - return fig - - -def _plot_fekete_criterion(res, fig, row, col, iteration): - fekete = _get_fekete_criterion(res) - traces = plot_line_with_lighlighted_point( - x=np.arange(len(fekete)), y=fekete, highlighted_point=iteration, name="Fekete" - ) - fig.add_traces( - traces, - rows=row, - cols=col, - ) - fig.update_xaxes(title_text="Iteration", row=row, col=col) - if col == 1: - fig.update_yaxes(title_text="Fekete criterion", row=row, col=col) - return fig - - -def _plot_clusters_points_ratio(res, iteration, fig, row, col): - dim = res.params.shape[0] - history = np.array(res.history["params"]) - states = res.algorithm_output["states"] - colors = [ - "rgb(251,106,74)", - "rgb(203,24,29)", - "rgb(103,0,13)", - ] - for i, f in enumerate([1, 2, 10]): - ratios = [np.nan] - for state in states[1:]: - n_points = state.model_indices.shape[0] - points = history[state.model_indices] - scaling = 1 / (f * n_points) - radius = get_radius_after_volume_scaling( - state.trustregion.radius, dim, scaling - ) - _, centers = cluster(points, radius) - n_clusters = centers.shape[0] - ratios.append(n_clusters / n_points) - fig.add_trace( - go.Scatter( - y=ratios, - x=np.arange(len(ratios)), - mode="lines", - opacity=0.5, - line_color=colors[i], - line_width=1.5, - name=f"s={f}*n", - ), - col=col, - row=row, - ) - fig.add_trace( - go.Scatter( - y=[ratios[iteration]], - x=[iteration], - mode="markers", - marker_color=colors[i], - opacity=1, - marker_size=10, - name=f"s={f}*n", - showlegend=False, - ), - col=col, - row=row, - ) - fig.update_xaxes(title_text="Iteration", row=row, col=col) - if col == 1: - fig.update_yaxes(title_text="Cluster ratio", row=row, col=col) - return fig - - -def _plot_distances_from_center(history, state, fig, col, rows): - dist_sq = ( - np.linalg.norm( - history[state.model_indices] - state.trustregion.center, - axis=1, - ) - / state.trustregion.radius - ) - - dist_inf = ( - np.linalg.norm( - history[state.model_indices] - state.trustregion.center, - axis=1, - ord=np.inf, - ) - / state.trustregion.radius - ) - - for r, inputs in enumerate([dist_sq, dist_inf]): - data = ff.create_distplot( - [inputs], - show_curve=False, - show_rug=True, - group_labels=[""], - show_hist=False, - ).data - - data[0].update( - { - "yaxis": "y", - "y": [0] * len(inputs), - "showlegend": False, - "marker_size": 20, - } - ) - fig.add_traces(data, cols=col, rows=rows[r]) - - min_dist = min(dist_inf.min(), dist_sq.min()) - 
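# NOTE (editorial sketch, not part of the deleted module): the distances plotted
# by this function are measured in trust-region units, i.e. each norm is divided
# by the trust-region radius, so a value of 1.0 lies exactly on the boundary. A
# minimal, self-contained version of that computation; the names `points`,
# `center` and `radius` below are illustrative assumptions:
import numpy as np

points = np.array([[0.5, 0.0], [1.0, 1.0]])
center, radius = np.zeros(2), 2.0
d_l2 = np.linalg.norm(points - center, axis=1) / radius  # -> [0.25, 0.7071...]
d_inf = np.linalg.norm(points - center, axis=1, ord=np.inf) / radius  # -> [0.25, 0.5]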
max_dist = max(dist_inf.max(), dist_sq.max()) - - fig.update_xaxes( - title_text="L2 norm", range=[min_dist, max_dist], row=rows[0], col=col - ) - fig.update_xaxes( - title_text="Inf norm", range=[min_dist, max_dist], row=rows[1], col=col - ) - return fig - - -def _get_fekete_criterion(res): - states = res.algorithm_output["states"][1:] - history = np.array(res.history["params"]) - - out = [np.nan] + [ - log_d_quality_calculator( - sample=history[state.model_indices], - trustregion=state.trustregion, - ) - for state in states - ] - return out - - -def _get_sample_points(state, history): - current_points = history[state.model_indices] - discarded_points = history[ - getattr(state, "old_indices_discarded", np.array([], dtype="int")) - ] - df = pd.DataFrame( - data=np.vstack([current_points, discarded_points]), - index=np.hstack( - [ - state.model_indices, - getattr(state, "old_indices_discarded", np.array([], dtype="int")), - ] - ), - ) - df["case"] = np.nan - df.loc[state.new_indices, "case"] = "new" - df.loc[state.old_indices_used, "case"] = "existing" - df.loc[ - getattr(state, "old_indices_discarded", np.array([], dtype="int")), "case" - ] = "discarded" - return df - - -def plot_line_with_lighlighted_point(x, y, name, highlighted_point): - """Plot the line x, y and add a marker that highlights one data point. - - Args: - x (np.ndarray or list): 1d array or list of data for the x axis. - y (np.ndarray or list): 1d array or list of data for the y axis. - name (str): Name of the line trace. - highlighted_point (int): Index of the data point to highlight. In the rho, - radius and fekete plots, x holds iteration numbers, y the corresponding - values, and the highlighted point is the current iteration. - - Returns: - list: Two go.Scatter traces, the line and the highlighted point marker. - - """ - highlight_color = "#035096" - highlight_size = 10 - line_color = "#C0C0C0" - data = [ - go.Scatter( - y=y, x=x, mode="lines", line_color=line_color, name=name, showlegend=False - ), - go.Scatter( - x=[highlighted_point], - y=[y[highlighted_point]], - mode="markers", - marker_color=highlight_color, - marker_size=highlight_size, - name="current val", - showlegend=False, - ), - ] - - return data - - -def _clean_legend_duplicates(fig): - trace_names = set() - - def disable_legend_if_duplicate(trace): - if trace.name in trace_names: - # in this case the legend entry is a duplicate - trace.update(showlegend=False) - else: - trace_names.add(trace.name) - - fig.for_each_trace(disable_legend_if_duplicate) - return fig - - -def _process_results(result): - """Add model indices to states of optimization result.""" - result = deepcopy(result) - xs = np.array(result.history["params"]) - if result.algorithm in ["nag_pybobyqa", "nag_dfols"]: - for i in range(1, len(result.algorithm_output["states"])): - state = result.algorithm_output["states"][i] - result.algorithm_output["states"][i] = state._replace( - model_indices=_get_model_indices(xs, state), - new_indices=_get_model_indices(xs, state), - index=_find_index( - xs, - state.x, - )[0], - ) - elif result.algorithm in ["tranquilo", "tranquilo_ls"]: - pass - else: - raise NotImplementedError( - f"Diagnostic plots are not implemented for {result.algorithm}" - ) - return result - - -@njit -def _find_indices_in_trust_region(xs, center, radius): - """Get the row indices of all parameter vectors in a trust region.
- - This is for square trust regions, i.e. balls in term of an infinity norm. - - Args: - xs (np.ndarray): 2d numpy array where each row is a parameter vector. - center (np.ndarray): 1d numpy array that marks the center of the trust region. - radius (float): Radius of the trust region. - - Returns: - np.ndarray: The indices of parameters in the trust region. - - """ - n_obs, dim = xs.shape - out = np.zeros(n_obs).astype(np.int64) - success_counter = 0 - upper = center + radius - lower = center - radius - for i in range(n_obs): - success = True - for j in range(dim): - value = xs[i, j] - if not (lower[j] <= value <= upper[j]) or np.isnan(value): - success = False - continue - if success: - out[success_counter] = i - success_counter += 1 - - return out[:success_counter] - - -def _find_index(xs, point): - radius = 1e-100 - out = np.array([]) - while len(out) == 0: - out = _find_indices_in_trust_region(xs=xs, center=point, radius=radius) - radius = np.sqrt(radius) - if len(out) == 1: - return out - else: - return out[0] - - -def _get_model_indices(xs, state): - model_indices = np.array([]) - for point in state.model_points: - model_indices = np.concatenate([model_indices, _find_index(xs, point)]) - return model_indices.astype(int) diff --git a/tests/optimization/subsolvers/test_bntr_fast.py b/tests/optimization/subsolvers/test_bntr_fast.py deleted file mode 100644 index cf24d6216..000000000 --- a/tests/optimization/subsolvers/test_bntr_fast.py +++ /dev/null @@ -1,551 +0,0 @@ -import numpy as np -import pandas as pd -import pytest -from estimagic.config import TEST_FIXTURES_DIR -from estimagic.optimization.subsolvers.bntr import ( - ActiveBounds, - _update_trustregion_radius_and_gradient_descent, - bntr, -) -from estimagic.optimization.subsolvers.bntr import ( - _apply_bounds_to_conjugate_gradient_step as bounds_cg_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _apply_bounds_to_x_candidate as apply_bounds_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _compute_conjugate_gradient_step as cg_step_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _compute_predicted_reduction_from_conjugate_gradient_step as reduction_cg_step_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _evaluate_model_criterion as eval_criterion_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _find_hessian_submatrix_where_bounds_inactive as find_hessian_inact_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _get_fischer_burmeister_direction_vector as fb_vector_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _get_information_on_active_bounds as get_info_bounds_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _perform_gradient_descent_step as gradient_descent_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _project_gradient_onto_feasible_set as grad_feas_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _take_preliminary_gradient_descent_step_and_check_for_solution as pgd_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _update_trustregion_radius_conjugate_gradient as update_radius_cg_orig, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _apply_bounds_to_conjugate_gradient_step as bounds_cg_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _apply_bounds_to_x_candidate as apply_bounds_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _bntr_fast_jitted, -) -from 
estimagic.optimization.subsolvers.bntr_fast import ( - _compute_conjugate_gradient_step as cg_step_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _compute_predicted_reduction_from_conjugate_gradient_step as reduction_cg_step_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _evaluate_model_criterion as eval_criterion_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _find_hessian_submatrix_where_bounds_inactive as find_hessian_inact_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _get_fischer_burmeister_direction_vector as fb_vector_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _get_information_on_active_bounds as get_info_bounds_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _perform_gradient_descent_step as gradient_descent_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _project_gradient_onto_feasible_set as grad_feas_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _take_preliminary_gradient_descent_step_and_check_for_solution as pgd_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _update_trustregion_radius_and_gradient_descent as _update_trr_and_gd_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _update_trustregion_radius_conjugate_gradient as update_radius_cg_fast, -) -from estimagic.optimization.tranquilo.models import ScalarModel -from numpy.testing import assert_array_almost_equal as aaae -from numpy.testing import assert_array_equal as aae - - -def test_eval_criterion(): - x_candidate = np.zeros(5) - linear_terms = np.arange(5).astype(float) - square_terms = np.arange(25).reshape(5, 5).astype(float) - assert eval_criterion_orig( - x_candidate, linear_terms, square_terms - ) == eval_criterion_fast(x_candidate, linear_terms, square_terms) - - -def test_get_info_on_active_bounds(): - x_candidate = np.array([-1.5, -1.5, 0, 1.5, 1.5]) - indices = np.arange(len(x_candidate)) - linear_terms = np.array([1, 1, 0, -1, -1]) - lower_bounds = -np.ones(5) - upper_bounds = np.ones(5) - info_orig = get_info_bounds_orig( - x_candidate, linear_terms, lower_bounds, upper_bounds - ) - ( - active_lower, - active_upper, - active_fixed, - inactive, - ) = get_info_bounds_fast(x_candidate, linear_terms, lower_bounds, upper_bounds) - aae(info_orig.lower, indices[active_lower]) - aae(info_orig.upper, indices[active_upper]) - aae(info_orig.fixed, indices[active_fixed]) - aae(info_orig.active, indices[~inactive]) - aae(info_orig.inactive, indices[inactive]) - - -def test_project_gradient_on_feasible_set(): - grad = np.arange(5).astype(float) - bounds_info = ActiveBounds( - inactive=np.array([0, 1, 2]), - ) - inactive = np.array([True, True, True, False, False]) - aae(grad_feas_orig(grad, bounds_info), grad_feas_fast(grad, inactive)) - - -def test_find_hessian_inactive_bounds(): - hessian = np.arange(25).reshape(5, 5).astype(float) - inactive = np.array([False, False, True, True, True]) - model = ScalarModel(square_terms=hessian, intercept=0, linear_terms=np.zeros(5)) - - bounds_info = ActiveBounds( - inactive=np.arange(5)[inactive], - ) - - aae( - find_hessian_inact_orig(model, bounds_info), - find_hessian_inact_fast(hessian, inactive), - ) - - -def test_fischer_burmeister_direction_vector(): - x = np.array([-1.5, -1.5, 0, 1.5, 1.5]) - grad = np.ones(5) - lb = -np.ones(5) - ub = np.ones(5) - aae(fb_vector_orig(x, grad, lb, ub), fb_vector_fast(x, grad, lb, ub)) - - -def 
test_apply_bounds_candidate_x(): - x = np.array([-1.5, -1.5, 0, 1.5, 1.5]) - lb = -np.ones(5) - ub = np.ones(5) - aae(apply_bounds_orig(x, lb, ub), apply_bounds_fast(x, lb, ub)) - - -@pytest.mark.slow() -def test_take_preliminary_gradient_descent_and_check_for_convergence(): - model_gradient = np.array( - [ - -5.71290e02, - -3.11506e03, - -8.18100e02, - 2.47760e02, - -1.26540e02, - ] - ) - model_hessian = np.array( - [ - [-619.23, -1229.2, 321.9, 106.98, -45.45], - [-1229.2, -668.95, -250.05, 165.77, -47.47], - [321.9, -250.05, -1456.88, -144.75, 900.99], - [106.98, 165.77, -144.75, 686.35, -3.51], - [-45.45, -47.47, 900.99, -3.51, -782.91], - ] - ) - model = ScalarModel( - linear_terms=model_gradient, square_terms=model_hessian, intercept=0 - ) - x_candidate = np.zeros(5) - lower_bounds = -np.ones(len(x_candidate)) - upper_bounds = np.ones(len(x_candidate)) - kwargs = { - "x_candidate": x_candidate, - "model": model, - "lower_bounds": lower_bounds, - "upper_bounds": upper_bounds, - "maxiter_gradient_descent": 5, - "gtol_abs": 1e-08, - "gtol_rel": 1e-08, - "gtol_scaled": 0, - } - kwargs_fast = { - "model_gradient": model_gradient, - "model_hessian": model_hessian, - "lower_bounds": lower_bounds, - "upper_bounds": upper_bounds, - "x_candidate": x_candidate, - "maxiter_gradient_descent": 5, - "gtol_abs": 1e-08, - "gtol_rel": 1e-08, - "gtol_scaled": 0, - } - res_fast = pgd_fast(**kwargs_fast) - res_orig = pgd_orig(**kwargs) - for i in range(5): - aae(np.array(res_fast[i]), np.array(res_orig[i])) - bounds_info_orig = res_orig[5] - indices = np.arange(5) - for i, bounds in enumerate(["lower", "upper", "fixed", "inactive"]): - aae( - np.array(getattr(bounds_info_orig, bounds)), - indices[res_fast[5 + i]], - ) - assert res_orig[6] == res_fast[10] - - -@pytest.mark.slow() -def test_apply_bounds_to_conjugate_gradient_step(): - step_inactive = np.ones(7) - x_candidate = np.zeros(10) - lower_bounds = -np.ones(10) - upper_bounds = np.array([1] * 7 + [-0.01] * 3) - indices = np.arange(len(x_candidate)) - inactive_bounds = np.array([True] * 7 + [False] * 3) - active_lower_bounds = np.array([False] * 10) - active_upper_bounds = np.array([False] * 7 + [True] * 3) - active_fixed_bounds = np.array([False] * 10) - bounds_info = ActiveBounds( - lower=indices[active_lower_bounds], - upper=indices[active_upper_bounds], - fixed=indices[active_fixed_bounds], - inactive=indices[inactive_bounds], - ) - res_fast = bounds_cg_fast( - step_inactive, - x_candidate, - lower_bounds, - upper_bounds, - inactive_bounds, - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, - ) - res_orig = bounds_cg_orig( - step_inactive, x_candidate, lower_bounds, upper_bounds, bounds_info - ) - aae(res_orig, res_fast) - pass - - -@pytest.mark.slow() -def test_compute_conjugate_gradient_setp(): - x_candidate = np.array([0] * 8 + [1.5] * 2) - gradient_inactive = np.arange(6).astype(float) - hessian_inactive = np.arange(36).reshape(6, 6).astype(float) - lower_bounds = np.array([-1] * 6 + [0.5] * 2 + [-1] * 2) - upper_bounds = np.ones(10) - indices = np.arange(len(x_candidate)) - inactive = np.array([True] * 6 + [False] * 4) - active_lower = np.array([False] * 5 + [True, True] + [False] * 3) - active_upper = np.array([False] * 8 + [True] * 2) - active_fixed = np.array([False] * 10) - bounds_info = ActiveBounds( - inactive=indices[inactive], - lower=indices[active_lower], - upper=indices[active_upper], - fixed=indices[active_fixed], - ) - tr_radius = 10.0 - cg_method = "trsbox" - gtol_abs = 1e-8 - gtol_rel = 1e-8 - default_radius 
= 100.00 - min_radius = 1e-10 - max_radius = 1e10 - - res_fast = cg_step_fast( - x_candidate, - gradient_inactive, - hessian_inactive, - lower_bounds, - upper_bounds, - inactive, - active_lower, - active_upper, - active_fixed, - tr_radius, - cg_method, - gtol_abs, - gtol_rel, - default_radius, - min_radius, - max_radius, - ) - res_orig = cg_step_orig( - x_candidate=x_candidate, - gradient_inactive=gradient_inactive, - hessian_inactive=hessian_inactive, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - active_bounds_info=bounds_info, - trustregion_radius=tr_radius, - conjugate_gradient_method=cg_method, - gtol_abs_conjugate_gradient=gtol_abs, - gtol_rel_conjugate_gradient=gtol_rel, - options_update_radius={ - "default_radius": default_radius, - "min_radius": min_radius, - "max_radius": max_radius, - }, - ) - aae(res_orig[0], res_fast[0]) - aae(res_orig[1], res_fast[1]) - aaae(res_orig[2], res_fast[2]) - - -@pytest.mark.slow() -def test_compute_predicet_reduction_from_conjugate_gradient_step(): - cg_step = np.arange(10).astype(float) / 10 - cg_step_inactive = np.array([1, 2, 3]).astype(float) - grad = np.arange(10).astype(float) - grad_inactive = np.arange(3).astype(float) - hessian_inactive = np.arange(9).reshape(3, 3).astype(float) - indices = np.arange(10) - inactive_bounds = np.array([False] + [True] * 3 + [False] * 6) - res_fast = reduction_cg_step_fast( - cg_step, - cg_step_inactive, - grad, - grad_inactive, - hessian_inactive, - inactive_bounds, - ) - bounds_info = ActiveBounds( - inactive=indices[inactive_bounds], active=indices[~inactive_bounds] - ) - res_orig = reduction_cg_step_orig( - cg_step, cg_step_inactive, grad, grad_inactive, hessian_inactive, bounds_info - ) - aae(res_orig, res_fast) - - -@pytest.mark.slow() -def test_update_trustregion_radius_conjugate_gradient(): - f_candidate = -1234.56 - predicted_reduction = 200 - actual_reduction = 150 - x_norm_cg = 3.16 - tr_radius = 5 - options_update_radius = { - "eta1": 1.0e-4, - "eta2": 0.25, - "eta3": 0.50, - "eta4": 0.90, - "alpha1": 0.25, - "alpha2": 0.50, - "alpha3": 1.00, - "alpha4": 2.00, - "alpha5": 4.00, - "min_radius": 1e-10, - "max_radius": 1e10, - } - res_fast = update_radius_cg_fast( - f_candidate=f_candidate, - predicted_reduction=predicted_reduction, - actual_reduction=actual_reduction, - x_norm_cg=x_norm_cg, - trustregion_radius=tr_radius, - **options_update_radius, - ) - res_orig = update_radius_cg_orig( - f_candidate=f_candidate, - predicted_reduction=predicted_reduction, - actual_reduction=actual_reduction, - x_norm_cg=x_norm_cg, - trustregion_radius=tr_radius, - options=options_update_radius, - ) - assert res_orig[0] == res_fast[0] - assert res_orig[1] == res_fast[1] - - -@pytest.mark.slow() -def test_perform_gradient_descent_step(): - x_candidate = np.zeros(10) - f_candidate_initial = 1234.56 - gradient_projected = np.arange(10).astype(float) - hessian_inactive = np.arange(64).reshape(8, 8).astype(float) - model_gradient = gradient_projected / 2 - model_hessian = np.arange(100).reshape(10, 10).astype(float) - lower_bounds = -np.ones(10) - upper_bounds = np.array([1] * 8 + [-0.01] * 2) - indices = np.arange(10) - inactive_bounds = np.array([True] * 8 + [False] * 2) - - maxiter = 3 - options_update_radius = { - "mu1": 0.35, - "mu2": 0.50, - "gamma1": 0.0625, - "gamma2": 0.5, - "gamma3": 2.0, - "gamma4": 5.0, - "theta": 0.25, - "default_radius": 100, - } - model = ScalarModel( - linear_terms=model_gradient, square_terms=model_hessian, intercept=0 - ) - bounds_info = 
ActiveBounds(inactive=indices[inactive_bounds]) - res_fast = gradient_descent_fast( - x_candidate=x_candidate, - f_candidate_initial=f_candidate_initial, - gradient_projected=gradient_projected, - hessian_inactive=hessian_inactive, - model_gradient=model_gradient, - model_hessian=model_hessian, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - inactive_bounds=inactive_bounds, - maxiter_steepest_descent=maxiter, - **options_update_radius, - ) - res_orig = gradient_descent_orig( - x_candidate=x_candidate, - f_candidate_initial=f_candidate_initial, - gradient_projected=gradient_projected, - hessian_inactive=hessian_inactive, - model=model, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - active_bounds_info=bounds_info, - maxiter_steepest_descent=maxiter, - options_update_radius=options_update_radius, - ) - aae(res_orig[0], res_fast[0]) - for i in range(1, len(res_orig)): - assert res_orig[i] == res_fast[i] - - -@pytest.mark.slow() -def test_update_trustregion_radius_and_gradient_descent(): - options_update_radius = { - "mu1": 0.35, - "mu2": 0.50, - "gamma1": 0.0625, - "gamma2": 0.5, - "gamma3": 2.0, - "gamma4": 5.0, - "theta": 0.25, - "min_radius": 1e-10, - "max_radius": 1e10, - "default_radius": 100, - } - - trustregion_radius = 100.00 - radius_lower_bound = 90.00 - predicted_reduction = 0.9 - actual_reduction = 1.1 - gradient_norm = 10.0 - res_orig = _update_trustregion_radius_and_gradient_descent( - trustregion_radius, - radius_lower_bound, - predicted_reduction, - actual_reduction, - gradient_norm, - options_update_radius, - ) - options_update_radius.pop("min_radius") - options_update_radius.pop("max_radius") - options_update_radius.pop("default_radius") - res_fast = _update_trr_and_gd_fast( - trustregion_radius, - radius_lower_bound, - predicted_reduction, - actual_reduction, - gradient_norm, - **options_update_radius, - ) - assert res_orig[0] == res_fast[0] - assert res_fast[1] == res_orig[1] - - -@pytest.mark.slow() -def test_minimize_bntr(): - model_data = pd.read_pickle(TEST_FIXTURES_DIR / "scalar_model.pkl") - model = ScalarModel(**model_data) - lower_bounds = -np.ones(len(model.linear_terms)) - upper_bounds = np.ones(len(model.linear_terms)) - options = { - "maxiter": 20, - "maxiter_gradient_descent": 5, - "conjugate_gradient_method": "cg", - "gtol_abs": 1e-08, - "gtol_rel": 1e-08, - "gtol_scaled": 0.0, - "gtol_abs_conjugate_gradient": 1e-08, - "gtol_rel_conjugate_gradient": 1e-06, - } - x0 = np.zeros_like(lower_bounds) - res_orig = bntr(model, lower_bounds, upper_bounds, x_candidate=x0, **options) - res_fast = _bntr_fast_jitted( - model_gradient=model.linear_terms, - model_hessian=model.square_terms, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - x_candidate=x0, - **options, - ) - # using aaae to get tests run on windows machines. 
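# NOTE (editorial sketch): `aaae` is numpy.testing.assert_array_almost_equal,
# aliased in the imports of this test module. By default it requires agreement
# to 6 decimal places (abs(actual - desired) < 1.5e-6), which absorbs the tiny
# floating-point differences the jitted solver produces across platforms. A
# minimal, self-contained illustration with hypothetical values:
import numpy as np
from numpy.testing import assert_array_almost_equal

assert_array_almost_equal(np.array([1.0 + 4e-7]), np.array([1.0]))  # passes
# assert_array_almost_equal(np.array([1.0 + 4e-6]), np.array([1.0]))  # would raise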
- aaae(res_orig["x"], res_fast[0]) - aaae(res_orig["criterion"], res_fast[1]) - assert res_orig["success"] == res_fast[3] - - -@pytest.mark.slow() -def test_minimize_bntr_break_loop_early(): - model_data = pd.read_pickle(TEST_FIXTURES_DIR / "scalar_model.pkl") - model = ScalarModel(**model_data) - lower_bounds = -np.ones(len(model.linear_terms)) - upper_bounds = np.ones(len(model.linear_terms)) - options = { - "maxiter": 20, - "maxiter_gradient_descent": 5, - "conjugate_gradient_method": "cg", - "gtol_abs": 10, - "gtol_rel": 10, - "gtol_scaled": 10, - "gtol_abs_conjugate_gradient": 10, - "gtol_rel_conjugate_gradient": 10, - } - res_fast = _bntr_fast_jitted( - model_gradient=model.linear_terms, - model_hessian=model.square_terms, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - x_candidate=np.zeros_like(lower_bounds), - **options, - ) - # using aaae to get tests run on windows machines. - aaae(np.zeros(len(model.linear_terms)), res_fast[0]) - aaae(0, res_fast[1]) - assert res_fast[3] - assert res_fast[2] == 0 diff --git a/tests/optimization/subsolvers/test_gqtpar_fast.py b/tests/optimization/subsolvers/test_gqtpar_fast.py deleted file mode 100644 index 2c8fdb66e..000000000 --- a/tests/optimization/subsolvers/test_gqtpar_fast.py +++ /dev/null @@ -1,98 +0,0 @@ -import numpy as np -from estimagic.optimization.subsolvers.gqtpar import ( - DampingFactors, - HessianInfo, -) -from estimagic.optimization.subsolvers.gqtpar import ( - _compute_smallest_step_len_for_candidate_vector as compute_smallest_step_orig, -) -from estimagic.optimization.subsolvers.gqtpar import ( - _find_new_candidate_and_update_parameters as find_new_and_update_candidate_orig, -) -from estimagic.optimization.subsolvers.gqtpar import ( - _get_initial_guess_for_lambdas as init_lambdas_orig, -) -from estimagic.optimization.subsolvers.gqtpar_fast import ( - _compute_smallest_step_len_for_candidate_vector as compute_smallest_step_fast, -) -from estimagic.optimization.subsolvers.gqtpar_fast import ( - _find_new_candidate_and_update_parameters as find_new_and_update_candidate_fast, -) -from estimagic.optimization.subsolvers.gqtpar_fast import ( - _get_initial_guess_for_lambdas as init_lambdas_fast, -) -from estimagic.optimization.tranquilo.models import ScalarModel -from numpy.testing import assert_array_almost_equal as aaae - - -def test_get_initial_guess_for_lambda(): - rng = np.random.default_rng(12345) - model_gradient = rng.random(10) - model_hessian = rng.random((10, 10)) - model_hessian = model_hessian @ model_hessian.T - model = ScalarModel( - linear_terms=model_gradient, square_terms=model_hessian, intercept=None - ) - res = init_lambdas_fast(model_gradient, model_hessian) - expected = init_lambdas_orig(model) - assert res[0] == expected.candidate - assert res[1] == expected.lower_bound - aaae(res[2], expected.upper_bound) - - -def test_find_new_candidate_and_update_parameters(): - rng = np.random.default_rng(12345) - model_gradient = rng.random(10) - model_hessian = rng.random((10, 10)) - model_hessian = model_hessian @ model_hessian.T - model = ScalarModel( - linear_terms=model_gradient, square_terms=model_hessian, intercept=None - ) - hessian_upper_triangular = np.triu(model_hessian) - candidate = 0.8 - hessian_plus_lambda = model_hessian + candidate * np.eye(10) - lower_bound = 0.3 - upper_bound = 1.3 - criteria = {"k_easy": 0.1, "k_hard": 0.2} - converged = False - already_factorized = False - lambdas = DampingFactors( - candidate=candidate, lower_bound=lower_bound, upper_bound=upper_bound - ) - hessian_info 
= HessianInfo( - hessian_plus_lambda=hessian_plus_lambda, - upper_triangular=hessian_upper_triangular, - already_factorized=already_factorized, - ) - res = find_new_and_update_candidate_fast( - model_gradient, - model_hessian, - hessian_upper_triangular, - hessian_plus_lambda, - already_factorized, - candidate, - lower_bound, - upper_bound, - criteria, - converged, - ) - expected = find_new_and_update_candidate_orig( - model, hessian_info, lambdas, criteria, converged - ) - aaae(res[0], expected[0]) - aaae(res[1], expected[1].hessian_plus_lambda) - aaae(res[2], expected[1].already_factorized) - aaae(res[3], expected[2].candidate) - aaae(res[4], expected[2].lower_bound) - aaae(res[5], expected[2].upper_bound) - assert res[6] == expected[3] - - -def test_compute_smallest_step_len_for_candidate_vector(): - rng = np.random.default_rng(12345) - x_candidate = rng.random(10) - rng = np.random.default_rng(45667) - z_min = rng.random(10) - res = compute_smallest_step_fast(x_candidate, z_min) - expected = compute_smallest_step_orig(x_candidate, z_min) - aaae(res, expected) diff --git a/tests/optimization/subsolvers/test_gqtpar_lambdas.py b/tests/optimization/subsolvers/test_gqtpar_lambdas.py deleted file mode 100644 index 606af5a98..000000000 --- a/tests/optimization/subsolvers/test_gqtpar_lambdas.py +++ /dev/null @@ -1,20 +0,0 @@ -import estimagic as em -from estimagic.benchmarking.get_benchmark_problems import get_benchmark_problems - - -def test_gqtpar_lambdas(): - algo_options = { - "disable_convergence": True, - "stopping_max_iterations": 30, - "sample_filter": "keep_all", - "sampler": "random_hull", - "subsolver_options": {"k_hard": 0.001, "k_easy": 0.001}, - } - problem_info = get_benchmark_problems("more_wild")["freudenstein_roth_good_start"] - - em.minimize( - criterion=problem_info["inputs"]["criterion"], - params=problem_info["inputs"]["params"], - algo_options=algo_options, - algorithm="tranquilo", - ) diff --git a/tests/optimization/subsolvers/test_minimize_trust_region.py b/tests/optimization/subsolvers/test_minimize_trust_region.py deleted file mode 100644 index b00941fa8..000000000 --- a/tests/optimization/subsolvers/test_minimize_trust_region.py +++ /dev/null @@ -1,484 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.subsolvers._conjugate_gradient import ( - _get_distance_to_trustregion_boundary as gdtb, -) -from estimagic.optimization.subsolvers._conjugate_gradient import ( - _update_vectors_for_next_iteration as uvnr, -) -from estimagic.optimization.subsolvers._conjugate_gradient import ( - minimize_trust_cg, -) -from estimagic.optimization.subsolvers._conjugate_gradient_fast import ( - _get_distance_to_trustregion_boundary as gdtb_fast, -) -from estimagic.optimization.subsolvers._conjugate_gradient_fast import ( - _update_vectors_for_next_iteration as uvnr_fast, -) -from estimagic.optimization.subsolvers._conjugate_gradient_fast import ( - minimize_trust_cg_fast, -) -from estimagic.optimization.subsolvers._steihaug_toint import ( - minimize_trust_stcg, -) -from estimagic.optimization.subsolvers._steihaug_toint_fast import ( - minimize_trust_stcg_fast, -) -from estimagic.optimization.subsolvers._trsbox import ( - _apply_bounds_to_candidate_vector, - minimize_trust_trsbox, -) -from estimagic.optimization.subsolvers._trsbox import ( - _calc_greatest_criterion_reduction as greatest_reduction_orig, -) -from estimagic.optimization.subsolvers._trsbox import ( - _calc_new_reduction as new_reduction_orig, -) -from estimagic.optimization.subsolvers._trsbox import ( - 
_calc_upper_bound_on_tangent as upper_bound_tangent_orig, -) -from estimagic.optimization.subsolvers._trsbox import ( - _compute_new_search_direction_and_norm as new_dir_and_norm_orig, -) -from estimagic.optimization.subsolvers._trsbox import ( - _take_constrained_step_up_to_boundary as step_constrained_orig, -) -from estimagic.optimization.subsolvers._trsbox import ( - _take_unconstrained_step_up_to_boundary as step_unconstrained_orig, -) -from estimagic.optimization.subsolvers._trsbox import ( - _update_candidate_vectors_and_reduction as update_candidate_orig, -) -from estimagic.optimization.subsolvers._trsbox import ( - _update_candidate_vectors_and_reduction_alt_step as update_candidate_alt_orig, -) -from estimagic.optimization.subsolvers._trsbox import ( - _update_tangent as update_tanget_orig, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _calc_greatest_criterion_reduction as greatest_reduction_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _calc_new_reduction as new_reduction_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _calc_upper_bound_on_tangent as upper_bound_tangent_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _compute_new_search_direction_and_norm as new_dir_and_norm_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _perform_alternative_trustregion_step as perform_step_alt_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _perform_alternative_trustregion_step as perform_step_alt_orig, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _take_constrained_step_up_to_boundary as step_constrained_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _take_unconstrained_step_up_to_boundary as step_unconstrained_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _update_candidate_vectors_and_reduction as update_candidate_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _update_candidate_vectors_and_reduction_alt_step as update_candidate_alt_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _update_tangent as update_tanget_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - minimize_trust_trsbox_fast, -) -from numpy.testing import assert_array_almost_equal as aaae -from numpy.testing import assert_array_equal as aae - - -def test_minimize_trust_cg(): - grad = np.arange(5).astype(float) - hessian = np.arange(25).reshape(5, 5).astype(float) - radius = 2 - gtol_abs = 1e-8 - gtol_rel = 1e-6 - aae( - minimize_trust_cg(grad, hessian, radius), - minimize_trust_cg_fast(grad, hessian, radius, gtol_abs, gtol_rel), - ) - - -def test_get_distance_to_trustregion_boundary(): - x = np.arange(5).astype(float) - direction = np.arange(5).astype(float) - radius = 2 - assert gdtb(x, direction, radius) == gdtb_fast(x, direction, radius) - - -def test_update_vectors(): - x = np.arange(5).astype(float) - residual = np.ones(5) * 0.5 - direction = np.ones(5) - hessian = np.arange(25).reshape(5, 5) - alpha = 0.5 - res_orig = uvnr(x, residual, direction, hessian, alpha) - res_fast = uvnr_fast(x, residual, direction, hessian, alpha) - for i in range(len(res_orig)): - aae(res_orig[i], res_fast[i]) - - -def test_take_unconstrained_step_towards_boundary(): - raw_distance = np.array([0.5]) - gradient_sumsq = 5.0 - gradient_projected_sumsq = 2.5 - g_x = 0.3 - g_hess_g = -0.3 - for i in range(2): - assert ( - step_unconstrained_orig( - raw_distance, 
gradient_sumsq, gradient_projected_sumsq, g_x, g_hess_g - )[i] - == step_unconstrained_fast( - raw_distance, gradient_sumsq, gradient_projected_sumsq, g_x, g_hess_g - )[i] - ) - - -def test_take_constrained_step_towards_boundary(): - x_candidate = np.zeros(5) - gradient_projected = np.ones(5) - step_len = 2.5 - lower_bounds = np.array([-1.0] * 3 + [0.01] * 2) - upper_bounds = np.ones(5) - for i in range(2): - assert ( - step_constrained_orig( - x_candidate, gradient_projected, step_len, lower_bounds, upper_bounds - )[i] - == step_constrained_fast( - x_candidate, gradient_projected, step_len, lower_bounds, upper_bounds - )[i] - ) - - -def test_update_candidate_vector_and_reduction_alt_step(): - x = np.zeros(5) - search_direction = 0.5 * np.ones(5) - x_bounded = np.array([0] * 2 + [1] * 3) - g = np.ones(5) - cosine = 0.5 - sine = 0.5 - hessian_s = np.ones(5) - hes_red = np.ones(5) - res_orig = update_candidate_alt_orig( - x, search_direction, x_bounded, g, cosine, sine, hessian_s, hes_red - ) - - res_fast = update_candidate_alt_fast( - x, search_direction, x_bounded, g, cosine, sine, hessian_s, hes_red - ) - for i in range(len(res_orig)): - aae(res_orig[i], res_fast[i]) - - -def test_update_candidate_vector_and_reduction(): - x_candidate = np.zeros(5) - x_bounded = np.array([0] * 3 + [-0.01] * 2) - gradient_candidate = np.ones(5) - gradient_projected = 0.5 * np.ones(5) - step_len = 0.05 - total_reduction = 0 - curve_min = -0.5 - index_bound_active = 3 - gradient_projected_sumsq = 25 - gradient_sumsq = 25 - g_hess_g = 100 - hess_g = np.arange(5).astype(float) - res_fast = update_candidate_fast( - x_candidate, - x_bounded, - gradient_candidate, - gradient_projected, - step_len, - total_reduction, - curve_min, - index_bound_active, - gradient_projected_sumsq, - gradient_sumsq, - g_hess_g, - hess_g, - ) - res_orig = update_candidate_orig( - x_candidate, - x_bounded, - gradient_candidate, - gradient_projected, - step_len, - total_reduction, - curve_min, - index_bound_active, - gradient_projected_sumsq, - gradient_sumsq, - g_hess_g, - hess_g, - ) - for i in range(len(res_orig)): - aae(res_orig[i], res_fast[i]) - - -def test_update_candidate_vector_and_reduction_without_active_bounds(): - x_candidate = np.zeros(5) - x_bounded = np.zeros(5) - gradient_candidate = np.ones(5) - gradient_projected = 0.5 * np.ones(5) - step_len = 0.05 - total_reduction = 0 - curve_min = -0.5 - gradient_projected_sumsq = 25 - gradient_sumsq = 25 - g_hess_g = 100 - hess_g = np.arange(5).astype(float) - res_fast = update_candidate_fast( - x_candidate, - x_bounded, - gradient_candidate, - gradient_projected, - step_len, - total_reduction, - curve_min, - np.array([]), - gradient_projected_sumsq, - gradient_sumsq, - g_hess_g, - hess_g, - ) - res_orig = update_candidate_orig( - x_candidate, - x_bounded, - gradient_candidate, - gradient_projected, - step_len, - total_reduction, - curve_min, - None, - gradient_projected_sumsq, - gradient_sumsq, - g_hess_g, - hess_g, - ) - for i in range(len(res_orig)): - aae(res_orig[i], res_fast[i]) - - -@pytest.mark.slow() -def test_perform_alternative_tr_step(): - x_candidate = np.zeros(5) - x_bounded = np.array([0.1] * 2 + [0] * 3) - gradient_candidate = np.ones(5).astype(float) - model_hessian = np.arange(25).reshape(5, 5).astype(float) - lower_bounds = np.array([0.1] * 2 + [-1] * 3) - upper_bounds = np.ones(5) - n_fixed_variables = 1 - total_reduction = 1.5 - res_orig = perform_step_alt_orig( - x_candidate, - x_bounded, - gradient_candidate, - model_hessian, - lower_bounds, - upper_bounds, 
- n_fixed_variables, - total_reduction, - ) - res_fast = perform_step_alt_fast( - x_candidate, - x_bounded, - gradient_candidate, - model_hessian, - lower_bounds, - upper_bounds, - n_fixed_variables, - total_reduction, - ) - aae(res_orig, res_fast) - - -@pytest.mark.slow() -def test_perform_alternative_tr_step_without_active_bounds(): - x_candidate = np.zeros(5) - x_bounded = np.zeros(5) - gradient_candidate = np.ones(5).astype(float) - model_hessian = np.arange(25).reshape(5, 5).astype(float) - lower_bounds = -10 * np.ones(5) - upper_bounds = 10 * np.ones(5) - n_fixed_variables = 1 - total_reduction = 1.5 - res_orig = perform_step_alt_orig( - x_candidate, - x_bounded, - gradient_candidate, - model_hessian, - lower_bounds, - upper_bounds, - n_fixed_variables, - total_reduction, - ) - res_fast = perform_step_alt_fast( - x_candidate, - x_bounded, - gradient_candidate, - model_hessian, - lower_bounds, - upper_bounds, - n_fixed_variables, - total_reduction, - ) - aae(res_orig, res_fast) - - -def test_calc_upper_bound_on_tangent(): - x_candidate = np.zeros(5) - search_direction = 0.5 * np.ones(5) - x_bounded = np.array([0] * 4 + [0.01]) - lower_bounds = np.array([-1] * 4 + [0.01]) - upper_bounds = np.ones(5) - n_fixed_variables = 2 - res_orig = upper_bound_tangent_orig( - x_candidate, - search_direction, - x_bounded, - lower_bounds, - upper_bounds, - n_fixed_variables, - ) - res_fast = upper_bound_tangent_fast( - x_candidate, - search_direction, - x_bounded, - lower_bounds, - upper_bounds, - n_fixed_variables, - ) - for i in range(len(res_orig)): - aae(res_orig[i], res_fast[i]) - - -def test_calc_upper_bound_on_tangent_without_active_bounds(): - x_candidate = np.zeros(5) - search_direction = 0.5 * np.ones(5) - x_bounded = np.zeros(5) - lower_bounds = -np.ones(5) - upper_bounds = np.ones(5) - n_fixed_variables = 2 - res_orig = upper_bound_tangent_orig( - x_candidate, - search_direction, - x_bounded, - lower_bounds, - upper_bounds, - n_fixed_variables, - ) - res_fast = upper_bound_tangent_fast( - x_candidate, - search_direction, - x_bounded, - lower_bounds, - upper_bounds, - n_fixed_variables, - ) - for i in range(len(res_orig)): - if res_orig[i] is not None: - aae(res_orig[i], res_fast[i]) - else: - assert res_fast[i].size == 0 - - -@pytest.mark.slow() -def test_minimize_trs_box_quadratic(): - model_gradient = np.arange(10).astype(float) - model_hessian = np.arange(100).reshape(10, 10).astype(float) - trustregion_radius = 10.0 - lower_bounds = -np.ones(10) - upper_bounds = np.ones(10) - res_fast = minimize_trust_trsbox_fast( - model_gradient, model_hessian, trustregion_radius, lower_bounds, upper_bounds - ) - res_orig = minimize_trust_trsbox( - model_gradient, - model_hessian, - trustregion_radius, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - ) - aae(res_fast, res_orig) - - -@pytest.mark.slow() -def test_minimize_stcg_fast(): - model_gradient = np.arange(10).astype(float) - model_hessian = np.arange(100).reshape(10, 10).astype(float) - trustregion_radius = 10.0 - res_orig = minimize_trust_stcg(model_gradient, model_hessian, trustregion_radius) - res_fast = minimize_trust_stcg_fast( - model_gradient, model_hessian, trustregion_radius - ) - aaae(res_orig, res_fast) - - -def test_minimize_cg(): - model_gradient = np.arange(10).astype(float) - model_hessian = np.arange(100).reshape(10, 10).astype(float) - trustregion_radius = 10.0 - gtol_abs = 1e-8 - gtol_rel = 1e-6 - res_orig = minimize_trust_cg(model_gradient, model_hessian, trustregion_radius) - res_fast = minimize_trust_cg_fast( - 
model_gradient, model_hessian, trustregion_radius, gtol_abs, gtol_rel - ) - aaae(res_orig, res_fast) - - -def test_apply_bounds_to_candidate_vector(): - x_bounded = np.array([-1, 1, 0, 0, 0]) - x_candidate = np.zeros(5) - lower_bounds = np.array([-1, -1, 0.01, -1, -1]) - upper_bounds = np.array([1, 1, 1, -0.01, 1]) - res = _apply_bounds_to_candidate_vector( - x_candidate, x_bounded, lower_bounds, upper_bounds - ) - expected = np.array([-1, 1, 0.01, -0.01, 0]) - aae(res, expected) - - -def test_calc_greatest_criterion_reduction(): - res = greatest_reduction_fast(0.8, 1.1, 1.1, 1.1, 1.1, 1.1) - expected = greatest_reduction_orig(0.8, 1.1, 1.1, 1.1, 1.1, 1.1) - assert res == expected - - -def test_calc_new_reduction(): - res = new_reduction_fast(0.8, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1) - expected = new_reduction_orig(0.8, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1) - assert res == expected - - -def test_update_tangent(): - res = update_tanget_fast(0, 0.8, 2, 2, 1, 3) - expected = update_tanget_orig(0, 0.8, 2, 2, 1, 3) - assert res == expected - - -def test_compute_new_search_direction_and_norm(): - x_candidate = np.zeros(5) - x_bounded = np.zeros(5) - gradient_candidate = np.ones(5) - x_reduced = 0.5 - x_grad = 1 - raw_reduction = 0.5 - res = new_dir_and_norm_fast( - x_candidate, x_bounded, x_reduced, gradient_candidate, x_grad, raw_reduction - ) - expected = new_dir_and_norm_orig( - x_candidate, x_bounded, x_reduced, gradient_candidate, x_grad, raw_reduction - ) - aaae(expected[0], res[0]) - aaae(expected[1], res[1]) diff --git a/tests/optimization/test_history_collection.py b/tests/optimization/test_history_collection.py index 0354dfe61..57042825e 100644 --- a/tests/optimization/test_history_collection.py +++ b/tests/optimization/test_history_collection.py @@ -3,7 +3,7 @@ import numpy as np import pytest from estimagic.logging.read_log import OptimizeLogReader -from estimagic.optimization import AVAILABLE_ALGORITHMS +from estimagic.algorithms import AVAILABLE_ALGORITHMS from estimagic.optimization.optimize import minimize from numpy.testing import assert_array_almost_equal as aaae from numpy.testing import assert_array_equal as aae diff --git a/tests/optimization/test_many_algorithms.py b/tests/optimization/test_many_algorithms.py index ff738982b..8ab1373c6 100644 --- a/tests/optimization/test_many_algorithms.py +++ b/tests/optimization/test_many_algorithms.py @@ -9,7 +9,7 @@ import numpy as np import pytest -from estimagic.optimization import AVAILABLE_ALGORITHMS, GLOBAL_ALGORITHMS +from estimagic.algorithms import AVAILABLE_ALGORITHMS, GLOBAL_ALGORITHMS from estimagic.optimization.optimize import minimize from numpy.testing import assert_array_almost_equal as aaae diff --git a/tests/optimization/test_quadratic_subsolvers.py b/tests/optimization/test_quadratic_subsolvers.py index f541d4a82..c2d9f1fe2 100644 --- a/tests/optimization/test_quadratic_subsolvers.py +++ b/tests/optimization/test_quadratic_subsolvers.py @@ -12,9 +12,6 @@ from estimagic.optimization.subsolvers.bntr import ( bntr, ) -from estimagic.optimization.subsolvers.bntr_fast import ( - bntr_fast, -) from estimagic.optimization.subsolvers.gqtpar import ( gqtpar, ) @@ -473,15 +470,7 @@ def test_bounded_newton_trustregion( x_candidate=np.zeros_like(x_expected), **options ) - result_fast = bntr_fast( - main_model, - lower_bounds, - upper_bounds, - x_candidate=np.zeros_like(x_expected), - **options - ) aaae(result["x"], x_expected, decimal=5) - aaae(result_fast["x"], x_expected, decimal=5) # 
====================================================================================== diff --git a/tests/optimization/test_with_nonlinear_constraints.py b/tests/optimization/test_with_nonlinear_constraints.py index 9aabfc233..284816864 100644 --- a/tests/optimization/test_with_nonlinear_constraints.py +++ b/tests/optimization/test_with_nonlinear_constraints.py @@ -5,7 +5,7 @@ import pytest from estimagic import maximize, minimize from estimagic.config import IS_CYIPOPT_INSTALLED -from estimagic.optimization import AVAILABLE_ALGORITHMS +from estimagic.algorithms import AVAILABLE_ALGORITHMS from numpy.testing import assert_array_almost_equal as aaae NLC_ALGORITHMS = [ diff --git a/tests/optimization/tranquilo/test_acceptance_decision.py b/tests/optimization/tranquilo/test_acceptance_decision.py deleted file mode 100644 index 57457be2f..000000000 --- a/tests/optimization/tranquilo/test_acceptance_decision.py +++ /dev/null @@ -1,140 +0,0 @@ -from collections import namedtuple - -import numpy as np -import pytest -from estimagic.optimization.tranquilo.acceptance_decision import ( - _accept_simple, - _get_acceptance_result, - calculate_rho, -) -from estimagic.optimization.tranquilo.history import History -from estimagic.optimization.tranquilo.region import Region -from estimagic.optimization.tranquilo.solve_subproblem import SubproblemResult -from numpy.testing import assert_array_equal - -# ====================================================================================== -# Fixtures -# ====================================================================================== - - -@pytest.fixture() -def subproblem_solution(): - res = SubproblemResult( - x=1 + np.arange(2.0), - expected_improvement=1.0, - n_iterations=1, - success=True, - x_unit=None, - shape=None, - ) - return res - - -# ====================================================================================== -# Test accept_xxx -# ====================================================================================== - - -trustregion = Region(center=np.zeros(2), radius=2.0) -State = namedtuple("State", "x trustregion fval index") -states = [ # we will parametrize over `states` - State(np.arange(2.0), trustregion, 0.25, 0), # better than candidate - State(np.arange(2.0), trustregion, 1, 0), # worse than candidate -] - - -@pytest.mark.parametrize("state", states) -def test_accept_simple( - state, - subproblem_solution, -): - history = History(functype="scalar") - - idxs = history.add_xs(np.arange(10).reshape(5, 2)) - - history.add_evals(idxs.repeat(2), np.arange(10)) - - def wrapped_criterion(eval_info): - indices = np.array(list(eval_info)).repeat(np.array(list(eval_info.values()))) - history.add_evals(indices, -indices) - - res_got = _accept_simple( - subproblem_solution=subproblem_solution, - state=state, - history=history, - wrapped_criterion=wrapped_criterion, - min_improvement=0.0, - n_evals=2, - ) - - assert res_got.accepted - assert res_got.index == 5 - assert res_got.candidate_index == 5 - assert_array_equal(res_got.x, subproblem_solution.x) - assert_array_equal(res_got.candidate_x, 1.0 + np.arange(2)) - - -# ====================================================================================== -# Test _get_acceptance_result -# ====================================================================================== - - -def test_get_acceptance_result(): - candidate_x = 1 + np.arange(2) - candidate_fval = 0 - candidate_index = 0 - rho = 1 - tr = Region(center=np.zeros(2), radius=2.0) - old_state = namedtuple("State", "x fval 
index trustregion")(np.arange(2), 1, 1, tr) - - ar_when_accepted = _get_acceptance_result( - candidate_x=candidate_x, - candidate_fval=candidate_fval, - candidate_index=candidate_index, - rho=rho, - old_state=old_state, - is_accepted=True, - ) - - assert_array_equal(ar_when_accepted.x, candidate_x) - assert ar_when_accepted.fval == candidate_fval - assert ar_when_accepted.index == candidate_index - assert ar_when_accepted.accepted is True - assert ar_when_accepted.step_length == np.sqrt(2) - assert ar_when_accepted.relative_step_length == np.sqrt(2) / 2 - - ar_when_not_accepted = _get_acceptance_result( - candidate_x=candidate_x, - candidate_fval=candidate_fval, - candidate_index=candidate_index, - rho=rho, - old_state=old_state, - is_accepted=False, - ) - - assert_array_equal(ar_when_not_accepted.x, old_state.x) - assert ar_when_not_accepted.fval == old_state.fval - assert ar_when_not_accepted.index == old_state.index - assert ar_when_not_accepted.accepted is False - assert ar_when_not_accepted.step_length == 0 - assert ar_when_not_accepted.relative_step_length == 0 - - -# ====================================================================================== -# Test calculate_rho -# ====================================================================================== - - -CASES = [ - (0, 0, -np.inf), - (-1, 0, -np.inf), - (1, 0, np.inf), - (0, 1, 0), - (1, 2, 1 / 2), -] - - -@pytest.mark.parametrize("actual_improvement, expected_improvement, expected", CASES) -def test_calculate_rho(actual_improvement, expected_improvement, expected): - rho = calculate_rho(actual_improvement, expected_improvement) - assert rho == expected diff --git a/tests/optimization/tranquilo/test_acceptance_sample_size.py b/tests/optimization/tranquilo/test_acceptance_sample_size.py deleted file mode 100644 index 85f279c3e..000000000 --- a/tests/optimization/tranquilo/test_acceptance_sample_size.py +++ /dev/null @@ -1,86 +0,0 @@ -import pytest -from estimagic.optimization.tranquilo.acceptance_sample_size import ( - _compute_factor, - _get_optimal_sample_sizes, -) -from scipy.stats import norm - -TEST_CASES = [ - (0.5, 0.5, 0.5, 0), - (1.0, norm.cdf(0.5), norm.sf(0.5), 1), - (2.0, norm.cdf(0.5), norm.sf(0.5), 1 / 4), -] - - -@pytest.mark.parametrize( - "minimal_effect_size, power_level, significance_level, expected_factor", TEST_CASES -) -def test_factor(minimal_effect_size, power_level, significance_level, expected_factor): - assert ( - abs( - expected_factor - - _compute_factor(minimal_effect_size, power_level, significance_level) - ) - < 1e-6 - ) - - -@pytest.mark.parametrize("minimal_effect_size", [0.5, 1.0, 2.0]) -@pytest.mark.parametrize("power_level", [0.25, 0.5, 0.75]) -@pytest.mark.parametrize("significance_level", [0.01, 0.05, 0.1, 0.2]) -def test_bounds(minimal_effect_size, power_level, significance_level): - res = [ - _get_optimal_sample_sizes( - sd_1=1, - sd_2=1, - existing_n1=_n1, - minimal_effect_size=minimal_effect_size, - power_level=power_level, - significance_level=significance_level, - ) - for _n1 in (0, 10) - ] - # test that if both sample sizes are chosen optimally the overall number is smaller - assert sum(res[0]) <= sum(res[1]) + 10 - # test that if there are existing samples in the first group, the second group - # can be smaller than if there are no existing samples in the first group - assert res[0][1] >= res[1][1] - - -def test_standard_deviation_influence(): - n1, n2 = _get_optimal_sample_sizes( - sd_1=1, - sd_2=3, - existing_n1=0, - minimal_effect_size=0.5, - power_level=0.5, - 
significance_level=0.2, - ) - assert n1 < n2 - - -def test_inequality(): - # Test that the inequality condition is satisfied - n1, n2 = _get_optimal_sample_sizes( - sd_1=1, - sd_2=2, - existing_n1=0, - minimal_effect_size=0.5, - power_level=0.5, - significance_level=0.2, - ) - factor = _compute_factor(0.5, 0.5, 0.2) - lhs = (1 / n1 + 2 / n2) ** (-1) - assert lhs >= factor - - -def test_first_group_is_not_sampled(): - n1, _ = _get_optimal_sample_sizes( - sd_1=1, - sd_2=1, - existing_n1=10, - minimal_effect_size=0.5, - power_level=0.5, - significance_level=0.2, - ) - assert n1 == 0 diff --git a/tests/optimization/tranquilo/test_adjust_radius.py b/tests/optimization/tranquilo/test_adjust_radius.py deleted file mode 100644 index 472ac3aca..000000000 --- a/tests/optimization/tranquilo/test_adjust_radius.py +++ /dev/null @@ -1,104 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.tranquilo.adjust_radius import adjust_radius -from estimagic.optimization.tranquilo.options import RadiusOptions - - -@pytest.fixture() -def options(): - return RadiusOptions(initial_radius=0.1) - - -def test_increase(options): - calculated = adjust_radius( - radius=1, - rho=1.5, - step_length=np.linalg.norm(np.ones(2)), - options=options, - ) - - expected = 2 - - assert calculated == expected - - -def test_increase_blocked_by_small_step(options): - calculated = adjust_radius( - radius=1, - rho=1.5, - step_length=np.linalg.norm(np.array([0.1, 0.1])), - options=options, - ) - - expected = 1 - - assert calculated == expected - - -def test_decrease(options): - calculated = adjust_radius( - radius=1, - rho=0.05, - step_length=np.linalg.norm(np.ones(2)), - options=options, - ) - - expected = 0.5 - - assert calculated == expected - - -def test_max_radius_is_not_violated(options): - calculated = adjust_radius( - radius=750_000, - rho=1.5, - step_length=np.linalg.norm(np.array([750_000])), - options=options, - ) - - expected = 1e6 - - assert calculated == expected - - -def test_min_radius_is_not_violated(options): - calculated = adjust_radius( - radius=1e-09, - rho=0.05, - step_length=np.linalg.norm(np.ones(2)), - options=options, - ) - - expected = 1e-06 - - assert calculated == expected - - -def test_constant_radius(): - options = RadiusOptions(rho_increase=1.6, initial_radius=0.1) - - calculated = adjust_radius( - radius=1, - rho=1.5, - step_length=np.linalg.norm(np.ones(2)), - options=options, - ) - - expected = 1 - - assert calculated == expected - - -def test_max_radius_to_step_ratio_is_not_violated(): - options = RadiusOptions(max_radius_to_step_ratio=2, initial_radius=0.1) - - calculated = adjust_radius( - radius=1, - rho=1.5, - step_length=np.linalg.norm(np.array([0.75])), - options=options, - ) - - expected = 1.5 - - assert calculated == expected diff --git a/tests/optimization/tranquilo/test_aggregate_models.py b/tests/optimization/tranquilo/test_aggregate_models.py deleted file mode 100644 index 1e2f1ed1a..000000000 --- a/tests/optimization/tranquilo/test_aggregate_models.py +++ /dev/null @@ -1,79 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.tranquilo.aggregate_models import ( - aggregator_identity, - aggregator_information_equality_linear, - aggregator_least_squares_linear, - aggregator_sum, -) -from estimagic.optimization.tranquilo.models import ScalarModel, VectorModel -from numpy.testing import assert_array_equal - - -@pytest.mark.parametrize("square_terms", [np.arange(9).reshape(1, 3, 3), None]) -def test_aggregator_identity(square_terms): - vector_model = 
VectorModel( - intercepts=np.array([2.0]), - linear_terms=np.arange(3).reshape(1, 3), - square_terms=square_terms, - ) - - if square_terms is None: - expected_square_terms = np.zeros((3, 3)) - else: - expected_square_terms = np.arange(9).reshape(3, 3) - - got = ScalarModel(*aggregator_identity(vector_model)) - - assert_array_equal(got.intercept, 2.0) - assert_array_equal(got.linear_terms, np.arange(3)) - assert_array_equal(got.square_terms, expected_square_terms) - - -def test_aggregator_sum(): - vector_model = VectorModel( - intercepts=np.array([1.0, 2.0]), - linear_terms=np.arange(6).reshape(2, 3), - square_terms=np.arange(18).reshape(2, 3, 3), - ) - - got = ScalarModel(*aggregator_sum(vector_model)) - - assert_array_equal(got.intercept, 3.0) - assert_array_equal(got.linear_terms, np.array([3, 5, 7])) - assert_array_equal( - got.square_terms, np.array([[9, 11, 13], [15, 17, 19], [21, 23, 25]]) - ) - - -def test_aggregator_least_squares_linear(): - vector_model = VectorModel( - intercepts=np.array([0, 2]), - linear_terms=np.arange(6).reshape(2, 3), - square_terms=np.arange(18).reshape(2, 3, 3), # should not be used by aggregator - ) - - got = ScalarModel(*aggregator_least_squares_linear(vector_model)) - - assert_array_equal(got.intercept, 4.0) - assert_array_equal(got.linear_terms, np.array([12, 16, 20])) - assert_array_equal( - got.square_terms, np.array([[18, 24, 30], [24, 34, 44], [30, 44, 58]]) - ) - - -def test_aggregator_information_equality_linear(): - vector_model = VectorModel( - intercepts=np.array([1.0, 2.0]), - linear_terms=np.arange(6).reshape(2, 3), - square_terms=np.arange(18).reshape(2, 3, 3), # should not be used by aggregator - ) - - got = ScalarModel(*aggregator_information_equality_linear(vector_model)) - - assert_array_equal(got.intercept, 3.0) - assert_array_equal(got.linear_terms, np.array([3, 5, 7])) - assert_array_equal( - got.square_terms, - np.array([[-4.5, -6.0, -7.5], [-6.0, -8.5, -11.0], [-7.5, -11.0, -14.5]]), - ) diff --git a/tests/optimization/tranquilo/test_bounds.py b/tests/optimization/tranquilo/test_bounds.py deleted file mode 100644 index fec2dbbf6..000000000 --- a/tests/optimization/tranquilo/test_bounds.py +++ /dev/null @@ -1,38 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.tranquilo.bounds import Bounds, _any_finite - -CASES = [ - (np.array([1, 2]), np.array([5, 6]), True), - (np.array([1, 2]), None, True), - (None, np.array([5, 6]), True), - (None, None, False), - (np.array([np.inf, np.inf]), np.array([np.inf, np.inf]), False), - (np.array([-np.inf, -np.inf]), np.array([np.inf, np.inf]), False), - (np.array([1, 2]), np.array([np.inf, np.inf]), True), -] - - -@pytest.mark.parametrize("lb, ub, exp", CASES) -def test_any_finite_true(lb, ub, exp): - out = _any_finite(lb, ub) - assert out is exp - - -def test_bounds_none(): - bounds = Bounds(lower=None, upper=None) - assert bounds.has_any is False - - -def test_bounds_inifinite(): - lb = np.array([np.inf, np.inf]) - ub = np.array([np.inf, np.inf]) - bounds = Bounds(lower=lb, upper=ub) - assert bounds.has_any is False - - -def test_bounds_finite(): - lb = np.array([1, 2]) - ub = np.array([5, 6]) - bounds = Bounds(lower=lb, upper=ub) - assert bounds.has_any is True diff --git a/tests/optimization/tranquilo/test_clustering.py b/tests/optimization/tranquilo/test_clustering.py deleted file mode 100644 index 8cf2d588b..000000000 --- a/tests/optimization/tranquilo/test_clustering.py +++ /dev/null @@ -1,34 +0,0 @@ -import numpy as np -from estimagic.optimization.tranquilo.clustering 
import cluster -from numpy.testing import assert_array_equal as aae - - -def test_cluster_lollipop(): - rng = np.random.default_rng(123456) - center = np.array([0.25, 0.25]) - radius = 0.05 - - x = np.array( - [ - center, - *(center + rng.uniform(low=-radius, high=radius, size=(6, 2))).tolist(), - [0.5, 0.5], - [0.75, 0.75], - ], - ) - - clusters, centers = cluster(x, epsilon=0.1) - assert len(centers) == 3 - aae(np.unique(clusters), np.arange(3)) - - -def test_cluster_grid(): - base_grid = np.linspace(-1, 1, 11) - a, b = np.meshgrid(base_grid, base_grid) - x = np.column_stack([a.flatten(), b.flatten()]) - - clusters, centers = cluster(x, epsilon=0.1) - - assert len(centers) == len(x) - aae(np.sort(clusters), np.arange(len(x))) - aae(np.sort(centers), np.arange(len(x))) diff --git a/tests/optimization/tranquilo/test_estimate_variance.py b/tests/optimization/tranquilo/test_estimate_variance.py deleted file mode 100644 index 8e7cd1793..000000000 --- a/tests/optimization/tranquilo/test_estimate_variance.py +++ /dev/null @@ -1,44 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.tranquilo.estimate_variance import ( - _estimate_variance_classic, -) -from estimagic.optimization.tranquilo.history import History -from estimagic.optimization.tranquilo.tranquilo import Region -from numpy.testing import assert_array_almost_equal as aaae - - -@pytest.mark.parametrize("model_type", ["scalar", "vector"]) -def test_estimate_variance_classic(model_type): - xs = np.array( - [ - [0.0, 0.0], # center with multiple evaluations - [10, -10], # far away with multiple evaluations - [0.1, 0.1], # close to center with too few evaluations - ] - ) - - history = History(functype="scalar") - idxs = history.add_xs(xs) - - repetitions = np.array([5, 5, 2]) - - # squaring makes sure variance is not the same across all subsamples - evals = np.arange(12) ** 2 - - history.add_evals(idxs.repeat(repetitions), evals) - - got = _estimate_variance_classic( - trustregion=Region(center=np.array([0.0, 0.0]), radius=1.0), - history=history, - model_type=model_type, - max_distance_factor=1.0, - min_n_evals=4, - ) - - if model_type == "scalar": - expected = np.var(evals[:5], ddof=1) - else: - expected = np.var(evals[:5], ddof=1).reshape(1, 1) - - aaae(got, expected) diff --git a/tests/optimization/tranquilo/test_filter_points.py b/tests/optimization/tranquilo/test_filter_points.py deleted file mode 100644 index 97ded5a11..000000000 --- a/tests/optimization/tranquilo/test_filter_points.py +++ /dev/null @@ -1,48 +0,0 @@ -from estimagic.optimization.tranquilo.filter_points import get_sample_filter -from estimagic.optimization.tranquilo.tranquilo import State -from estimagic.optimization.tranquilo.region import Region -from numpy.testing import assert_array_equal as aae -import pytest -import numpy as np - - -@pytest.fixture() -def state(): - out = State( - trustregion=Region(center=np.ones(2), radius=0.3), - model_indices=None, - model=None, - vector_model=None, - candidate_index=5, - candidate_x=np.array([1.1, 1.2]), - index=2, - x=np.ones(2), - fval=15, - rho=None, - accepted=True, - old_indices_used=None, - old_indices_discarded=None, - new_indices=None, - step_length=0.1, - relative_step_length=0.1 / 0.3, - ) - return out - - -def test_discard_all(state): - filter = get_sample_filter("discard_all") - xs = np.arange(10).reshape(5, 2) - indices = np.arange(5) - got_xs, got_idxs = filter(xs=xs, indices=indices, state=state) - expected_xs = np.ones((1, 2)) - aae(got_xs, expected_xs) - aae(got_idxs, np.array([2])) - - 
-def test_keep_all(): - filter = get_sample_filter("keep_all") - xs = np.arange(10).reshape(5, 2) - indices = np.arange(5) - got_xs, got_idxs = filter(xs=xs, indices=indices, state=None) - aae(got_xs, xs) - aae(got_idxs, indices) diff --git a/tests/optimization/tranquilo/test_fit_models.py b/tests/optimization/tranquilo/test_fit_models.py deleted file mode 100644 index de5b9f065..000000000 --- a/tests/optimization/tranquilo/test_fit_models.py +++ /dev/null @@ -1,145 +0,0 @@ -import numpy as np -import pytest -from estimagic import first_derivative, second_derivative -from estimagic.optimization.tranquilo.fit_models import _quadratic_features, get_fitter -from estimagic.optimization.tranquilo.region import Region -from numpy.testing import assert_array_almost_equal, assert_array_equal - - -def aaae(x, y, decimal=None, case=None): - tolerance = { - "hessian": 2, - "gradient": 3, - } - decimal = decimal or tolerance.get(case, None) - assert_array_almost_equal(x, y, decimal=decimal) - - -# ====================================================================================== -# Fixtures -# ====================================================================================== - - -@pytest.fixture() -def quadratic_case(): - """Test scenario with true quadratic function. - - We return true function, and function evaluations and data on random points. - - """ - n_params = 4 - n_samples = 15 - - # theoretical terms - linear_terms = 1 + np.arange(n_params) - square_terms = np.arange(n_params**2).reshape(n_params, n_params) - square_terms = square_terms + square_terms.T - - def func(x): - y = -10 + linear_terms @ x + 0.5 * x.T @ square_terms @ x - return y - - x0 = np.ones(n_params) - - # random data - rng = np.random.default_rng(56789) - x = np.array([x0 + rng.uniform(-0.01 * x0, 0.01 * x0) for _ in range(n_samples)]) - y = np.array([func(_x) for _x in list(x)]).reshape(-1, 1) - - out = { - "func": func, - "x0": x0, - "x": x, - "y": y, - "linear_terms_expected": linear_terms, - "square_terms_expected": square_terms, - } - return out - - -# ====================================================================================== -# Tests -# ====================================================================================== - - -@pytest.mark.parametrize("fitter", ["ols", "ridge", "powell", "tranquilo"]) -def test_fit_against_truth_quadratic(fitter, quadratic_case): - options = {"l2_penalty_square": 0} - fit_pounders = get_fitter( - fitter, - options, - model_type="quadratic", - residualize=False, - infinity_handling="relative", - ) - got = fit_pounders( - quadratic_case["x"], - quadratic_case["y"], - region=Region(center=np.zeros(4), radius=1.0), - old_model=None, - ) - decimal = 3 if fitter != "ridge" else 1 - aaae( - got.linear_terms.flatten(), - quadratic_case["linear_terms_expected"], - decimal=decimal, - ) - aaae( - got.square_terms.reshape((4, 4)), - quadratic_case["square_terms_expected"], - decimal=decimal, - ) - - -@pytest.mark.parametrize("model", ["ols", "ridge", "tranquilo"]) -def test_fit_ols_against_gradient(model, quadratic_case): - options = {"l2_penalty_square": 0} - fit_ols = get_fitter( - model, - options, - model_type="quadratic", - residualize=False, - infinity_handling="relative", - ) - got = fit_ols( - quadratic_case["x"], - quadratic_case["y"], - region=Region(center=np.zeros(4), radius=1.0), - old_model=None, - ) - - a = got.linear_terms.flatten() - hess = got.square_terms.reshape((4, 4)) - grad = a + hess @ quadratic_case["x0"] - - gradient = 
first_derivative(quadratic_case["func"], quadratic_case["x0"]) - aaae(gradient["derivative"], grad, case="gradient") - - -@pytest.mark.parametrize("model", ("ols", "ridge", "tranquilo", "powell")) -def test_fit_ols_against_hessian(model, quadratic_case): - options = {"l2_penalty_square": 0} - fit_ols = get_fitter( - model, - options, - model_type="quadratic", - residualize=False, - infinity_handling="relative", - ) - got = fit_ols( - quadratic_case["x"], - quadratic_case["y"], - region=Region(center=np.zeros(4), radius=1.0), - old_model=None, - ) - hessian = second_derivative(quadratic_case["func"], quadratic_case["x0"]) - hess = got.square_terms.reshape((4, 4)) - aaae(hessian["derivative"], hess, case="hessian") - - -def test_quadratic_features(): - x = np.array([[0, 1, 2], [3, 4, 5]]) - - expected = np.array([[0, 1, 2, 0, 0, 0, 1, 2, 4], [3, 4, 5, 9, 12, 15, 16, 20, 25]]) - got = _quadratic_features(x) - assert_array_equal(got, expected) diff --git a/tests/optimization/tranquilo/test_get_component.py b/tests/optimization/tranquilo/test_get_component.py deleted file mode 100644 index 08ebf7263..000000000 --- a/tests/optimization/tranquilo/test_get_component.py +++ /dev/null @@ -1,170 +0,0 @@ -import pytest -from collections import namedtuple -from estimagic.optimization.tranquilo.get_component import ( - _add_redundant_argument_handling, - _fail_if_mandatory_argument_is_missing, - _get_function_and_name, - _get_valid_options, - get_component, -) - - -@pytest.fixture() -def func_dict(): - out = { - "f": lambda x: x, - "g": lambda x, y: x + y, - } - return out - - -@pytest.fixture -def default_options(): - options = namedtuple("default_options", "x y") - return options(x=1, y=1) - - -def test_get_component(func_dict, default_options): - got = get_component( - name_or_func="g", - component_name="component", - func_dict=func_dict, - default_options=default_options, - user_options={"y": 2}, - redundant_option_handling="ignore", - redundant_argument_handling="ignore", - mandatory_signature=["x"], - ) - - assert got() == 3 - assert got(bla=15) == 3 - - -def test_get_function_and_name_valid_string(func_dict): - _func, _name = _get_function_and_name( - name_or_func="f", - component_name="component", - func_dict=func_dict, - ) - assert _func == func_dict["f"] - assert _name == "f" - - -def test_get_function_and_name_invalid_string(): - with pytest.raises(ValueError, match="If component is a string, it must be one of"): - _get_function_and_name( - name_or_func="h", - component_name="component", - func_dict={"f": lambda x: x, "g": lambda x, y: x + y}, - ) - - -def test_get_function_and_name_valid_function(): - def _f(x): - return x - - _func, _name = _get_function_and_name( - name_or_func=_f, - component_name="component", - func_dict=None, - ) - assert _func == _f - assert _name == "_f" - - -def test_get_function_and_string_wrong_type(): - with pytest.raises(TypeError, match="name_or_func must be a string or a callable."): - _get_function_and_name( - name_or_func=1, - component_name="component", - func_dict=None, - ) - - -def test_get_valid_options_ignore(default_options): - got = _get_valid_options( - default_options=default_options, - user_options={"x": 3, "y": 4}, - signature=["x", "y"], - name="bla", - component_name="component", - redundant_option_handling="ignore", - ) - expected = {"x": 3, "y": 4} - - assert got == expected - - -def test_get_valid_options_raise_update_option_bundle(default_options): - # provokes error in update_option_bundle - with pytest.raises(ValueError, match="The 
following user options are not valid"): - _get_valid_options( - default_options=default_options, - user_options={"x": 3, "z": 4}, - signature=["x", "y"], - name="bla", - component_name="component", - redundant_option_handling="raise", - ) - - -def test_get_valid_options_raise(default_options): - with pytest.raises(ValueError, match="The following options are not supported"): - _get_valid_options( - default_options=default_options, - user_options={"y": 3}, - signature=["x"], - name="bla", - component_name="component", - redundant_option_handling="raise", - ) - - -def test_get_valid_options_warn(default_options): - with pytest.warns(UserWarning, match="The following options are not supported"): - _get_valid_options( - default_options=default_options, - user_options={"y": 3}, - signature=["x"], - name="bla", - component_name="component", - redundant_option_handling="warn", - ) - - -def test_fail_if_mandatory_argument_is_missing(): - with pytest.raises( - ValueError, match="The following mandatory arguments are missing" - ): - _fail_if_mandatory_argument_is_missing( - mandatory_arguments=["a", "c"], - signature=["a", "b"], - name="bla", - component_name="component", - ) - - -def test_add_redundant_argument_handling_ignore(): - def f(a, b): - return a + b - - _f = _add_redundant_argument_handling( - func=f, - signature=["a", "b"], - warn=False, - ) - - assert _f(1, b=2, c=3) == 3 - - -def test_add_redundant_argument_handling_warn(): - def f(a, b): - return a + b - - _f = _add_redundant_argument_handling( - func=f, - signature=["a", "b"], - warn=True, - ) - with pytest.warns(UserWarning, match="The following arguments are not supported"): - _f(1, b=2, c=3) diff --git a/tests/optimization/tranquilo/test_handle_infinity.py b/tests/optimization/tranquilo/test_handle_infinity.py deleted file mode 100644 index 6030b9a0f..000000000 --- a/tests/optimization/tranquilo/test_handle_infinity.py +++ /dev/null @@ -1,15 +0,0 @@ -import numpy as np -from estimagic.optimization.tranquilo.handle_infinity import get_infinity_handler -from numpy.testing import assert_array_almost_equal as aaae - - -def test_clip_relative(): - func = get_infinity_handler("relative") - - fvecs = np.array([[1, np.inf, 3, 1], [-np.inf, 0, 1, 2], [-1, 5, 6, 3]]) - - got = func(fvecs) - - expected = np.array([[1, 16, 3, 1], [-6, 0, 1, 2], [-1, 5, 6, 3]]) - - aaae(got, expected) diff --git a/tests/optimization/tranquilo/test_history.py b/tests/optimization/tranquilo/test_history.py deleted file mode 100644 index bf2410df5..000000000 --- a/tests/optimization/tranquilo/test_history.py +++ /dev/null @@ -1,230 +0,0 @@ -"""Test the history class for least-squares optimizers.""" -import numpy as np -import pytest -from estimagic.optimization.tranquilo.history import History -from estimagic.optimization.tranquilo.region import Region -from numpy.testing import assert_array_almost_equal as aaae - - -XS = [ - np.arange(3), - np.arange(3).tolist(), - np.arange(3).reshape(1, 3), - np.arange(3).reshape(1, 3).tolist(), -] - - -@pytest.mark.parametrize("xs", XS) -def test_add_xs_not_initialized(xs): - history = History(functype="least_squares") - - new_indices = history.add_xs(xs) - - if len(xs) == 1: - aaae(new_indices, np.array([0])) - else: - assert new_indices == 0 - - assert isinstance(history.xs, np.ndarray) - aaae(history.xs[0], np.arange(3)) - - assert history.index_mapper == {0: []} - assert history.n_xs == 1 - assert history.n_fun == 0 - - -@pytest.mark.parametrize("xs", XS) -def test_add_xs_initialized_with_space(xs): - history = 
History(functype="least_squares") - - history.add_xs(np.ones((20, 3))) - new_indices = history.add_xs(xs) - - if len(xs) == 1: - aaae(new_indices, np.array([20])) - else: - assert new_indices == 20 - - assert isinstance(history.xs, np.ndarray) - aaae(history.xs[:21], np.vstack([np.ones((20, 3)), np.arange(3)])) - - assert history.index_mapper == {i: [] for i in range(21)} - assert history.n_xs == 21 - assert history.n_fun == 0 - - -@pytest.mark.parametrize("xs", XS) -def test_add_xs_initialized_extension_needed(xs): - history = History(functype="least_squares") - - history.add_xs(np.ones(3)) - initial_size = len(history.xs) - history.add_xs(np.ones((initial_size - 1, 3))) - history.add_xs(xs) - - assert len(history.xs) > initial_size - - aaae(history.xs[initial_size], np.arange(3)) - - assert history.n_xs == initial_size + 1 - assert history.n_fun == 0 - - -EVALS = [ - (0, np.arange(5)), - ([0], [np.arange(5)]), - (np.array([0]), np.arange(5).reshape(1, 5)), -] - - -@pytest.mark.parametrize("x_indices, evals", EVALS) -def test_add_evals_not_initialized(x_indices, evals): - history = History(functype="least_squares") - history.add_xs(np.arange(3)) - - history.add_evals(x_indices, evals) - - assert history.get_n_fun() == 1 - assert history.get_n_xs() == 1 - - aaae(history.fvecs[0], np.arange(5)) - aaae(history.fvals[0], 30.0) - - assert history.index_mapper == {0: [0]} - - -@pytest.mark.parametrize("evals", [tup[1] for tup in EVALS]) -def test_add_evals_initialized_with_space(evals): - history = History(functype="least_squares") - history.add_xs(np.arange(6).reshape(2, 3)) - history.add_evals([0] * 20, np.ones((20, 5))) - - history.add_evals(1, evals) - - assert history.get_n_fun() == 21 - assert history.get_n_xs() == 2 - - aaae(history.fvecs[:21], np.vstack([np.ones((20, 5)), np.arange(5)])) - aaae(history.fvals[20], 30.0) - - assert history.index_mapper == {0: list(range(20)), 1: [20]} - - -def test_get_indices_in_trustregion(): - history = History(functype="least_squares") - xs = [[1, 1], [1.1, 1.2], [1.5, 1], [0.9, 0.9]] - fvecs = np.zeros((4, 3)) - indices = history.add_xs(xs) - history.add_evals(indices, fvecs) - - trustregion = Region( - center=np.ones(2), - radius=0.3, - ) - - indices = history.get_x_indices_in_region(trustregion) - - aaae(indices, np.array([0, 1, 3])) - - -@pytest.fixture() -def history(): - history = History(functype="least_squares") - xs = np.arange(15).reshape(5, 3) - fvecs = np.arange(25).reshape(5, 5) - indices = history.add_xs(xs) - history.add_evals(indices, fvecs) - return history - - -def test_get_xs_no_indices(history): - xs = history.get_xs() - aaae(xs, np.arange(15).reshape(5, 3)) - - -def test_get_xs_with_indices(history): - xs = history.get_xs([0, 2, 4]) - aaae(xs, np.arange(15).reshape(5, 3)[[0, 2, 4]]) - - -def test_get_xs_scalar_index(history): - xs = history.get_xs(0) - aaae(xs, np.arange(3)) - - -def test_add_eval_for_invalid_x(history): - with pytest.raises(ValueError): - history.add_evals(5, np.arange(5)) - - -def test_get_fvecs_scalar_index(history): - fvecs = history.get_fvecs(0) - aaae(fvecs, np.arange(5).reshape(1, 5)) - - -def test_get_fvecs_with_indices(history): - fvecs = history.get_fvecs([0]) - assert isinstance(fvecs, dict) - assert len(fvecs) == 1 - assert 0 in fvecs - aaae(fvecs[0], np.arange(5).reshape(1, 5)) - - -def test_get_fvals_scalar_index(history): - fvals = history.get_fvals(0) - aaae(fvals, 30.0) - - -def test_get_fvals_with_indices(history): - fvals = history.get_fvals([0]) - assert isinstance(fvals, dict) - assert 
len(fvals) == 1 - assert 0 in fvals - aaae(fvals[0], 30.0) - - -@pytest.mark.parametrize("average", [True, False]) -def test_get_model_data_trivial_averaging(history, average): - got_xs, got_fvecs = history.get_model_data( - x_indices=[0, 1], - average=average, - ) - - aaae(got_xs, np.arange(6).reshape(2, 3)) - aaae(got_fvecs, np.arange(10).reshape(2, 5)) - - -def test_get_model_data_no_averaging(history): - got_xs, got_fvecs = history.get_model_data(x_indices=[0, 1]) - aaae(got_xs, np.arange(6).reshape(2, 3)) - aaae(got_fvecs, np.arange(10).reshape(2, 5)) - - -@pytest.fixture() -def noisy_history(): - history = History(functype="least_squares") - history.add_xs(np.arange(6).reshape(2, 3)) - fvecs = np.arange(25).reshape(5, 5) - history.add_evals([0, 0, 1, 1, 1], fvecs) - return history - - -@pytest.mark.parametrize("average", [True, False]) -def test_get_model_data_with_repeated_evaluations(noisy_history, average): - got_xs, got_fvecs = noisy_history.get_model_data( - x_indices=[0, 1], - average=average, - ) - - if average: - aaae(got_xs, np.arange(6).reshape(2, 3)) - expected_fvecs = np.array( - [ - np.arange(10).reshape(2, 5).mean(axis=0), - np.arange(10, 25).reshape(3, 5).mean(axis=0), - ] - ) - aaae(got_fvecs, expected_fvecs) - else: - aaae(got_xs, np.arange(6).reshape(2, 3).repeat([2, 3], axis=0)) - aaae(got_fvecs, np.arange(25).reshape(5, 5)) diff --git a/tests/optimization/tranquilo/test_models.py b/tests/optimization/tranquilo/test_models.py deleted file mode 100644 index cffffa93e..000000000 --- a/tests/optimization/tranquilo/test_models.py +++ /dev/null @@ -1,190 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.tranquilo.region import Region -from estimagic.optimization.tranquilo.models import ( - ScalarModel, - VectorModel, - _predict_scalar, - _predict_vector, - add_models, - is_second_order_model, - move_model, - n_free_params, - n_interactions, - n_second_order_terms, -) -from numpy.testing import assert_array_almost_equal as aaae -from numpy.testing import assert_array_equal - - -def test_predict_scalar(): - model = ScalarModel( - intercept=1.0, - linear_terms=np.arange(2), - square_terms=(np.arange(4) + 1).reshape(2, 2), - ) - x = np.array([[0, 0], [0, 1], [1, 0], [1, 2]]) - exp = np.array([1, 4, 1.5, 16.5]) - got = _predict_scalar(model, x) - assert_array_equal(exp, got) - - -def test_predict_vector(): - model = VectorModel( - intercepts=1 + np.arange(3), - linear_terms=np.arange(6).reshape(3, 2), - square_terms=(np.arange(3 * 2 * 2) + 1).reshape(3, 2, 2), - ) - x = np.array([[0, 0], [0, 1], [1, 0], [1, 2]], dtype=float) - exp = np.array( - [ - [1, 4, 1.5, 16.5], - [2, 9, 6.5, 41.5], - [3, 14, 11.5, 66.5], - ] - ).T - got = _predict_vector(model, x) - assert_array_equal(exp, got) - - -def test_n_free_params_name_quadratic(): - assert n_free_params(dim=2, model_type="quadratic") == 1 + 2 + 3 - assert n_free_params(dim=3, model_type="quadratic") == 1 + 3 + 6 - assert n_free_params(dim=9, model_type="quadratic") == 1 + 9 + 45 - - -def test_n_free_params_name_invalid(): - with pytest.raises(ValueError): - assert n_free_params(dim=3, model_type="invalid") - - -@pytest.mark.parametrize("dim", [2, 3, 9]) -def test_n_free_params_info_linear(dim): - assert n_free_params(dim, model_type="linear") == 1 + dim - - -@pytest.mark.parametrize("dim", [2, 3, 9]) -def test_n_free_params_info_quadratic(dim): - assert n_free_params(dim, model_type="quadratic") == 1 + dim + n_second_order_terms( - dim - ) - - -def test_n_free_params_invalid(): - model = 
ScalarModel(intercept=1.0, linear_terms=np.ones(1), square_terms=np.ones(1)) - with pytest.raises(ValueError): - n_free_params(dim=1, model_type=model) - - -def test_n_second_order_terms(): - assert n_second_order_terms(3) == 6 - - -def test_n_interactions(): - assert n_interactions(3) == 3 - - -@pytest.mark.parametrize("model_type", ("linear", "quadratic")) -def test_is_second_order_model_type(model_type): - assert is_second_order_model(model_type) == (model_type == "quadratic") - - -def test_is_second_order_model_model(): - model = ScalarModel(intercept=1.0, linear_terms=np.ones(1)) - assert is_second_order_model(model) is False - - model = ScalarModel(intercept=1.0, linear_terms=np.ones(1), square_terms=np.ones(1)) - assert is_second_order_model(model) is True - - -def test_is_second_order_model_invalid(): - model = np.linalg.lstsq - with pytest.raises(TypeError): - is_second_order_model(model) - - -@pytest.fixture() -def scalar_model(): - out = ScalarModel( - intercept=0.5, - linear_terms=np.array([-0.3, 0.3]), - square_terms=np.array([[0.8, 0.2], [0.2, 0.7]]), - shift=np.array([0.2, 0.3]), - scale=0.6, - ) - return out - - -@pytest.fixture() -def vector_model(): - out = VectorModel( - intercepts=np.array([0.5, 0.4, 0.3]), - linear_terms=np.array([[-0.3, 0.3], [-0.2, 0.1], [-0.2, 0.1]]), - square_terms=np.array( - [ - [[0.8, 0.2], [0.2, 0.7]], - [[0.6, 0.2], [0.2, 0.5]], - [[0.8, 0.2], [0.2, 0.7]], - ] - ), - shift=np.array([0.2, 0.3]), - scale=0.6, - ) - return out - - -def test_move_scalar_model(scalar_model): - old_region = Region(center=scalar_model.shift, radius=scalar_model.scale) - new_region = Region(center=np.array([-0.1, 0.1]), radius=0.45) - - old_model = scalar_model - x_unscaled = np.array([[0.5, 0.5]]) - x_old = old_region.map_to_unit(x_unscaled) - x_new = new_region.map_to_unit(x_unscaled) - - new_model = move_model(old_model, new_region) - - old_prediction = old_model.predict(x_old) - new_prediction = new_model.predict(x_new) - - assert new_model.scale == new_region.radius - aaae(new_model.shift, new_region.center) - - assert np.allclose(old_prediction, new_prediction) - - -def test_move_vector_model(vector_model): - old_region = Region(center=vector_model.shift, radius=vector_model.scale) - new_region = Region(center=np.array([-0.1, 0.1]), radius=0.45) - - old_model = vector_model - - x_unscaled = np.array([[0.5, 0.5]]) - x_old = old_region.map_to_unit(x_unscaled) - x_new = new_region.map_to_unit(x_unscaled) - - new_model = move_model(old_model, new_region) - - old_prediction = old_model.predict(x_old) - new_prediction = new_model.predict(x_new) - - assert new_model.scale == new_region.radius - aaae(new_model.shift, new_region.center) - - assert np.allclose(old_prediction, new_prediction) - - -def test_add_scalar_models(scalar_model): - got = add_models(scalar_model, scalar_model) - - assert got.intercept == scalar_model.intercept * 2 - aaae(got.linear_terms, scalar_model.linear_terms * 2) - aaae(got.square_terms, scalar_model.square_terms * 2) - - -def test_add_vector_models(vector_model): - got = add_models(vector_model, vector_model) - - assert np.allclose(got.intercepts, vector_model.intercepts * 2) - aaae(got.linear_terms, vector_model.linear_terms * 2) - aaae(got.square_terms, vector_model.square_terms * 2) diff --git a/tests/optimization/tranquilo/test_options.py b/tests/optimization/tranquilo/test_options.py deleted file mode 100644 index e8a6297de..000000000 --- a/tests/optimization/tranquilo/test_options.py +++ /dev/null @@ -1,56 +0,0 @@ -import pytest -from 
collections import namedtuple -from estimagic.optimization.tranquilo.options import ( - get_default_aggregator, - update_option_bundle, -) - - -def test_get_default_aggregator_scalar_quadratic(): - assert get_default_aggregator("scalar", "quadratic") == "identity" - - -def test_get_default_aggregator_error(): - with pytest.raises( - NotImplementedError, - match="The requested combination of functype and model_type is not supported.", - ): - get_default_aggregator("scalar", "linear") - - -@pytest.fixture -def default_options(): - options = namedtuple("default_options", "number") - return options(number=1) - - -def test_update_option_bundle_fast_path(): - assert update_option_bundle("whatever", user_options=None) == "whatever" - - -def test_update_option_bundle_dict(default_options): - got = update_option_bundle(default_options, user_options={"number": 2}) - assert got.number == 2 - - -def test_update_option_bundle_namedtuple(default_options): - user_option = default_options._replace(number=2) - got = update_option_bundle(default_options, user_options=user_option) - assert got.number == 2 - - -def test_update_option_bundle_convert_type(default_options): - got = update_option_bundle(default_options, user_options={"number": "2"}) - assert got.number == 2 - - -def test_update_option_bundle_wrong_type(default_options): - with pytest.raises(ValueError, match="invalid literal for int"): - update_option_bundle(default_options, user_options={"number": "not_a_number"}) - - -def test_update_option_bundle_invalid_field(default_options): - with pytest.raises( - ValueError, match="The following user options are not valid: {'not_a_field'}" - ): - update_option_bundle(default_options, user_options={"not_a_field": 10}) diff --git a/tests/optimization/tranquilo/test_poisedness.py b/tests/optimization/tranquilo/test_poisedness.py deleted file mode 100644 index 2a846d0a2..000000000 --- a/tests/optimization/tranquilo/test_poisedness.py +++ /dev/null @@ -1,388 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.tranquilo.poisedness import ( - _get_minimize_options, - _lagrange_poly_matrix, - _reshape_coef_to_square_terms, - get_poisedness_constant, - improve_poisedness, -) -from numpy.testing import assert_array_almost_equal as aaae - - -def evaluate_scalar_model(x, intercept, linear_terms, square_terms): - return intercept + linear_terms.T @ x + 0.5 * x.T @ square_terms @ x - - -# ====================================================================================== -# Improve poisedness -# ====================================================================================== - -TEST_CASES = [ - ( - np.array( - [ - [-0.98, -0.96], - [-0.96, -0.98], - [0, 0], - [0.98, 0.96], - [0.96, 0.98], - [0.94, 0.94], - ] - ), - "sphere", - 5, - [ - 5324.240935366314, - 36.87996947175511, - 11.090857556966462, - 1.3893207179888898, - 1.0016763267639168, - ], - ), - ( - np.array( - [ - [-0.98, -0.96], - [-0.96, -0.98], - [0, 0], - [0.98, 0.96], - [0.96, 0.98], - [0.94, 0.94], - ] - ), - "cube", - 10, - [ - 10648.478006222356, - 49.998826793338836, - 13.145227394549012, - 1.0313287779903457, - 1.008398336326099, - 1.0306831620836225, - 1.0019247733166188, - 1.0044418474330754, - 1.0024393102571791, - 1.0017007017773365, - ], - ), - ( - np.array( - [ - [-0.98, -0], - [-0.96, -0.01], - [0, 0], - [-0.02, 0.98], - [0.03, -0.96], - [0.94, 0.06], - ] - ), - "sphere", - 5, - [ - 50.83088699521032, - 1.4010345122261196, - 1.109469103188152, - 1.0614725892080803, - 1.0368961283088556, - ], - ), - ( - np.array( - [ 
- [-0.98, 0.0], - [-0.56, -0.01], - [-0.3, -0.07], - [0.98, 0.02], - [0.46, 0.03], - [0.94, 0.06], - ] - ), - "sphere", - 5, - [ - 687.9333361325548, - 22.830295678507802, - 11.89595397927371, - 1.590858593504958, - 1.1143219029197806, - ], - ), -] - - -@pytest.mark.parametrize("sample, shape, maxiter, expected", TEST_CASES) -def test_improve_poisedness(sample, shape, maxiter, expected): - _, got_lambdas = improve_poisedness(sample=sample, shape=shape, maxiter=maxiter) - aaae(got_lambdas[-5:], expected[-5:], decimal=2) - - -# ====================================================================================== -# Lambda poisedness constant -# ====================================================================================== -TEST_CASES = [ - ( - np.array( - [ - [-0.98, -0.96], - [-0.96, -0.98], - [0, 0], - [0.98, 0.96], - [0.96, 0.98], - [0.94, 0.94], - ] - ), - 5324.241743151584, - ), - ( - np.array( - [ - [-0.98, -0.96], - [-0.96, -0.98], - [0.0, 0.0], - [0.98, 0.96], - [0.96, 0.98], - [-0.70710678, 0.70710678], - ] - ), - 36.87996947175511, - ), - ( - np.array( - [ - [-0.98, -0.96], - [-0.96, -0.98], - [0.0, 0.0], - [0.84885278, -0.52862932], - [0.96, 0.98], - [-0.70710678, 0.70710678], - ] - ), - 11.090857500607644, - ), - ( - np.array( - [ - [-0.98, -0.96], - [-0.02260674, 0.99974443], - [0.0, 0.0], - [0.84885278, -0.52862932], - [0.96, 0.98], - [-0.70710678, 0.70710678], - ] - ), - 1.3893205660280858, - ), - ( - np.array( - [ - [-0.98, -0.96], - [-0.02260674, 0.99974443], - [0.0, 0.0], - [0.84885278, -0.52862932], - [0.96, 0.98], - [-0.96706306, 0.2545369], - ] - ), - 1.0016763272061744, - ), -] - - -@pytest.mark.parametrize("sample, expected", TEST_CASES) -def test_poisedness_constant_scaled(sample, expected): - """Test cases are modified versions from :cite:`Conn2009` p. - - 99. - - """ - got, *_ = get_poisedness_constant(sample, shape="sphere") - assert np.allclose(got, expected) - - -TEST_CASES = [ - ( - np.array( - [ - [-0.98, -0.96], - [-0.96, -0.98], - [0, 0], - [0.98, 0.96], - [0.96, 0.98], - [0.94, 0.94], - ] - ), - 5324, - ), - ( - np.array( - [ - [-0.98, -0.96], - [-0.96, -0.98], - [0.0, 0.0], - [0.98, 0.96], - [0.96, 0.98], - [-0.707, 0.707], - ] - ), - 36.88, - ), - ( - np.array( - [ - [-0.967, 0.254], - [-0.96, -0.98], - [0, 0], - [0.98, 0.96], - [-0.199, 0.979], - [0.707, -0.707], - ] - ), - 1.001, - ), -] - - -@pytest.mark.parametrize("sample, expected", TEST_CASES) -def test_poisedness_constant_textbook_scaled(sample, expected): - """Test cases are taken from :cite:`Conn2009` p. - - 99. - - """ - got, *_ = get_poisedness_constant(sample, shape="sphere") - assert np.allclose(got, expected, rtol=1e-3) - - -TEST_CASES = [ - ( - np.array( - [ - [0.524, 0.0006], - [0.032, 0.323], - [0.187, 0.890], - [0.5, 0.5], - [0.982, 0.368], - [0.774, 0.918], - ] - ), - 1, - ) -] - - -@pytest.mark.parametrize("sample, expected", TEST_CASES) -def test_poisedness_constant_textbook_unscaled(sample, expected): - """This test case is taken from :cite:`Conn2009` p. - - 45. 
- - """ - n_params = sample.shape[1] - - radius = 0.5 - center = 0.5 * np.ones(n_params) - sample_scaled = (sample - center) / radius - - got, *_ = get_poisedness_constant(sample_scaled, shape="sphere") - assert np.allclose(got, expected, rtol=1e-3) - - -def test_invalid_shape_argument(): - with pytest.raises(ValueError): - assert _get_minimize_options(shape="ellipse", n_params=10) - - -# ====================================================================================== -# Lagrange polynomials -# ====================================================================================== - -TEST_CASES = [ - ( - np.array([[0, 0], [1, 0], [0, 1], [2, 0], [1, 1], [0, 2], [0.5, 0.5]]), - np.array( - [ - [ - 1, - -1.5, - -1.5, - 1, - 1, - 1, - ], - [ - 0, - 5 / 3, - -1 / 3, - -1.64705882e00, - -7.64705882e-01, - 3.52941176e-01, - ], - [ - 0, - -1 / 3, - 5 / 3, - 3.52941176e-01, - -7.64705882e-01, - -1.64705882e00, - ], - [ - 0, - -5 / 12, - 1 / 12, - 9.11764706e-01, - -5.88235294e-02, - -8.82352941e-02, - ], - [ - -0, - -1 / 6, - -1 / 6, - 1.76470588e-01, - 1.11764706e00, - 1.76470588e-01, - ], - [ - 0, - 1 / 12, - -5 / 12, - -8.82352941e-02, - -5.88235294e-02, - 9.11764706e-01, - ], - [ - 0, - 2 / 3, - 2 / 3, - -7.05882353e-01, - -4.70588235e-01, - -7.05882353e-01, - ], - ] - ), - np.array([1, 0.84, 0.84, 0.99, 0.96, 0.99, 0.37]), - ) -] - - -@pytest.mark.parametrize("sample, expected_lagrange_mat, expected_critval", TEST_CASES) -def test_lagrange_poly_matrix(sample, expected_lagrange_mat, expected_critval): - """This test case is taken from :cite:`Conn2009` p. - - 62. - - """ - sample = np.array([[0, 0], [1, 0], [0, 1], [2, 0], [1, 1], [0, 2], [0.5, 0.5]]) - n_params = sample.shape[1] - - lagrange_mat = _lagrange_poly_matrix(sample) - aaae(lagrange_mat, expected_lagrange_mat) - - for idx, lagrange_poly in enumerate(lagrange_mat): - intercept = lagrange_poly[0] - linear_terms = lagrange_poly[1 : n_params + 1] - _coef_square_terms = lagrange_poly[n_params + 1 :] - square_terms = _reshape_coef_to_square_terms(_coef_square_terms, n_params) - - got = evaluate_scalar_model(sample[idx], intercept, linear_terms, square_terms) - aaae(got, expected_critval[idx], decimal=2) diff --git a/tests/optimization/tranquilo/test_process_arguments.py b/tests/optimization/tranquilo/test_process_arguments.py deleted file mode 100644 index 8532e65ea..000000000 --- a/tests/optimization/tranquilo/test_process_arguments.py +++ /dev/null @@ -1,137 +0,0 @@ -"""Tests for the process_arguments function and subfunctions. - -When testing process_arguments we should only test the values of outputs that somehow -depend on the inputs, not the values with static defaults. 
- -""" -import pytest -import numpy as np -from estimagic.optimization.tranquilo.process_arguments import ( - process_arguments, - _process_batch_size, - _process_sample_size, - _process_model_type, - _process_search_radius_factor, - _process_acceptance_decider, - _process_model_fitter, - _process_residualize, - _process_n_evals_at_start, -) - - -def test_process_arguments_scalar_deterministic(): - res = process_arguments( - functype="scalar", - criterion=lambda x: x @ x, - x=np.array([-3, 1, 2]), - radius_options={"initial_radius": 1.0}, - ) - assert res["radius_options"].initial_radius == 1.0 - - -def test_process_batch_size(): - assert _process_batch_size(batch_size=2, n_cores=2) == 2 - assert _process_batch_size(batch_size=None, n_cores=3) == 3 - - -def test_process_batch_size_invalid(): - with pytest.raises(ValueError, match="batch_size must be"): - _process_batch_size(batch_size=1, n_cores=2) - - -def test_process_sample_size(): - x = np.arange(3) - assert _process_sample_size(sample_size=None, model_type="linear", x=x) == 4 - assert _process_sample_size(sample_size=None, model_type="quadratic", x=x) == 7 - assert _process_sample_size(10, None, None) == 10 - - -def test_process_sample_size_callable(): - x = np.arange(3) - sample_size = lambda x, model_type: len(x) ** 2 - assert _process_sample_size(sample_size=sample_size, model_type="linear", x=x) == 9 - - -def test_process_model_type(): - assert _process_model_type(model_type="linear", functype="scalar") == "linear" - assert _process_model_type(model_type=None, functype="scalar") == "quadratic" - assert _process_model_type(model_type=None, functype="least_squares") == "linear" - assert _process_model_type(model_type=None, functype="likelihood") == "linear" - - -def test_process_model_type_invalid(): - with pytest.raises(ValueError, match="model_type must be"): - _process_model_type(model_type="invalid", functype="scalar") - - -def test_process_search_radius_factor(): - assert _process_search_radius_factor(search_radius_factor=1.1, functype=None) == 1.1 - assert ( - _process_search_radius_factor(search_radius_factor=None, functype="scalar") - == 4.25 - ) - assert ( - _process_search_radius_factor( - search_radius_factor=None, functype="least_squares" - ) - == 5.0 - ) - - -def test_process_search_radius_factor_negative(): - with pytest.raises(ValueError, match="search_radius_factor must be"): - _process_search_radius_factor(-1, "scalar") - - -def test_process_acceptance_decider(): - assert _process_acceptance_decider(acceptance_decider=None, noisy=True) == "noisy" - assert ( - _process_acceptance_decider(acceptance_decider=None, noisy=False) == "classic" - ) - assert ( - _process_acceptance_decider(acceptance_decider="classic", noisy=None) - == "classic" - ) - - -def test_process_model_fitter(): - assert ( - _process_model_fitter( - model_fitter=None, model_type="quadratic", sample_size=3, x=np.arange(3) - ) - == "tranquilo" - ) - assert ( - _process_model_fitter( - model_fitter=None, model_type="linear", sample_size=4, x=np.arange(3) - ) - == "ols" - ) - assert ( - _process_model_fitter( - model_fitter="xyz", model_type=None, sample_size=None, x=None - ) - == "xyz" - ) - - -def test_process_residualize(): - assert _process_residualize(residualize=None, model_fitter="tranquilo") is True - assert _process_residualize(residualize=None, model_fitter="ols") is False - assert _process_residualize(residualize=False, model_fitter="custom") is False - - -def test_process_residualize_invalid(): - with pytest.raises(ValueError, 
match="residualize must be a boolean."): - _process_residualize(residualize="invalid", model_fitter=None) - - -def test_process_n_evals_at_start(): - assert _process_n_evals_at_start(n_evals=None, noisy=True) == 5 - assert _process_n_evals_at_start(n_evals=None, noisy=False) == 1 - assert _process_n_evals_at_start(n_evals=10, noisy=None) == 10 - - -def test_process_n_evals_at_start_negative(): - with pytest.raises(ValueError, match="n_initial_acceptance_evals must be"): - _process_n_evals_at_start(n_evals=-1, noisy=None) diff --git a/tests/optimization/tranquilo/test_region.py b/tests/optimization/tranquilo/test_region.py deleted file mode 100644 index c28c959dd..000000000 --- a/tests/optimization/tranquilo/test_region.py +++ /dev/null @@ -1,128 +0,0 @@ -import numpy as np -from estimagic.optimization.tranquilo.bounds import Bounds -from estimagic.optimization.tranquilo.region import ( - Region, - _any_bounds_binding, - _get_shape, - _get_cube_bounds, - _get_cube_center, - _get_effective_radius, - _get_effective_center, - _map_from_unit_cube, - _map_from_unit_sphere, - _map_to_unit_cube, - _map_to_unit_sphere, -) -from numpy.testing import assert_array_equal -import pytest - - -def test_map_to_unit_sphere(): - got = _map_to_unit_sphere(np.ones(2), center=2 * np.ones(1), radius=2) - assert_array_equal(got, -0.5 * np.ones(2)) - - -def test_map_to_unit_cube(): - bounds = Bounds(lower=np.zeros(2), upper=2 * np.ones(2)) - got = _map_to_unit_cube(np.ones(2), cube_bounds=bounds) - assert_array_equal(got, np.zeros(2)) - - -def test_map_from_unit_sphere(): - got = _map_from_unit_sphere(-0.5 * np.ones(2), center=2 * np.ones(1), radius=2) - assert_array_equal(got, np.ones(2)) - - -def test_map_from_unit_cube(): - bounds = Bounds(lower=np.zeros(2), upper=2 * np.ones(2)) - got = _map_from_unit_cube(np.zeros(2), cube_bounds=bounds) - assert_array_equal(got, np.ones(2)) - - -def test_any_bounds_binding_true(): - bounds = Bounds(lower=-np.ones(2), upper=np.ones(2)) - out = _any_bounds_binding(bounds, center=np.zeros(2), radius=2) - assert out - - -def test_any_bounds_binding_false(): - bounds = Bounds(lower=-np.ones(2), upper=np.ones(2)) - out = _any_bounds_binding(bounds, center=np.zeros(2), radius=0.5) - assert not out - - -def test_get_shape_sphere(): - out = _get_shape(center=np.zeros(2), radius=1, bounds=None) - assert out == "sphere" - - -def test_get_shape_cube(): - bounds = Bounds(lower=np.zeros(2), upper=np.ones(2)) - out = _get_shape(center=np.zeros(2), radius=1, bounds=bounds) - assert out == "cube" - - -def test_get_cube_bounds(): - bounds = Bounds(lower=-np.ones(2), upper=np.ones(2)) - out = _get_cube_bounds(center=np.zeros(2), radius=1, bounds=bounds, shape="sphere") - assert_array_equal(out.lower, bounds.lower) - assert_array_equal(out.upper, bounds.upper) - - -def test_get_cube_bounds_no_bounds(): - bounds = Bounds(lower=None, upper=None) - out = _get_cube_bounds(center=np.zeros(2), radius=1, bounds=bounds, shape="sphere") - assert_array_equal(out.lower, -np.ones(2)) - assert_array_equal(out.upper, np.ones(2)) - - -def test_get_cube_bounds_updated_upper_bounds(): - bounds = Bounds(lower=-2 * np.ones(2), upper=0.5 * np.ones(2)) - out = _get_cube_bounds(center=np.zeros(2), radius=1, bounds=bounds, shape="cube") - np.all(out.lower > -np.ones(2)) - np.all(out.lower < np.zeros(2)) - np.all(out.upper == 0.5 * np.ones(2)) - - -def test_get_cube_center(): - bounds = Bounds(lower=np.array([0, 0.5]), upper=np.array([1, 10])) - out = _get_cube_center(cube_bounds=bounds) - assert_array_equal(out, 
np.array([0.5, 5.25])) - - -def test_get_effective_radius(): - bounds = Bounds(lower=np.array([0, 0.5]), upper=np.array([1, 10])) - out = _get_effective_radius(shape="cube", radius=None, cube_bounds=bounds) - assert_array_equal(out, np.array([0.5, 4.75])) - - -def test_get_effective_center_sphere(): - out = _get_effective_center(shape="sphere", center=np.ones(2), cube_center=None) - assert_array_equal(out, np.ones(2)) - - -def test_get_effective_center_cube(): - out = _get_effective_center(shape="cube", center=None, cube_center=np.zeros(2)) - assert_array_equal(out, np.zeros(2)) - - -def test_region_non_binding_bounds(): - region = Region(center=np.zeros(2), radius=1) - assert region.shape == "sphere" - assert region.radius == 1 - assert region.bounds is None - with pytest.raises(AttributeError, match="The trustregion is a sphere"): - region.cube_bounds # noqa: B018 - with pytest.raises(AttributeError, match="The trustregion is a sphere"): - region.cube_center # noqa: B018 - - -def test_region_binding_bounds(): - bounds = Bounds(lower=-np.ones(2), upper=0.5 * np.ones(2)) - region = Region(center=np.zeros(2), radius=1, bounds=bounds) - assert region.shape == "cube" - assert region.radius == 1 - assert region.bounds is bounds - # shrinkage because cube radius is smaller than (spherical) radius - assert np.all(bounds.lower - region.cube_bounds.lower < 0) - assert_array_equal(region.cube_bounds.upper, bounds.upper) diff --git a/tests/optimization/tranquilo/test_rho_noise.py b/tests/optimization/tranquilo/test_rho_noise.py deleted file mode 100644 index dd62528b1..000000000 --- a/tests/optimization/tranquilo/test_rho_noise.py +++ /dev/null @@ -1,78 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.tranquilo.aggregate_models import get_aggregator -from estimagic.optimization.tranquilo.fit_models import get_fitter -from estimagic.optimization.tranquilo.region import Region -from estimagic.optimization.tranquilo.bounds import Bounds -from estimagic.optimization.tranquilo.rho_noise import simulate_rho_noise -from estimagic.optimization.tranquilo.solve_subproblem import get_subsolver -from numpy.testing import assert_array_almost_equal as aaae - - -@pytest.mark.parametrize("functype", ["scalar", "least_squares"]) -def test_convergence_to_one_if_noise_is_tiny(functype): - """Test simulate_rho_noise. - - For the test, the "true" model is a standard sphere function. 
- - """ - xs = ( - np.array( - [ - [0.0, 0.0], - [0.0, 1.0], - [1.0, 0.0], - [-1.0, 0.0], - [0.0, -1.0], - ] - ) - + 0.5 - ) - - if functype == "least_squares": - fvecs = xs.copy() - model_type = "linear" - model_aggregator = get_aggregator( - aggregator="least_squares_linear", - ) - n_residuals = 2 - else: - fvecs = (xs**2).sum(axis=1).reshape(-1, 1) - model_type = "quadratic" - model_aggregator = get_aggregator( - aggregator="identity", - ) - n_residuals = 1 - - noise_cov = np.eye(n_residuals) * 1e-12 - - trustregion = Region(center=np.ones(2) * 0.5, radius=1.0, bounds=Bounds(None, None)) - model_fitter = get_fitter( - fitter="ols", - model_type=model_type, - residualize=False, - infinity_handling="relative", - ) - - vector_model = model_fitter( - xs, fvecs, weights=None, region=trustregion, old_model=None - ) - - subsolver = get_subsolver(sphere_solver="gqtpar", cube_solver="bntr") - - rng = np.random.default_rng(123) - - got = simulate_rho_noise( - xs=xs, - vector_model=vector_model, - trustregion=trustregion, - noise_cov=noise_cov, - model_fitter=model_fitter, - model_aggregator=model_aggregator, - subsolver=subsolver, - rng=rng, - n_draws=100, - ignore_corelation=True, - ) - - aaae(got, np.ones_like(got), decimal=4) diff --git a/tests/optimization/tranquilo/test_sample_points.py b/tests/optimization/tranquilo/test_sample_points.py deleted file mode 100644 index 5f264a28f..000000000 --- a/tests/optimization/tranquilo/test_sample_points.py +++ /dev/null @@ -1,171 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.tranquilo.bounds import Bounds -from estimagic.optimization.tranquilo.region import Region -from estimagic.optimization.tranquilo.sample_points import ( - _draw_from_distribution, - _minimal_pairwise_distance_on_hull, - _project_onto_unit_hull, - get_sampler, -) -from numpy.testing import assert_array_almost_equal as aaae -from scipy.spatial.distance import pdist - -SAMPLERS = ["random_interior", "random_hull", "optimal_hull"] - - -@pytest.mark.parametrize("sampler", SAMPLERS) -def test_samplers(sampler): - _sampler = get_sampler(sampler) - trustregion = Region(center=np.array([0.0, 0]), radius=1.5, bounds=None) - sample = _sampler( - trustregion=trustregion, - n_points=5, - rng=np.random.default_rng(1234), - ) - assert len(sample) == 5 - assert np.all(-1.5 <= sample) - assert np.all(sample <= 1.5) - - -@pytest.mark.parametrize("sampler", SAMPLERS) -def test_bounds_are_satisfied(sampler): - bounds = Bounds(lower=np.array([-2.0, -2.0]), upper=np.array([0.25, 0.5])) - _sampler = get_sampler(sampler) - trustregion = Region(center=np.array([0.0, 0]), radius=1.5, bounds=bounds) - sample = _sampler( - trustregion=trustregion, - n_points=5, - rng=np.random.default_rng(1234), - ) - lower = np.full_like(sample, bounds.lower) - upper = np.full_like(sample, bounds.upper) - assert np.all(lower <= sample) - assert np.all(sample <= upper) - - -@pytest.mark.parametrize("sampler", SAMPLERS) -def test_enough_existing_points(sampler): - # test that if enough existing points exist an empty array is returned - sampler = get_sampler(sampler=sampler) - bounds = Bounds(lower=-np.ones(3), upper=np.ones(3)) - calculated = sampler( - trustregion=Region(center=np.zeros(3), radius=1, bounds=bounds), - n_points=0, - existing_xs=np.empty((5, 3)), - rng=np.random.default_rng(1234), - ) - - assert calculated.size == 0 - - -def test_optimization_ignores_existing_points(): - # test that existing points behave as constants in the optimal sampling - sampler = 
get_sampler(sampler="optimal_hull") - bounds = Bounds(lower=-np.ones(3), upper=np.ones(3)) - calculated = sampler( - trustregion=Region(center=np.zeros(3), radius=1, bounds=bounds), - n_points=3, - existing_xs=np.ones((2, 3)), # same point implies min distance of zero always - rng=np.random.default_rng(1234), - ) - - assert pdist(calculated).min() > 0 - - -def test_optimality(): - # test that optimal versions of hull samplers produce better criterion value - standard_sampler = get_sampler(sampler="random_hull") - optimal_sampler = get_sampler(sampler="optimal_hull") - bounds = Bounds(lower=-np.ones(3), upper=np.ones(3)) - distances = [] - for sampler in [standard_sampler, optimal_sampler]: - sample = sampler( - trustregion=Region(center=np.zeros(3), radius=1, bounds=bounds), - n_points=5, - rng=np.random.default_rng(1234), - ) - distances.append(pdist(sample).min()) - - assert distances[1] > distances[0] - - -@pytest.mark.parametrize("n_points_randomsearch", [1, 2, 5, 10]) -def test_randomsearch(n_points_randomsearch): - # test that initial randomsearch of hull samplers produce better fekete values - - bounds = Bounds(lower=-np.ones(3), upper=np.ones(3)) - - _sampler = get_sampler("optimal_hull") - - # optimal sampling without randomsearch - _, info = _sampler( - trustregion=Region(center=np.zeros(3), radius=1, bounds=bounds), - n_points=5, - rng=np.random.default_rng(0), - return_info=True, - ) - - # optimal sampling with randomsearch - _, info_randomsearch = _sampler( - trustregion=Region(center=np.zeros(3), radius=1, bounds=bounds), - n_points=5, - rng=np.random.default_rng(0), - n_points_randomsearch=n_points_randomsearch, - return_info=True, - ) - - for key in ["start_fekete", "opt_fekete"]: - statement = info_randomsearch[key] >= info[key] or np.isclose( - info_randomsearch[key], info[key], rtol=1e-3 - ) - assert statement - - -@pytest.mark.parametrize("trustregion_shape", ("sphere", "cube")) -def test_pairwise_distance_on_hull(trustregion_shape): - # equal points imply zero distance - value = _minimal_pairwise_distance_on_hull( - x=np.ones(4), - existing_xs=None, - hardness=1, - trustregion_shape=trustregion_shape, - n_params=2, - ) - assert value == 0 - - # non-equal points imply positive distance - value = _minimal_pairwise_distance_on_hull( - x=np.arange(4), - existing_xs=None, - hardness=1, - trustregion_shape=trustregion_shape, - n_params=2, - ) - assert value > 0 - - -@pytest.mark.parametrize("trustregion_shape", ("sphere", "cube")) -def test_project_onto_unit_hull(trustregion_shape): - rng = np.random.default_rng(1234) - old = rng.uniform(-1, 1, size=10).reshape(5, 2) - new = _project_onto_unit_hull(old, trustregion_shape=trustregion_shape) - - order = 2 if trustregion_shape == "sphere" else np.inf - - norm = np.linalg.norm(old, axis=1, ord=order) - with pytest.raises(AssertionError): - aaae(1, norm) - - norm = np.linalg.norm(new, axis=1, ord=order) - aaae(1, norm) - - -@pytest.mark.parametrize("distribution", ["normal", "uniform"]) -def test_draw_from_distribution(distribution): - rng = np.random.default_rng() - draw = _draw_from_distribution(distribution, rng=rng, size=(3, 2)) - if distribution == "uniform": - assert (-1 <= draw).all() - assert (draw <= 1).all() - assert draw.shape == (3, 2) diff --git a/tests/optimization/tranquilo/test_solve_subproblem.py b/tests/optimization/tranquilo/test_solve_subproblem.py deleted file mode 100644 index 7fda79396..000000000 --- a/tests/optimization/tranquilo/test_solve_subproblem.py +++ /dev/null @@ -1,45 +0,0 @@ -import numpy as np 
-import pytest
-from estimagic.optimization.tranquilo.models import ScalarModel
-from estimagic.optimization.tranquilo.solve_subproblem import get_subsolver
-from estimagic.optimization.tranquilo.region import Region
-from estimagic.optimization.tranquilo.bounds import Bounds
-from numpy.testing import assert_array_almost_equal as aaae
-
-solvers = ["gqtpar", "gqtpar_fast"]
-
-
-@pytest.mark.slow()
-@pytest.mark.parametrize("solver_name", solvers, ids=solvers)
-def test_without_bounds(solver_name):
-    linear_terms = np.array([-0.0005429824695352, -0.1032556117176, -0.06816855282091])
-    quadratic_terms = np.array(
-        [
-            [2.05714077e-02, 7.58182390e-01, 9.00050279e-01],
-            [7.58182390e-01, 6.25867992e01, 4.20096648e01],
-            [9.00050279e-01, 4.20096648e01, 4.03810858e01],
-        ]
-    )
-
-    expected_x = np.array(
-        [
-            -0.9994584757179,
-            -0.007713730538474,
-            0.03198833730482,
-        ]
-    )
-
-    model = ScalarModel(
-        intercept=0, linear_terms=linear_terms, square_terms=quadratic_terms
-    )
-
-    trustregion = Region(center=np.zeros(3), radius=1, bounds=Bounds(None, None))
-
-    solve_subproblem = get_subsolver(sphere_solver=solver_name, cube_solver="bntr")
-
-    calculated = solve_subproblem(
-        model=model,
-        trustregion=trustregion,
-    )
-
-    aaae(calculated.x, expected_x)
diff --git a/tests/optimization/tranquilo/test_tranquilo.py b/tests/optimization/tranquilo/test_tranquilo.py
deleted file mode 100644
index e628a3d1d..000000000
--- a/tests/optimization/tranquilo/test_tranquilo.py
+++ /dev/null
@@ -1,234 +0,0 @@
-import itertools
-
-import numpy as np
-import pytest
-from estimagic.optimization.optimize import minimize
-from estimagic.optimization.tranquilo.tranquilo import (
-    tranquilo,
-    tranquilo_ls,
-)
-from numpy.testing import assert_array_almost_equal as aaae
-
-# ======================================================================================
-# Test tranquilo end-to-end
-# ======================================================================================
-
-
-def _product(sample_filter, model_fitter, model_type):
-    # is used to create products of test cases
-    return list(itertools.product(sample_filter, model_fitter, model_type))
-
-
-# ======================================================================================
-# Scalar Tranquilo
-# ======================================================================================
-
-TEST_CASES = {
-    "ols": {
-        "sample_filter": ["discard_all", "keep_all"],
-        "model_fitter": ["ols"],
-        "model_type": ["quadratic"],
-    },
-    "ols_keep_all": {
-        "sample_filter": ["keep_all"],
-        "model_fitter": ["ols"],
-        "model_type": ["quadratic"],
-    },
-    "pounders_discard_all": {
-        "sample_filter": ["discard_all"],
-        "model_fitter": ["powell"],
-        "model_type": ["quadratic"],
-    },
-    "pounders_keep_all": {
-        "sample_filter": ["keep_all"],
-        "model_fitter": ["powell"],
-        "model_type": ["quadratic"],
-    },
-}
-
-TEST_CASES = [_product(**kwargs) for kwargs in TEST_CASES.values()]
-TEST_CASES = itertools.chain.from_iterable(TEST_CASES)
-
-
-@pytest.mark.parametrize("sample_filter, model_fitter, model_type", TEST_CASES)
-def test_internal_tranquilo_scalar_sphere_defaults(
-    sample_filter,
-    model_fitter,
-    model_type,
-):
-    res = tranquilo(
-        criterion=lambda x: x @ x,
-        x=np.arange(4),
-        sample_filter=sample_filter,
-        model_fitter=model_fitter,
-        model_type=model_type,
-    )
-    aaae(res["solution_x"], np.zeros(4), decimal=4)
-
-
-# ======================================================================================
-# Imprecise options for scalar tranquilo
-# ======================================================================================
-
-TEST_CASES = {
-    "ls_keep": {
-        "sample_filter": ["keep_all"],
-        "model_fitter": ["ols"],
-        "model_type": ["quadratic"],
-    },
-    "pounders_discard_all": {
-        "sample_filter": ["discard_all"],
-        "model_fitter": ["powell"],
-        "model_type": ["quadratic"],
-    },
-}
-
-TEST_CASES = [_product(**kwargs) for kwargs in TEST_CASES.values()]
-TEST_CASES = itertools.chain.from_iterable(TEST_CASES)
-
-
-@pytest.mark.parametrize("sample_filter, model_fitter, model_type", TEST_CASES)
-def test_internal_tranquilo_scalar_sphere_imprecise_defaults(
-    sample_filter,
-    model_fitter,
-    model_type,
-):
-    res = tranquilo(
-        criterion=lambda x: x @ x,
-        x=np.arange(4),
-        sample_filter=sample_filter,
-        model_fitter=model_fitter,
-        model_type=model_type,
-    )
-    aaae(res["solution_x"], np.zeros(4), decimal=3)
-
-
-# ======================================================================================
-# External
-# ======================================================================================
-
-
-def test_external_tranquilo_scalar_sphere_defaults():
-    res = minimize(
-        criterion=lambda x: x @ x,
-        params=np.arange(4),
-        algorithm="tranquilo",
-    )
-
-    aaae(res.params, np.zeros(4), decimal=4)
-
-
-# ======================================================================================
-# Least-squares Tranquilo
-# ======================================================================================
-
-
-TEST_CASES = {
-    "ols": {
-        "sample_filter": ["keep_all", "discard_all"],
-        "model_fitter": ["ols"],
-        "model_type": ["linear"],
-    },
-    "tranquilo": {
-        "sample_filter": ["keep_all", "discard_all"],
-        "model_fitter": ["tranquilo"],
-        "model_type": ["linear"],
-    },
-}
-
-TEST_CASES = [_product(**kwargs) for kwargs in TEST_CASES.values()]
-TEST_CASES = itertools.chain.from_iterable(TEST_CASES)
-
-
-@pytest.mark.parametrize("sample_filter, model_fitter, model_type", TEST_CASES)
-def test_internal_tranquilo_ls_sphere_defaults(
-    sample_filter,
-    model_fitter,
-    model_type,
-):
-    res = tranquilo_ls(
-        criterion=lambda x: x,
-        x=np.arange(5),
-        sample_filter=sample_filter,
-        model_fitter=model_fitter,
-        model_type=model_type,
-    )
-    aaae(res["solution_x"], np.zeros(5), decimal=5)
-
-
-# ======================================================================================
-# External
-# ======================================================================================
-
-
-def test_external_tranquilo_ls_sphere_defaults():
-    res = minimize(
-        criterion=lambda x: x,
-        params=np.arange(5),
-        algorithm="tranquilo_ls",
-    )
-
-    aaae(res.params, np.zeros(5), decimal=5)
-
-
-# ======================================================================================
-# Noisy case
-# ======================================================================================
-
-
-@pytest.mark.parametrize("algo", ["tranquilo", "tranquilo_ls"])
-def test_tranquilo_with_noise_handling_and_deterministic_function(algo):
-    def _f(x):
-        return {"root_contributions": x, "value": x @ x}
-
-    res = minimize(
-        criterion=_f,
-        params=np.arange(5),
-        algorithm=algo,
-        algo_options={"noisy": True},
-    )
-
-    aaae(res.params, np.zeros(5), decimal=3)
-
-
-@pytest.mark.slow()
-def test_tranquilo_ls_with_noise_handling_and_noisy_function():
-    rng = np.random.default_rng(123)
-
-    def _f(x):
-        x_n = x + rng.normal(0, 0.05, size=x.shape)
-        return {"root_contributions": x_n, "value": x_n @ x_n}
-
-    res = minimize(
-        criterion=_f,
-        params=np.ones(3),
algorithm="tranquilo", - algo_options={"noisy": True, "n_evals_per_point": 10}, - ) - - aaae(res.params, np.zeros(3), decimal=1) - - -# ====================================================================================== -# Bounded case -# ====================================================================================== - - -def sum_of_squares(x): - contribs = x**2 - return {"value": contribs.sum(), "contributions": contribs, "root_contributions": x} - - -@pytest.mark.parametrize("algorithm", ["tranquilo", "tranquilo_ls"]) -def test_tranquilo_with_binding_bounds(algorithm): - res = minimize( - criterion=sum_of_squares, - params=np.array([3, 2, -3]), - lower_bounds=np.array([1, -np.inf, -np.inf]), - upper_bounds=np.array([np.inf, np.inf, -1]), - algorithm=algorithm, - collect_history=True, - skip_checks=True, - ) - assert res.success in [True, None] - aaae(res.params, np.array([1, 0, -1]), decimal=3) diff --git a/tests/optimization/tranquilo/test_volume.py b/tests/optimization/tranquilo/test_volume.py deleted file mode 100644 index e09c500fa..000000000 --- a/tests/optimization/tranquilo/test_volume.py +++ /dev/null @@ -1,104 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.tranquilo.volume import ( - _cube_radius, - _cube_volume, - _sphere_radius, - _sphere_volume, - get_radius, - get_radius_after_volume_scaling, - get_radius_of_cube_with_volume_of_sphere, - get_radius_of_sphere_with_volume_of_cube, - get_volume, -) - -dims = dims = [1, 2, 3, 4, 12, 13, 15] -coeffs = [ - 2, - np.pi, - 4 * np.pi / 3, - np.pi**2 / 2, - np.pi**6 / 720, - 128 * np.pi**6 / 135135, - 256 * np.pi**7 / 2027025, -] - - -@pytest.mark.parametrize("dim", dims) -def test_get_radius_of_sphere_with_volume_of_cube(dim): - cube_radius = 1.5 - scaling_factor = 0.95 - vol = _cube_volume(cube_radius, dim) * scaling_factor - expected = _sphere_radius(vol, dim) - got = get_radius_of_sphere_with_volume_of_cube(cube_radius, dim, scaling_factor) - assert np.allclose(got, expected) - - -@pytest.mark.parametrize("dim", dims) -def test_get_radius_of_cube_with_volume_of_sphere(dim): - sphere_radius = 1.5 - scaling_factor = 0.95 - vol = _sphere_volume(sphere_radius, dim) * scaling_factor - expected = _cube_radius(vol, dim) - got = get_radius_of_cube_with_volume_of_sphere(sphere_radius, dim, scaling_factor) - assert np.allclose(got, expected) - - -def test_get_radius_of_sphere_with_volume_of_cube_no_scaling(): - v1 = get_radius_of_sphere_with_volume_of_cube(2.0, 2, None) - v2 = get_radius_of_sphere_with_volume_of_cube(2.0, 2, 1.0) - assert v1 == v2 - - -def test_get_radius_of_cube_with_volume_of_sphere_no_scaling(): - v1 = get_radius_of_cube_with_volume_of_sphere(2.0, 2, None) - v2 = get_radius_of_cube_with_volume_of_sphere(2.0, 2, 1.0) - assert v1 == v2 - - -@pytest.mark.parametrize("dim", dims) -def test_radius_after_volume_rescaling_scaling_factor_sphere(dim): - radius = 0.6 - scaling_factor = 0.9 - - naive = _sphere_radius(_sphere_volume(radius, dim) * scaling_factor, dim) - - got = get_radius_after_volume_scaling(radius, dim, scaling_factor) - - assert np.allclose(got, naive) - - -@pytest.mark.parametrize("dim", dims) -def test_radius_after_volume_rescaling_scaling_factor_cube(dim): - radius = 0.6 - scaling_factor = 0.9 - - naive = _cube_radius(_cube_volume(radius, dim) * scaling_factor, dim) - - got = get_radius_after_volume_scaling(radius, dim, scaling_factor) - - assert np.allclose(got, naive) - - -@pytest.mark.parametrize("dim, coeff", list(zip(dims, coeffs))) -def test_shpere_volume_and_radius(dim, 
-    radius = 0.5
-    expected_volume = coeff * radius**dim
-    got_volume = get_volume(radius, dim, "sphere")
-    assert np.allclose(got_volume, expected_volume)
-
-    got_radius = get_radius(got_volume, dim, "sphere")
-    assert np.allclose(got_radius, radius)
-
-
-@pytest.mark.parametrize("dim", dims)
-def test_cube_volume_and_radius(dim):
-    radius = 0.6
-
-    expected_volume = 1.2**dim
-
-    got_volume = get_volume(radius, dim, "cube")
-    assert np.allclose(got_volume, expected_volume)
-
-    got_radius = get_radius(got_volume, dim, "cube")
-    assert np.allclose(got_radius, radius)
diff --git a/tests/optimization/tranquilo/test_weighting.py b/tests/optimization/tranquilo/test_weighting.py
deleted file mode 100644
index 201bad583..000000000
--- a/tests/optimization/tranquilo/test_weighting.py
+++ /dev/null
@@ -1,7 +0,0 @@
-import numpy as np
-from estimagic.optimization.tranquilo.weighting import get_sample_weighter
-
-
-def test_no_weighting():
-    weight_points = get_sample_weighter(weighter="no_weights", bounds=None)
-    assert weight_points(np.ones((4, 3)), trustregion=None) is None
diff --git a/tests/optimization/tranquilo/test_wrap_criterion.py b/tests/optimization/tranquilo/test_wrap_criterion.py
deleted file mode 100644
index 3886a36cd..000000000
--- a/tests/optimization/tranquilo/test_wrap_criterion.py
+++ /dev/null
@@ -1,61 +0,0 @@
-import itertools
-
-import numpy as np
-import pytest
-from estimagic.optimization.tranquilo.history import History
-from estimagic.optimization.tranquilo.wrap_criterion import get_wrapped_criterion
-from numpy.testing import assert_array_almost_equal as aaae
-
-TEST_CASES = list(itertools.product(["scalar", "least_squares", "likelihood"], [1, 2]))
-
-
-@pytest.mark.parametrize("functype, n_evals", TEST_CASES)
-def test_wrapped_criterion(functype, n_evals):
-    # set up criterion (all should have same results)
-    func_dict = {
-        "least_squares": lambda x: x,
-        "likelihood": lambda x: x**2,
-        "scalar": lambda x: x @ x,
-    }
-
-    criterion = func_dict[functype]
-
-    # set up history
-    history = History(functype=functype)
-    for params in [np.zeros(3), np.ones(3)]:
-        idxs = history.add_xs(params)
-        history.add_evals(idxs, criterion(params))
-
-    assert history.get_n_fun() == 2
-
-    wrapped_criterion = get_wrapped_criterion(
-        criterion=criterion, batch_evaluator="joblib", n_cores=1, history=history
-    )
-
-    # set up params and expected results
-    if n_evals == 1:
-        params = np.arange(3)
-        history.add_xs(params)
-        expected_fvecs = criterion(params)
-        expected_fvals = params @ params
-        expected_indices = 2
-        eval_info = {2: 1}
-    else:
-        params = np.arange(3 * n_evals).reshape(n_evals, 3)
-        history.add_xs(params)
-        expected_fvecs = np.array([criterion(x) for x in params]).reshape(2, -1)
-        expected_fvals = np.array([x @ x for x in params])
-        expected_indices = np.arange(2, 2 + n_evals)
-        eval_info = {idx: 1 for idx in expected_indices}
-
-    # use wrapped_criterion
-    wrapped_criterion(eval_info)
-
-    assert history.get_n_fun() == 2 + n_evals
-    assert history.get_n_xs() == 2 + n_evals
-
-    got_fvecs = history.fvecs[expected_indices]
-    aaae(got_fvecs, expected_fvecs)
-
-    got_fvals = history.fvals[expected_indices]
-    aaae(got_fvals, expected_fvals)
diff --git a/tests/visualization/test_visualize_tranquilo.py b/tests/visualization/test_visualize_tranquilo.py
deleted file mode 100644
index 9d2d2dfd3..000000000
--- a/tests/visualization/test_visualize_tranquilo.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import pytest
-from estimagic import get_benchmark_problems, minimize
-from estimagic.visualization.visualize_tranquilo import visualize_tranquilo
-
-cases = []
-algo_options = {
-    "random_hull": {
-        "sampler": "random_hull",
-        "subsolver": "gqtpar_fast",
-        "sample_filter": "keep_all",
-        "stopping.max_iterations": 10,
-    },
-    "optimal_hull": {
-        "sampler": "optimal_hull",
-        "subsolver": "gqtpar_fast",
-        "sample_filter": "keep_all",
-        "stopping.max_iterations": 10,
-    },
-}
-for problem in ["rosenbrock_good_start", "watson_6_good_start"]:
-    inputs = get_benchmark_problems("more_wild")[problem]["inputs"]
-    criterion = inputs["criterion"]
-    start_params = inputs["params"]
-    for algo in ["tranquilo", "tranquilo_ls"]:
-        results = {}
-        for s, options in algo_options.items():
-            results[s] = minimize(
-                criterion=criterion,
-                params=start_params,
-                algorithm=algo,
-                algo_options=options,
-            )
-        cases.append(results)
-
-
-@pytest.mark.parametrize("results", cases)
-def test_visualize_tranquilo(results):
-    visualize_tranquilo(results, 5)
-    for res in results.values():
-        visualize_tranquilo(res, [1, 5])