From d25d4c28721b3e9d87421bafcec5c37263ce0e60 Mon Sep 17 00:00:00 2001 From: Janos Gabler Date: Mon, 5 Jun 2023 17:35:00 +0200 Subject: [PATCH] Make tranquilo an optional dependency (#464) --- .envs/testenv-linux.yml | 2 +- .envs/testenv-others.yml | 2 +- .github/workflows/main.yml | 4 +- CHANGES.md | 21 + docs/rtd_environment.yml | 1 - docs/source/conf.py | 1 + environment.yml | 2 +- pyproject.toml | 1 + setup.cfg | 1 - src/estimagic/algorithms.py | 46 + src/estimagic/benchmarking/cartis_roberts.py | 10 +- src/estimagic/benchmarking/run_benchmark.py | 2 +- src/estimagic/config.py | 16 + src/estimagic/optimization/__init__.py | 46 - src/estimagic/optimization/get_algorithm.py | 2 +- .../subsolvers/_conjugate_gradient_fast.py | 138 -- .../subsolvers/_steihaug_toint_fast.py | 207 --- .../optimization/subsolvers/_trsbox_fast.py | 658 ---------- .../optimization/subsolvers/bntr_fast.py | 1167 ----------------- .../optimization/subsolvers/gqtpar_fast.py | 668 ---------- src/estimagic/optimization/tranquilo.py | 29 + .../optimization/tranquilo/__init__.py | 0 .../tranquilo/acceptance_decision.py | 244 ---- .../tranquilo/acceptance_sample_size.py | 69 - .../optimization/tranquilo/adjust_radius.py | 42 - .../tranquilo/aggregate_models.py | 152 --- .../optimization/tranquilo/bounds.py | 28 - .../optimization/tranquilo/clustering.py | 75 -- .../tranquilo/estimate_variance.py | 61 - .../optimization/tranquilo/filter_points.py | 129 -- .../optimization/tranquilo/fit_models.py | 507 ------- .../optimization/tranquilo/geometry.py | 24 - .../optimization/tranquilo/get_component.py | 231 ---- .../optimization/tranquilo/handle_infinity.py | 49 - .../optimization/tranquilo/history.py | 261 ---- .../optimization/tranquilo/models.py | 295 ----- .../optimization/tranquilo/options.py | 210 --- .../optimization/tranquilo/poisedness.py | 211 --- .../tranquilo/process_arguments.py | 314 ----- .../optimization/tranquilo/region.py | 152 --- .../optimization/tranquilo/rho_noise.py | 87 -- .../optimization/tranquilo/sample_points.py | 466 ------- .../tranquilo/solve_subproblem.py | 200 --- .../optimization/tranquilo/tranquilo.py | 467 ------- .../optimization/tranquilo/volume.py | 81 -- .../optimization/tranquilo/weighting.py | 27 - .../optimization/tranquilo/wrap_criterion.py | 56 - .../tranquilo/wrapped_subsolvers.py | 94 -- .../visualization/visualize_tranquilo.py | 590 --------- .../optimization/subsolvers/test_bntr_fast.py | 551 -------- .../subsolvers/test_gqtpar_fast.py | 98 -- .../subsolvers/test_gqtpar_lambdas.py | 20 - .../subsolvers/test_minimize_trust_region.py | 484 ------- tests/optimization/test_history_collection.py | 2 +- tests/optimization/test_many_algorithms.py | 2 +- .../optimization/test_quadratic_subsolvers.py | 11 - .../test_with_nonlinear_constraints.py | 2 +- .../tranquilo/test_acceptance_decision.py | 140 -- .../tranquilo/test_acceptance_sample_size.py | 86 -- .../tranquilo/test_adjust_radius.py | 104 -- .../tranquilo/test_aggregate_models.py | 79 -- tests/optimization/tranquilo/test_bounds.py | 38 - .../optimization/tranquilo/test_clustering.py | 34 - .../tranquilo/test_estimate_variance.py | 44 - .../tranquilo/test_filter_points.py | 48 - .../optimization/tranquilo/test_fit_models.py | 145 -- .../tranquilo/test_get_component.py | 170 --- .../tranquilo/test_handle_infinity.py | 15 - tests/optimization/tranquilo/test_history.py | 230 ---- tests/optimization/tranquilo/test_models.py | 190 --- tests/optimization/tranquilo/test_options.py | 56 - .../optimization/tranquilo/test_poisedness.py | 
388 ------ .../tranquilo/test_process_arguments.py | 137 -- tests/optimization/tranquilo/test_region.py | 128 -- .../optimization/tranquilo/test_rho_noise.py | 78 -- .../tranquilo/test_sample_points.py | 171 --- .../tranquilo/test_solve_subproblem.py | 45 - .../optimization/tranquilo/test_tranquilo.py | 234 ---- tests/optimization/tranquilo/test_volume.py | 104 -- .../optimization/tranquilo/test_weighting.py | 7 - .../tranquilo/test_wrap_criterion.py | 61 - .../visualization/test_visualize_tranquilo.py | 40 - 82 files changed, 133 insertions(+), 11955 deletions(-) create mode 100644 src/estimagic/algorithms.py delete mode 100644 src/estimagic/optimization/subsolvers/_conjugate_gradient_fast.py delete mode 100644 src/estimagic/optimization/subsolvers/_steihaug_toint_fast.py delete mode 100644 src/estimagic/optimization/subsolvers/_trsbox_fast.py delete mode 100644 src/estimagic/optimization/subsolvers/bntr_fast.py delete mode 100644 src/estimagic/optimization/subsolvers/gqtpar_fast.py create mode 100644 src/estimagic/optimization/tranquilo.py delete mode 100644 src/estimagic/optimization/tranquilo/__init__.py delete mode 100644 src/estimagic/optimization/tranquilo/acceptance_decision.py delete mode 100644 src/estimagic/optimization/tranquilo/acceptance_sample_size.py delete mode 100644 src/estimagic/optimization/tranquilo/adjust_radius.py delete mode 100644 src/estimagic/optimization/tranquilo/aggregate_models.py delete mode 100644 src/estimagic/optimization/tranquilo/bounds.py delete mode 100644 src/estimagic/optimization/tranquilo/clustering.py delete mode 100644 src/estimagic/optimization/tranquilo/estimate_variance.py delete mode 100644 src/estimagic/optimization/tranquilo/filter_points.py delete mode 100644 src/estimagic/optimization/tranquilo/fit_models.py delete mode 100644 src/estimagic/optimization/tranquilo/geometry.py delete mode 100644 src/estimagic/optimization/tranquilo/get_component.py delete mode 100644 src/estimagic/optimization/tranquilo/handle_infinity.py delete mode 100644 src/estimagic/optimization/tranquilo/history.py delete mode 100644 src/estimagic/optimization/tranquilo/models.py delete mode 100644 src/estimagic/optimization/tranquilo/options.py delete mode 100644 src/estimagic/optimization/tranquilo/poisedness.py delete mode 100644 src/estimagic/optimization/tranquilo/process_arguments.py delete mode 100644 src/estimagic/optimization/tranquilo/region.py delete mode 100644 src/estimagic/optimization/tranquilo/rho_noise.py delete mode 100644 src/estimagic/optimization/tranquilo/sample_points.py delete mode 100644 src/estimagic/optimization/tranquilo/solve_subproblem.py delete mode 100644 src/estimagic/optimization/tranquilo/tranquilo.py delete mode 100644 src/estimagic/optimization/tranquilo/volume.py delete mode 100644 src/estimagic/optimization/tranquilo/weighting.py delete mode 100644 src/estimagic/optimization/tranquilo/wrap_criterion.py delete mode 100644 src/estimagic/optimization/tranquilo/wrapped_subsolvers.py delete mode 100644 src/estimagic/visualization/visualize_tranquilo.py delete mode 100644 tests/optimization/subsolvers/test_bntr_fast.py delete mode 100644 tests/optimization/subsolvers/test_gqtpar_fast.py delete mode 100644 tests/optimization/subsolvers/test_gqtpar_lambdas.py delete mode 100644 tests/optimization/subsolvers/test_minimize_trust_region.py delete mode 100644 tests/optimization/tranquilo/test_acceptance_decision.py delete mode 100644 tests/optimization/tranquilo/test_acceptance_sample_size.py delete mode 100644 
tests/optimization/tranquilo/test_adjust_radius.py delete mode 100644 tests/optimization/tranquilo/test_aggregate_models.py delete mode 100644 tests/optimization/tranquilo/test_bounds.py delete mode 100644 tests/optimization/tranquilo/test_clustering.py delete mode 100644 tests/optimization/tranquilo/test_estimate_variance.py delete mode 100644 tests/optimization/tranquilo/test_filter_points.py delete mode 100644 tests/optimization/tranquilo/test_fit_models.py delete mode 100644 tests/optimization/tranquilo/test_get_component.py delete mode 100644 tests/optimization/tranquilo/test_handle_infinity.py delete mode 100644 tests/optimization/tranquilo/test_history.py delete mode 100644 tests/optimization/tranquilo/test_models.py delete mode 100644 tests/optimization/tranquilo/test_options.py delete mode 100644 tests/optimization/tranquilo/test_poisedness.py delete mode 100644 tests/optimization/tranquilo/test_process_arguments.py delete mode 100644 tests/optimization/tranquilo/test_region.py delete mode 100644 tests/optimization/tranquilo/test_rho_noise.py delete mode 100644 tests/optimization/tranquilo/test_sample_points.py delete mode 100644 tests/optimization/tranquilo/test_solve_subproblem.py delete mode 100644 tests/optimization/tranquilo/test_tranquilo.py delete mode 100644 tests/optimization/tranquilo/test_volume.py delete mode 100644 tests/optimization/tranquilo/test_weighting.py delete mode 100644 tests/optimization/tranquilo/test_wrap_criterion.py delete mode 100644 tests/visualization/test_visualize_tranquilo.py diff --git a/.envs/testenv-linux.yml b/.envs/testenv-linux.yml index 0535fecb6..a4cd42372 100644 --- a/.envs/testenv-linux.yml +++ b/.envs/testenv-linux.yml @@ -16,13 +16,13 @@ dependencies: - click # run, tests - cloudpickle # run, tests - joblib # run, tests - - numba # run, tests - numpy>=1.17.0 # run, tests - pandas # run, tests - plotly # run, tests - pybaum >= 0.1.2 # run, tests - scipy>=1.2.1 # run, tests - sqlalchemy # run, tests + - tranquilo>=0.0.4 # dev, tests - pip: # dev, tests, docs - DFO-LS # dev, tests - Py-BOBYQA # dev, tests diff --git a/.envs/testenv-others.yml b/.envs/testenv-others.yml index 475f786c2..33093b602 100644 --- a/.envs/testenv-others.yml +++ b/.envs/testenv-others.yml @@ -15,13 +15,13 @@ dependencies: - click # run, tests - cloudpickle # run, tests - joblib # run, tests - - numba # run, tests - numpy>=1.17.0 # run, tests - pandas # run, tests - plotly # run, tests - pybaum >= 0.1.2 # run, tests - scipy>=1.2.1 # run, tests - sqlalchemy # run, tests + - tranquilo>=0.0.4 # dev, tests - pip: # dev, tests, docs - DFO-LS # dev, tests - Py-BOBYQA # dev, tests diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 895c6cfa5..cae534bd7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -21,9 +21,9 @@ jobs: os: - ubuntu-latest python-version: - - '3.8' - '3.9' - '3.10' + - '3.11' steps: - uses: actions/checkout@v3 - name: create build environment @@ -54,9 +54,9 @@ jobs: - macos-latest - windows-latest python-version: - - '3.8' - '3.9' - '3.10' + - '3.11' steps: - uses: actions/checkout@v3 - name: create build environment diff --git a/CHANGES.md b/CHANGES.md index e399c1891..1c6d797ba 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,27 @@ This is a record of all past estimagic releases and what went into them in rever chronological order. We follow [semantic versioning](https://semver.org/) and all releases are available on [Anaconda.org](https://anaconda.org/OpenSourceEconomics/estimagic). 
+Following the [scientific python guidelines](https://scientific-python.org/specs/spec-0000/) +we drop official support for Python 3.8. + + +## 0.4.6 + +This release drastically improves the optimizer benchmarking capabilities, especially +with noisy functions and parallel optimizers. It makes tranquilo and numba optional +dependencies and is the first version of estimagic to be compatible with Python +3.11. + + +- {gh}`464` Makes tranquilo and numba optional dependencies ({ghuser}`janosg`) +- {gh}`461` Updates docstrings for process_benchmark_results ({ghuser}`segsell`) +- {gh}`460` Fixes several bugs in the processing of benchmark results with noisy + functions ({ghuser}`janosg`) +- {gh}`459` Prepares benchmarking functionality for parallel optimizers + ({ghuser}`mpetrosian` and {ghuser}`janosg`) +- {gh}`457` Removes some unused files ({ghuser}`segsell`) +- {gh}`455` Improves a local pre-commit hook ({ghuser}`ChristianZimpelmann`) + ## 0.4.5 diff --git a/docs/rtd_environment.yml b/docs/rtd_environment.yml index 5d38e9afd..68eed648b 100644 --- a/docs/rtd_environment.yml +++ b/docs/rtd_environment.yml @@ -17,7 +17,6 @@ dependencies: - ipython_genutils - myst-nb - pydata-sphinx-theme<=0.12.0 - - numba - pybaum - matplotlib - seaborn diff --git a/docs/source/conf.py b/docs/source/conf.py index 58abca26c..ba12b8dfb 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -77,6 +77,7 @@ "petsc4py", "statsmodels", "numba", + "tranquilo", ] extlinks = { diff --git a/environment.yml b/environment.yml index 3b7b6e85a..f326dc071 100644 --- a/environment.yml +++ b/environment.yml @@ -22,7 +22,6 @@ dependencies: - click # run, tests - cloudpickle # run, tests - joblib # run, tests - - numba # run, tests - numpy>=1.17.0 # run, tests - pandas # run, tests - plotly # run, tests @@ -35,6 +34,7 @@ dependencies: - sphinx-copybutton # docs - sphinx-panels # docs - sphinxcontrib-bibtex # docs + - tranquilo>=0.0.4 # dev, tests - pip: # dev, tests, docs - DFO-LS # dev, tests - Py-BOBYQA # dev, tests diff --git a/pyproject.toml b/pyproject.toml index b5b3f2483..5a4c08015 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,6 +91,7 @@ filterwarnings = [ "ignore:Widget._widget_types is deprecated", "ignore:Widget.widget_types is deprecated", "ignore:Widget.widgets is deprecated", + "ignore:Parallelization together with", ] addopts = ["--doctest-modules"] markers = [ diff --git a/setup.cfg b/setup.cfg index ab268cd59..6c16c830a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -39,7 +39,6 @@ install_requires = click cloudpickle joblib - numba numpy>=1.17.0 pandas plotly diff --git a/src/estimagic/algorithms.py b/src/estimagic/algorithms.py new file mode 100644 index 000000000..56d787f55 --- /dev/null +++ b/src/estimagic/algorithms.py @@ -0,0 +1,46 @@ +import inspect + +from estimagic.optimization import ( + bhhh, + cyipopt_optimizers, + fides_optimizers, + nag_optimizers, + neldermead, + nlopt_optimizers, + pounders, + pygmo_optimizers, + scipy_optimizers, + simopt_optimizers, + tao_optimizers, + tranquilo, +) + +MODULES = [ + cyipopt_optimizers, + fides_optimizers, + nag_optimizers, + nlopt_optimizers, + pygmo_optimizers, + scipy_optimizers, + simopt_optimizers, + tao_optimizers, + bhhh, + neldermead, + pounders, + tranquilo, +] + +ALL_ALGORITHMS = {} +AVAILABLE_ALGORITHMS = {} +for module in MODULES: + func_dict = dict(inspect.getmembers(module, inspect.isfunction)) + for name, func in func_dict.items(): + if hasattr(func, "_algorithm_info"): + ALL_ALGORITHMS[name] = func + if
func._algorithm_info.is_available: + AVAILABLE_ALGORITHMS[name] = func + + +GLOBAL_ALGORITHMS = [ + name for name, func in ALL_ALGORITHMS.items() if func._algorithm_info.is_global +] diff --git a/src/estimagic/benchmarking/cartis_roberts.py b/src/estimagic/benchmarking/cartis_roberts.py index 8554f01f0..293ed1272 100644 --- a/src/estimagic/benchmarking/cartis_roberts.py +++ b/src/estimagic/benchmarking/cartis_roberts.py @@ -16,8 +16,16 @@ """ from functools import partial -from numba import njit import numpy as np +from estimagic.config import IS_NUMBA_INSTALLED + +if IS_NUMBA_INSTALLED: + from numba import njit +else: + + def njit(func): + return func + from estimagic.benchmarking.more_wild import ( brown_almost_linear, diff --git a/src/estimagic/benchmarking/run_benchmark.py b/src/estimagic/benchmarking/run_benchmark.py index 4b0e3bc99..e02203a95 100644 --- a/src/estimagic/benchmarking/run_benchmark.py +++ b/src/estimagic/benchmarking/run_benchmark.py @@ -10,7 +10,7 @@ import numpy as np from estimagic import batch_evaluators -from estimagic.optimization import AVAILABLE_ALGORITHMS +from estimagic.algorithms import AVAILABLE_ALGORITHMS from estimagic.optimization.optimize import minimize from pybaum import tree_just_flatten from estimagic.parameters.tree_registry import get_registry diff --git a/src/estimagic/config.py b/src/estimagic/config.py index 165cf0e4e..dd0fcde1c 100644 --- a/src/estimagic/config.py +++ b/src/estimagic/config.py @@ -87,6 +87,22 @@ IS_SIMOPT_INSTALLED = True +try: + import tranquilo # noqa: F401 +except ImportError: + IS_TRANQUILO_INSTALLED = False +else: + IS_TRANQUILO_INSTALLED = True + + +try: + import numba # noqa: F401 +except ImportError: + IS_NUMBA_INSTALLED = False +else: + IS_NUMBA_INSTALLED = True + + # ================================================================================= # Dashboard Defaults # ================================================================================= diff --git a/src/estimagic/optimization/__init__.py b/src/estimagic/optimization/__init__.py index c5e93112a..e69de29bb 100644 --- a/src/estimagic/optimization/__init__.py +++ b/src/estimagic/optimization/__init__.py @@ -1,46 +0,0 @@ -import inspect - -from estimagic.optimization import ( - bhhh, - cyipopt_optimizers, - fides_optimizers, - nag_optimizers, - neldermead, - nlopt_optimizers, - pounders, - pygmo_optimizers, - scipy_optimizers, - simopt_optimizers, - tao_optimizers, -) -from estimagic.optimization.tranquilo import tranquilo - -MODULES = [ - cyipopt_optimizers, - fides_optimizers, - nag_optimizers, - nlopt_optimizers, - pygmo_optimizers, - scipy_optimizers, - simopt_optimizers, - tao_optimizers, - bhhh, - neldermead, - pounders, - tranquilo, -] - -ALL_ALGORITHMS = {} -AVAILABLE_ALGORITHMS = {} -for module in MODULES: - func_dict = dict(inspect.getmembers(module, inspect.isfunction)) - for name, func in func_dict.items(): - if hasattr(func, "_algorithm_info"): - ALL_ALGORITHMS[name] = func - if func._algorithm_info.is_available: - AVAILABLE_ALGORITHMS[name] = func - - -GLOBAL_ALGORITHMS = [ - name for name, func in ALL_ALGORITHMS.items() if func._algorithm_info.is_global -] diff --git a/src/estimagic/optimization/get_algorithm.py b/src/estimagic/optimization/get_algorithm.py index ef07978aa..553465ba8 100644 --- a/src/estimagic/optimization/get_algorithm.py +++ b/src/estimagic/optimization/get_algorithm.py @@ -9,7 +9,7 @@ list_of_dicts_to_dict_of_lists, ) from estimagic.logging.write_to_database import update_row -from estimagic.optimization import 
ALL_ALGORITHMS +from estimagic.algorithms import ALL_ALGORITHMS from estimagic.utilities import propose_alternatives diff --git a/src/estimagic/optimization/subsolvers/_conjugate_gradient_fast.py b/src/estimagic/optimization/subsolvers/_conjugate_gradient_fast.py deleted file mode 100644 index 6e46215bb..000000000 --- a/src/estimagic/optimization/subsolvers/_conjugate_gradient_fast.py +++ /dev/null @@ -1,138 +0,0 @@ -"""Implementation of the Conjugate Gradient algorithm.""" -import numpy as np -from numba import njit - - -@njit -def minimize_trust_cg_fast( - model_gradient, model_hessian, trustregion_radius, gtol_abs, gtol_rel -): - """Minimize the quadratic subproblem via (standard) conjugate gradient. - - Solve the trust-region quadratic subproblem: - min_x g.T @ x + 0.5 * x.T @ H @ x - s.t. ||x|| <= trustregion_radius - - approximately, where g denotes the gradient and H the hessian of the quadratic - model (i.e. the linear terms and square_terms), respectively. - - Args: - model_gradient (np.ndarray): 1d array of shape (n,) containing the - gradient (i.e. linear terms) of the quadratic model. - model_hessian (np.ndarray): 2d array of shape (n, n) containing the - hessian (i.e. square terms) of the quadratic model. - trustregion_radius (float): Radius of the trust-region. - gtol_abs (float): Convergence tolerance for the absolute gradient norm. - gtol_rel (float): Convergence tolerance for the relative gradient norm. - - Returns: - np.ndarray: Solution vector of shape (n,). - - """ - n = len(model_gradient) - max_iter = n * 2 - x_candidate = np.zeros(n) - - residual = model_gradient - direction = -model_gradient - - gradient_norm = np.linalg.norm(residual) - stop_tol = max(gtol_abs, gtol_rel * gradient_norm) - - for _ in range(max_iter): - if gradient_norm <= stop_tol: - break - - square_terms = direction.T @ model_hessian @ direction - - distance_to_boundary = _get_distance_to_trustregion_boundary( - x_candidate, direction, trustregion_radius - ) - - # avoid divide by zero warning - if square_terms > 0: - step_size = (residual @ residual) / square_terms - else: - step_size = np.inf - - if square_terms <= 0 or step_size > distance_to_boundary: - x_candidate = x_candidate + distance_to_boundary * direction - break - - x_candidate, residual, direction = _update_vectors_for_next_iteration( - x_candidate, residual, direction, model_hessian, step_size - ) - gradient_norm = np.linalg.norm(residual) - - return x_candidate - - -@njit -def _update_vectors_for_next_iteration( - x_candidate, residual, direction, hessian, alpha -): - """Update candidate, residual, and direction vectors for the next iteration. - - Args: - x_candidate (np.ndarray): Candidate vector of shape (n,). - residual (np.ndarray): Array of residuals of shape (n,). The residual vector - is defined as `r = Ax - b`, where `A` denotes the hessian matrix and `b` the - gradient vector of the quadratic trust-region subproblem. - `r` is equivalent to the first derivative of the quadratic subproblem. - direction (np.ndarray): Direction vector of shape (n,). - - Returns: - x_candidate (np.ndarray): Updated candidate vector of shape (n,). - residual_new (np.ndarray): Updated array of residuals of shape (n,). - direction (np.ndarray): Updated direction vector of shape (n,).
- - """ - residual_new = np.zeros(len(residual)) - nom = 0.0 - denom = 0.0 - for i in range(len(x_candidate)): - x_candidate[i] = x_candidate[i] + alpha * direction[i] - temp = 0 - for j in range(len(x_candidate)): - temp += hessian[i, j] * direction[j] - residual_new[i] = temp * alpha + residual[i] - - nom += residual_new[i] * residual_new[i] - denom += residual[i] * residual[i] - beta = nom / denom - direction = -residual_new + beta * direction - - return x_candidate, residual_new, direction - - -@njit -def _get_distance_to_trustregion_boundary(candidate, direction, radius): - """Compute the distance of the candidate vector to trustregion boundary. - - The positive distance sigma is defined in Eculidean norm, as follows: - - || x + sigma * d || = radius - - where x denotes the candidate vector, and d the direction vector. - - Args: - candidate(np.ndarray): Candidate vector of shape (n,). - direction (np.ndarray): Direction vector of shape (n,). - radius (floar): Radius of the trust-region - - Returns: - float: The candidate vector's distance to the trustregion - boundary. - - """ - cc = 0 - cd = 0 - dd = 0 - for i in range(len(direction)): - cc += candidate[i] ** 2 - dd += direction[i] ** 2 - cd += candidate[i] * direction[i] - sigma = -cd + np.sqrt(cd * cd + dd * (radius**2 - cc)) - sigma = sigma / dd - - return sigma diff --git a/src/estimagic/optimization/subsolvers/_steihaug_toint_fast.py b/src/estimagic/optimization/subsolvers/_steihaug_toint_fast.py deleted file mode 100644 index 1e87fcfad..000000000 --- a/src/estimagic/optimization/subsolvers/_steihaug_toint_fast.py +++ /dev/null @@ -1,207 +0,0 @@ -"""Implementation of the Steihaug-Toint Conjugate Gradient algorithm.""" -import numpy as np -from numba import njit - - -@njit -def minimize_trust_stcg_fast(model_gradient, model_hessian, trustregion_radius): - """Minimize the quadratic subproblem via Steihaug-Toint conjugate gradient. - - Solve the quadratic trust-region subproblem: - - min_x g.T @ x + 0.5 * x.T @ hess @ x - s.t. ||x|| <= trustregion_radius - - approximately, where g denotes the gradient and hess the hessian of the quadratic - model (i.e. the linear terms and square_terms), respectively. - - The Steihaug-Toint conjugate gradient method is based on Steihaug - (:cite:`Steihaug1983`) and Toint (:cite:`Toint1981`). - - Args: - model_gradient (np.ndarray): 1d array of shape (n,) containing the - gradient (i.e. linear terms) of the quadratic model. - model_hessian (np.ndarray): 2d array of shape (n, n) containing the - hessian (i.e .square terms) of the quadratic model. - trustregion_radius (float): Radius of the trust-region. - - Returns: - np.ndarray: Solution vector of shape (n,). 
- - """ - abstol = 1e-50 - rtol = 1e-5 - divtol = 10_000 - - n = len(model_gradient) - radius_sq = trustregion_radius**2 - - residual = -model_gradient - rr = residual.T @ residual - - x_candidate = np.zeros(n) - - max_iter = min(n, 10_000) - - z = np.linalg.pinv(model_hessian) @ residual - rz = residual @ residual - - n_iter = 0 - diverged = False - converged = False - - norm_r = np.sqrt(rr) - norm_r0 = norm_r - if rtol * norm_r0 >= abstol: - ttol = rtol * norm_r0 - else: - ttol = abstol - - converged, diverged = _check_convergence( - norm_r, norm_r0, abstol, ttol, divtol, converged, diverged - ) - - p = model_hessian @ z - z = model_hessian @ p - n_iter += 1 - - kappa = p @ z - - dp = 0 - norm_d = 0 - norm_p = p @ p - - if kappa <= 0: - converged = True - - x_candidate, z, n_iter = _update_candidate_vector_and_iteration_number( - x_candidate, - residual, - p, - z, - model_gradient, - model_hessian, - rr, - trustregion_radius, - norm_p, - n_iter, - ) - - for _ in range(max_iter): - alpha = rz / kappa - norm_dp1 = norm_d + alpha * (2 * dp + alpha * norm_p) - - if trustregion_radius != 0 and norm_dp1 >= radius_sq: - converged = True - - if norm_p > 0: - x_candidate = _take_step_to_trustregion_boundary( - x_candidate, p, dp, radius_sq, norm_d, norm_p - ) - - break - - x_candidate = x_candidate + alpha * p - residual = residual - alpha * (model_hessian @ p) - - norm_d = x_candidate @ x_candidate - - rzm1 = rz - rz = residual @ residual - - norm_r = np.linalg.norm(residual) - - converged, diverged = _check_convergence( - norm_r, norm_r0, abstol, ttol, divtol, converged, diverged - ) - - if converged or diverged: - break - - beta = rz / rzm1 - - if abs(beta) <= 0: - diverged = True - break - - if n_iter >= max_iter: - diverged = True - break - - p = residual + beta * p - - dp = x_candidate @ p - norm_p = p @ p - - z = model_hessian @ p - kappa = p @ z - n_iter += 1 - - if kappa <= 0: - converged = True - - if trustregion_radius != 0 and norm_p > 0: - x_candidate = _take_step_to_trustregion_boundary( - x_candidate, p, dp, radius_sq, norm_d, norm_p - ) - - break - - return x_candidate - - -@njit -def _update_candidate_vector_and_iteration_number( - x_candidate, - residual, - p, - z, - model_gradient, - model_hessian, - rr, - radius, - norm_p, - n_iter, -): - """Update candidate, z vector, and iteration number.""" - radius_sq = radius**2 - - if radius != 0 and norm_p > 0: - # Take step to boundary - step = np.sqrt(radius_sq / norm_p) - x_candidate = x_candidate + step * p - - elif radius != 0: - if radius_sq >= rr: - alpha = 1.0 - else: - alpha = np.sqrt(radius_sq / rr) - - x_candidate = x_candidate + alpha * residual - z = model_gradient - 0.5 * (model_hessian @ x_candidate) - - n_iter += 1 - - return x_candidate, z, n_iter - - -@njit -def _take_step_to_trustregion_boundary(x_candidate, p, dp, radius_sq, norm_d, norm_p): - """Take step to trust-region boundary.""" - step = (np.sqrt(dp * dp + norm_p * (radius_sq - norm_d)) - dp) / norm_p - x_candidate = x_candidate + step * p - - return x_candidate - - -@njit -def _check_convergence( - rnorm, rnorm0, abstol, ttol, divtol, converged, diverged # noqa: ARG001 -): - """Check for convergence.""" - if rnorm <= ttol: - converged = True - elif rnorm >= divtol * rnorm0: - diverged = True - - return converged, diverged diff --git a/src/estimagic/optimization/subsolvers/_trsbox_fast.py b/src/estimagic/optimization/subsolvers/_trsbox_fast.py deleted file mode 100644 index c5cf22533..000000000 --- a/src/estimagic/optimization/subsolvers/_trsbox_fast.py +++ 
/dev/null @@ -1,658 +0,0 @@ -"""Implementation of the quadratic trustregion solver TRSBOX.""" -import numpy as np -from numba import njit - - -@njit -def minimize_trust_trsbox_fast( - model_gradient, - model_hessian, - trustregion_radius, - lower_bounds, - upper_bounds, -): - """Minimize a quadratic trust-region subproblem using the trsbox algorithm. - - Solve the quadratic trust-region subproblem: - min_x g.T @ x + 0.5 * x.T @ hess @ x - s.t. ||x|| <= trustregion_radius - lower_bounds <= x <= upper_bounds - - approximately, using an active-set approach, where g denotes the gradient - and hess the hessian of the quadratic model (i.e. the linear terms and - square_terms), respectively. - - The subproblem is assumed to be centered, i.e. ``x_center`` is the zero vector. - The trsbox algorithm applies a conjugate gradient step in its main loop. - - This implementation of the quadratic trsbox algorithm is based on - M. J. D. Powell (2009) "The BOBYQA algorithm for bound constrained - optimization without derivatives." (:cite:`Powell2009`). - - Some modifications to the termination conditions are taken from the - DFBOLS method by Zhang et al. (:cite:`Zhang2010`). - - Args: - model_gradient (np.ndarray): 1d array of shape (n,) containing the - gradient (i.e. linear terms) of the quadratic model. - model_hessian (np.ndarray): 2d array of shape (n, n) containing the - hessian (i.e. square terms) of the quadratic model. - lower_bounds (np.ndarray): 1d array of shape (n,) with lower bounds - for the parameter vector x. - upper_bounds (np.ndarray): 1d array of shape (n,) with upper bounds - for the parameter vector x. - trustregion_radius (float): Radius of the trust-region. - Returns: - np.ndarray: Solution vector for the quadratic trust-region subproblem - of shape (n,).
- - """ - n = len(model_gradient) - x_center = np.zeros(n) - - n_iter = 0 - n_fixed_variables = 0 - - x_bounded = np.zeros(n) - x_bounded[(x_center <= lower_bounds) & (model_gradient >= 0.0)] = -1 - x_bounded[(x_center >= upper_bounds) & (model_gradient <= 0.0)] = 1 - - x_candidate = np.zeros(n) - gradient_projected = np.zeros(n) - gradient_candidate = model_gradient - - total_reduction = np.zeros(1) - delta_sq = trustregion_radius**2 - curve_min = -1.0 - beta = 0 - - need_alt_trust_step = False - max_iter = 100 * n**2 - - # Main Conjugate Gradient loop - for _ in range(max_iter): - gradient_projected[x_bounded != 0] = 0 - if beta == 0: - gradient_projected[x_bounded == 0] = -gradient_candidate[x_bounded == 0] - else: - gradient_projected[x_bounded == 0] = ( - beta * gradient_projected[x_bounded == 0] - - gradient_candidate[x_bounded == 0] - ) - gradient_projected_sumsq = gradient_projected @ gradient_projected - - if gradient_projected_sumsq == 0: - need_alt_trust_step = False - break - - if beta == 0: - gradient_sumsq = gradient_projected_sumsq - max_iter = n_iter + n - n_fixed_variables - - if n_iter == 0: - gradient_sumsq_initial = gradient_sumsq - - if ( - gradient_sumsq <= 1.0e-6 * gradient_sumsq_initial - and gradient_sumsq <= 1.0e-18 - ) or ( - gradient_sumsq * np.array([delta_sq]) <= 1.0e-6 * total_reduction**2 - and gradient_sumsq * np.array([delta_sq]) <= 1.0e-18 - ): - need_alt_trust_step = False - break - - hess_g = model_hessian @ gradient_projected - g_x = gradient_projected[x_bounded == 0] @ x_candidate[x_bounded == 0] - g_hess_g = gradient_projected[x_bounded == 0] @ hess_g[x_bounded == 0] - raw_distance = ( - np.array([delta_sq]) - - x_candidate[x_bounded == 0] @ x_candidate[x_bounded == 0] - ) - - if raw_distance <= 0: - need_alt_trust_step = True - break - step_len, distance_to_boundary = _take_unconstrained_step_up_to_boundary( - raw_distance, gradient_sumsq, gradient_projected_sumsq, g_x, g_hess_g - ) - - if step_len <= 1.0e-30: - need_alt_trust_step = False - break - - step_len, index_bound_active = _take_constrained_step_up_to_boundary( - x_candidate, gradient_projected, step_len, lower_bounds, upper_bounds - ) - current_reduction = 0.0 - if step_len > 0: - n_iter += 1 - ( - x_candidate, - gradient_candidate, - current_reduction, - total_reduction, - curve_min, - gradient_sumsq, - gradient_sumsq_old, - ) = _update_candidate_vectors_and_reduction( - x_candidate, - x_bounded, - gradient_candidate, - gradient_projected, - step_len, - total_reduction, - curve_min, - index_bound_active, - gradient_projected_sumsq, - gradient_sumsq, - g_hess_g, - hess_g, - ) - - if index_bound_active != -1: - n_fixed_variables += 1 - if gradient_projected[index_bound_active] >= 0: - x_bounded[index_bound_active] = 1 - else: - x_bounded[index_bound_active] = -1 - - delta_sq = delta_sq - x_candidate[index_bound_active] ** 2 - if delta_sq <= 0: - need_alt_trust_step = True - break - - beta = 0 - continue - - if step_len >= distance_to_boundary: - need_alt_trust_step = True - break - - if n_iter == max_iter or current_reduction <= 1.0e-6 * total_reduction: - need_alt_trust_step = False - break - - beta = gradient_sumsq / gradient_sumsq_old - continue - - if need_alt_trust_step: - curve_min = 0 - x_candidate = _perform_alternative_trustregion_step( - x_candidate=x_candidate, - x_bounded=x_bounded, - gradient_candidate=gradient_candidate, - model_hessian=model_hessian, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - n_fixed_variables=n_fixed_variables, - 
total_reduction=total_reduction, - ) - else: - x_candidate = _apply_bounds_to_candidate_vector( - x_candidate, x_bounded, lower_bounds, upper_bounds - ) - - return x_candidate - - -@njit -def _perform_alternative_trustregion_step( - x_candidate, - x_bounded, - gradient_candidate, - model_hessian, - lower_bounds, - upper_bounds, - n_fixed_variables, - total_reduction, -): - """Perform the alternative trust-region step.""" - n = len(x_candidate) - max_iter = 100 * n**2 - - for _ in range(max_iter): - if n_fixed_variables >= n - 1: - x_candidate = _apply_bounds_to_candidate_vector( - x_candidate, x_bounded, lower_bounds, upper_bounds - ) - break - - search_direction = np.zeros(n) - search_direction[x_bounded == 0] = x_candidate[x_bounded == 0] - - x_reduced = x_candidate[x_bounded == 0] @ x_candidate[x_bounded == 0] - x_grad = x_candidate[x_bounded == 0] @ gradient_candidate[x_bounded == 0] - gradient_reduced = ( - gradient_candidate[x_bounded == 0] @ gradient_candidate[x_bounded == 0] - ) - hess_s = model_hessian @ search_direction - hessian_reduced = hess_s - - restart_alt_loop = False - - for _ in range(max_iter): - raw_reduction = gradient_reduced * x_reduced - x_grad**2 - if raw_reduction <= 1.0e-4 * total_reduction**2: - restart_alt_loop = False - break - - search_direction, s_norm = _compute_new_search_direction_and_norm( - x_candidate, - x_bounded, - x_reduced, - gradient_candidate, - x_grad, - raw_reduction, - ) - - ( - x_bounded, - index_active_bound, - n_fixed_variables, - active_bound, - bound_on_tangent, - free_variable_reached_bound, - ) = _calc_upper_bound_on_tangent( - x_candidate, - search_direction, - x_bounded, - lower_bounds, - upper_bounds, - n_fixed_variables, - ) - - if free_variable_reached_bound: - restart_alt_loop = True - break - - hess_s = model_hessian @ search_direction - - s_hess_s = np.sum(search_direction[x_bounded == 0] * hess_s[x_bounded == 0]) - x_hess_s = np.sum(x_candidate[x_bounded == 0] * hess_s[x_bounded == 0]) - x_hess_x = np.sum( - x_candidate[x_bounded == 0] * hessian_reduced[x_bounded == 0] - ) - - ( - previous_reduction, - next_reduction, - max_reduction, - tangent, - index_angle_greatest_reduction, - n_angles, - ) = _calc_greatest_criterion_reduction( - bound_on_tangent, s_hess_s, x_hess_s, x_hess_x, x_grad, s_norm - ) - - if index_angle_greatest_reduction == -1: - restart_alt_loop = False - break - - if index_angle_greatest_reduction < n_angles - 1: - tangent = _update_tangent( - index_angle_greatest_reduction, - bound_on_tangent, - n_angles, - next_reduction, - previous_reduction, - max_reduction, - ) - - cosine = (1.0 - tangent**2) / (1.0 + tangent**2) - sine = 2.0 * tangent / (1.0 + tangent**2) - current_reduction = _calc_new_reduction( - tangent, sine, s_hess_s, x_hess_x, x_hess_s, x_grad, s_norm - ) - - if current_reduction <= 0.0: - restart_alt_loop = False - break - - ( - x_candidate, - gradient_candidate, - x_grad, - gradient_reduced, - hessian_reduced, - ) = _update_candidate_vectors_and_reduction_alt_step( - x_candidate, - search_direction, - x_bounded, - gradient_candidate, - cosine, - sine, - hess_s, - hessian_reduced, - ) - - total_reduction = total_reduction + current_reduction - if ( - index_active_bound.size > 0 - and index_angle_greatest_reduction == n_angles - 1 - ): - n_fixed_variables += 1 - x_bounded[index_active_bound] = active_bound - restart_alt_loop = True - break - - if current_reduction <= 0.01 * total_reduction: - restart_alt_loop = False - break - - continue - - if restart_alt_loop: - continue - else: - break - - 
x_candidate = _apply_bounds_to_candidate_vector( - x_candidate, x_bounded, lower_bounds, upper_bounds - ) - - return x_candidate - - -@njit -def _apply_bounds_to_candidate_vector( - x_candidate, - x_bounded, - lower_bounds, - upper_bounds, -): - """Force candidate vector to lie within bounds.""" - x_candidate_new = np.zeros(len(x_candidate)) - for i in range(len(x_candidate)): - if x_candidate[i] <= lower_bounds[i]: - x_candidate_new[i] = lower_bounds[i] - elif x_candidate[i] >= upper_bounds[i]: - x_candidate_new[i] = upper_bounds[i] - else: - x_candidate_new[i] = x_candidate[i] - x_candidate_new[x_bounded == -1] = lower_bounds[x_bounded == -1] - x_candidate_new[x_bounded == 1] = upper_bounds[x_bounded == 1] - - return x_candidate_new - - -@njit -def _take_unconstrained_step_up_to_boundary( - raw_distance, gradient_sumsq, gradient_projected_sumsq, g_x, g_hess_g -): - """Take unconstrained step, ignoring bounds, up to boundary.""" - temp = np.sqrt(gradient_projected_sumsq * raw_distance + g_x**2) - if g_x >= 0: - distance_to_boundary = raw_distance / (temp + g_x) - else: - distance_to_boundary = (temp - g_x) / gradient_projected_sumsq - if g_hess_g <= 0: - step_len = distance_to_boundary[0] - else: - if distance_to_boundary <= gradient_sumsq / g_hess_g: - step_len = distance_to_boundary[0] - else: - step_len = gradient_sumsq / g_hess_g - - return step_len, distance_to_boundary - - -@njit -def _update_candidate_vectors_and_reduction( - x_candidate, - x_bounded, - gradient_candidate, - gradient_projected, - step_len, - total_reduction, - curve_min, - index_bound_active, - gradient_projected_sumsq, - gradient_sumsq, - g_hess_g, - hess_g, -): - """Update candidate vectors and the associated criterion reduction.""" - current_min = g_hess_g / gradient_projected_sumsq - - if index_bound_active == -1 and current_min > 0: - if curve_min != -1.0: - curve_min = min(curve_min, current_min) - else: - curve_min = current_min - - gradient_sumsq_old = gradient_sumsq - - gradient_candidate = gradient_candidate + step_len * hess_g - x_candidate = x_candidate + step_len * gradient_projected - - gradient_sumsq = ( - gradient_candidate[x_bounded == 0] @ gradient_candidate[x_bounded == 0] - ) - - current_reduction = max( - step_len * (gradient_sumsq_old - 0.5 * step_len * g_hess_g), 0 - ) - total_reduction = total_reduction + current_reduction - - return ( - x_candidate, - gradient_candidate, - current_reduction, - total_reduction, - curve_min, - gradient_sumsq, - gradient_sumsq_old, - ) - - -@njit -def _take_constrained_step_up_to_boundary( - x_candidate, gradient_projected, step_len, lower_bounds, upper_bounds -): - """Reduce step length, where boundary is hit, to preserve simple bounds.""" - index_bound_active = -1 - for i in range(len(x_candidate)): - if gradient_projected[i] != 0: - if gradient_projected[i] > 0: - step_len_constr = ( - upper_bounds[i] - x_candidate[i] - ) / gradient_projected[i] - else: - step_len_constr = ( - lower_bounds[i] - x_candidate[i] - ) / gradient_projected[i] - if step_len_constr < step_len: - step_len = step_len_constr - index_bound_active = i - - return step_len, index_bound_active - - -@njit -def _calc_upper_bound_on_tangent( - x_candidate, - search_direction, - x_bounded, - lower_bounds, - upper_bounds, - n_fixed_variables, -): - """Calculate upper bound on tangent of half the angle to the boundary.""" - bound_on_tangent = 1 - free_variable_reached_bound = False - - for i in range(len(x_candidate)): - if x_bounded[i] == 0: - lower_bound_centered = x_candidate[i] - 
lower_bounds[i] - upper_bound_centered = upper_bounds[i] - x_candidate[i] - - if lower_bound_centered <= 0.0: - n_fixed_variables += 1 - x_bounded[i] = -1 - free_variable_reached_bound = True - break - - elif upper_bound_centered <= 0.0: - n_fixed_variables += 1 - x_bounded[i] = 1 - free_variable_reached_bound = True - break - - ssq = x_candidate[i] ** 2 + search_direction[i] ** 2 - - ssq_lower = ssq - lower_bounds[i] ** 2 - if ssq_lower > 0.0: - ssq_lower = np.sqrt(ssq_lower) - search_direction[i] - if bound_on_tangent * ssq_lower > lower_bound_centered: - bound_on_tangent = lower_bound_centered / ssq_lower - index_active_bound = np.array([i]) - active_bound = np.array([-1]) - - ssq_upper = ssq - upper_bounds[i] ** 2 - if ssq_upper > 0.0: - ssq_upper = np.sqrt(ssq_upper) + search_direction[i] - if bound_on_tangent * ssq_upper > upper_bound_centered: - bound_on_tangent = upper_bound_centered / ssq_upper - index_active_bound = np.array([i]) - active_bound = np.array([1]) - - return ( - x_bounded, - index_active_bound, - n_fixed_variables, - active_bound, - bound_on_tangent, - free_variable_reached_bound, - ) - - -@njit -def _calc_greatest_criterion_reduction( - bound_on_tangent, s_hess_s, x_hess_s, x_hess_x, x_grad, s_norm -): - """Calculate the greatest feasible reduction in the criterion function. - - The largest reduction is found by looking at a range of equally spaced values of - ``tangent`` in the interval [0, ``bound_on_tangent``], where ``tangent`` is the - tangent of half the angle to the trust-region boundary. - - """ - previous_reduction = None - next_reduction = None - - max_reduction = 0 - index_angle_greatest_reduction = -1 - old_reduction = 0 - n_angles = int(17 * bound_on_tangent + 3.1) - - for i in range(n_angles): - tangent = bound_on_tangent * (i + 1) / n_angles - sine = 2.0 * tangent / (1.0 + tangent**2) - - new_reduction = _calc_new_reduction( - tangent, sine, s_hess_s, x_hess_x, x_hess_s, x_grad, s_norm - ) - - if new_reduction > max_reduction: - max_reduction = new_reduction - index_angle_greatest_reduction = i - previous_reduction = old_reduction - elif i == index_angle_greatest_reduction + 1: - next_reduction = new_reduction - old_reduction = new_reduction - - return ( - previous_reduction, - next_reduction, - max_reduction, - tangent, - index_angle_greatest_reduction, - n_angles, - ) - - -@njit -def _update_candidate_vectors_and_reduction_alt_step( - x_candidate, - search_direction, - x_bounded, - gradient_candidate, - cosine, - sine, - hess_s, - hessian_reduced, -): - """Update candidate vectors and the associated criterion reduction. - - If the angle of the alternative iteration is restricted by a bound on a free - variable, that variable is fixed at the bound. 
- - """ - gradient_candidate += (cosine - 1.0) * hessian_reduced + sine * hess_s - x_candidate_new = np.zeros(len(x_candidate)) - for i in range(len(x_candidate)): - if x_bounded[i] == 0: - x_candidate_new[i] = cosine * x_candidate[i] + sine * search_direction[i] - else: - x_candidate_new[i] = x_candidate[i] - x_grad = x_candidate_new[x_bounded == 0] @ gradient_candidate[x_bounded == 0] - gradient_reduced = ( - gradient_candidate[x_bounded == 0] @ gradient_candidate[x_bounded == 0] - ) - hessian_reduced = cosine * hessian_reduced + sine * hess_s - - return ( - x_candidate_new, - gradient_candidate, - x_grad, - gradient_reduced, - hessian_reduced, - ) - - -@njit -def _compute_new_search_direction_and_norm( - x_candidate, x_bounded, x_reduced, gradient_candidate, x_grad, raw_reduction -): - """Compute the new search direction and its norm.""" - raw_reduction = np.sqrt(raw_reduction) - search_direction = np.zeros(len(x_candidate)) - - search_direction[x_bounded == 0] = ( - x_grad * x_candidate[x_bounded == 0] - - x_reduced * gradient_candidate[x_bounded == 0] - ) / raw_reduction - s_norm = -raw_reduction - - return search_direction, s_norm - - -@njit -def _calc_new_reduction(tangent, sine, s_hess_s, x_hess_x, x_hess_s, x_grad, s_norm): - """Calculate the new reduction in the criterion function.""" - raw_reduction = s_hess_s + tangent * (tangent * x_hess_x - 2.0 * x_hess_s) - current_reduction = sine * (tangent * x_grad - s_norm - 0.5 * sine * raw_reduction) - - return current_reduction - - -@njit -def _update_tangent( - index_angle_greatest_reduction, - bound_on_tangent, - n_angles, - next_reduction, - previous_reduction, - max_reduction, -): - """Update the tangent of half the angle to the trust-region boundary.""" - raw_reduction = (next_reduction - previous_reduction) / ( - 2.0 * max_reduction - previous_reduction - next_reduction - ) - tangent = ( - bound_on_tangent - * ((index_angle_greatest_reduction + 1) + 0.5 * raw_reduction) - / n_angles - ) - return tangent diff --git a/src/estimagic/optimization/subsolvers/bntr_fast.py b/src/estimagic/optimization/subsolvers/bntr_fast.py deleted file mode 100644 index 50f8c4ad6..000000000 --- a/src/estimagic/optimization/subsolvers/bntr_fast.py +++ /dev/null @@ -1,1167 +0,0 @@ -"""Auxiliary functions for the quadratic BNTR trust-region subsolver.""" -import numpy as np -from estimagic.optimization.subsolvers._conjugate_gradient_fast import ( - minimize_trust_cg_fast, -) -from estimagic.optimization.subsolvers._steihaug_toint_fast import ( - minimize_trust_stcg_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - minimize_trust_trsbox_fast, -) -from numba import njit - -EPSILON = np.finfo(float).eps ** (2 / 3) - - -def bntr_fast( - model, - lower_bounds, - upper_bounds, - x_candidate, - *, - conjugate_gradient_method, - maxiter, - maxiter_gradient_descent, - gtol_abs, - gtol_rel, - gtol_scaled, - gtol_abs_conjugate_gradient, - gtol_rel_conjugate_gradient, -): - """Minimize a bounded trust-region subproblem via Newton Conjugate Gradient method. - - This function serves as a wrapper around the faster, numba-implementation of the - original BNTR algorithm. - - The BNTR (Bounded Newton Trust Rregion) algorithm uses an active-set approach - to solve the symmetric system of equations: - - hessian @ x = - gradient - - only for the inactive parameters of x that lie within the bounds. The active-set - estimation employed here is based on Bertsekas (:cite:`Bertsekas1982`). 
- - In the main loop, BNTR globalizes the Newton step using a trust-region method - based on the predicted versus actual reduction in the criterion function. - The trust-region radius is increased only if the accepted step is at the - trust-region boundary. - - - Args: - model (NamedTuple): NamedTuple containing the parameters of the - main model, i.e.: - - ``linear_terms`` (np.ndarray): 1d array of shape (n,) - - ``square_terms`` (np.ndarray): 2d array of shape (n,n). - lower_bounds (np.ndarray): 1d array of shape (n,) with lower bounds - for the parameter vector x. - upper_bounds (np.ndarray): 1d array of shape (n,) with upper bounds - for the parameter vector x. - x_candidate (np.ndarray): Initial guess for the solution of the subproblem. - conjugate_gradient_method (str): Method for computing the conjugate gradient - step. Available conjugate gradient methods are: - - "cg" - - "steihaug_toint" - - "trsbox" (default) - maxiter (int): Maximum number of iterations. If reached, terminate. - maxiter_gradient_descent (int): Maximum number of steepest descent iterations - to perform when the trust-region subsolver BNTR is used. - gtol_abs (float): Convergence tolerance for the absolute gradient norm. - gtol_rel (float): Convergence tolerance for the relative gradient norm. - gtol_scaled (float): Convergence tolerance for the scaled gradient norm. - gtol_abs_conjugate_gradient (float): Convergence tolerance for the absolute - gradient norm in the conjugate gradient step of the trust-region - subproblem ("BNTR"). - gtol_rel_conjugate_gradient (float): Convergence tolerance for the relative - gradient norm in the conjugate gradient step of the trust-region - subproblem ("BNTR"). - - Returns: - (dict): Result dictionary containing the following keys: - - ``x`` (np.ndarray): Solution vector of the subproblem of shape (n,) - - ``criterion`` (float): Minimum function value associated with the - solution. - - ``n_iterations`` (int): Number of iterations the algorithm ran before - termination. - - ``success`` (bool): Boolean indicating whether a solution has been found - before reaching maxiter. - - """ - - model_gradient = model.linear_terms - model_hessian = model.square_terms - ( - x_candidate, - f_candidate, - niter, - converged, - convergence_reason, - ) = _bntr_fast_jitted( - model_gradient=model_gradient, - model_hessian=model_hessian, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - x_candidate=x_candidate, - conjugate_gradient_method=conjugate_gradient_method, - maxiter=maxiter, - maxiter_gradient_descent=maxiter_gradient_descent, - gtol_abs=gtol_abs, - gtol_rel=gtol_rel, - gtol_scaled=gtol_scaled, - gtol_abs_conjugate_gradient=gtol_abs_conjugate_gradient, - gtol_rel_conjugate_gradient=gtol_rel_conjugate_gradient, - ) - - result = { - "x": x_candidate, - "criterion": f_candidate, - "n_iterations": niter, - "success": converged, - "message": convergence_reason, - } - - return result - - -@njit -def _bntr_fast_jitted( - model_gradient, - model_hessian, - lower_bounds, - upper_bounds, - x_candidate, - conjugate_gradient_method, - maxiter, - maxiter_gradient_descent, - gtol_abs, - gtol_rel, - gtol_scaled, - gtol_abs_conjugate_gradient, - gtol_rel_conjugate_gradient, -): - """Minimize a bounded trust-region subproblem via Newton Conjugate Gradient method. 
- - This is the faster, numba implementation of the original BNTR algorithm that - gets wrapped in ``bntr_fast``. - - The BNTR (Bounded Newton Trust-Region) algorithm uses an active-set approach - to solve the symmetric system of equations: - - hessian @ x = - gradient - - only for the inactive parameters of x that lie within the bounds. The active-set - estimation employed here is based on Bertsekas (:cite:`Bertsekas1982`). - - In the main loop, BNTR globalizes the Newton step using a trust-region method - based on the predicted versus actual reduction in the criterion function. - The trust-region radius is increased only if the accepted step is at the - trust-region boundary. - - - Args: - model_gradient (np.ndarray): 1d array of shape (n,) of the linear terms of - the surrogate model. - model_hessian (np.ndarray): 2d array of shape (n,n) of the square terms of - the surrogate model. - lower_bounds (np.ndarray): 1d array of shape (n,) with lower bounds - for the parameter vector x. - upper_bounds (np.ndarray): 1d array of shape (n,) with upper bounds - for the parameter vector x. - x_candidate (np.ndarray): Initial guess for the solution of the subproblem. - conjugate_gradient_method (str): Method for computing the conjugate gradient - step. Available conjugate gradient methods are: - - "cg" - - "steihaug_toint" - - "trsbox" (default) - maxiter (int): Maximum number of iterations. If reached, terminate. - maxiter_gradient_descent (int): Maximum number of steepest descent iterations - to perform when the trust-region subsolver BNTR is used. - gtol_abs (float): Convergence tolerance for the absolute gradient norm. - gtol_rel (float): Convergence tolerance for the relative gradient norm. - gtol_scaled (float): Convergence tolerance for the scaled gradient norm. - gtol_abs_conjugate_gradient (float): Convergence tolerance for the absolute - gradient norm in the conjugate gradient step of the trust-region - subproblem ("BNTR"). - gtol_rel_conjugate_gradient (float): Convergence tolerance for the relative - gradient norm in the conjugate gradient step of the trust-region - subproblem ("BNTR"). - - Returns: - x (np.ndarray): Solution vector of the subproblem of shape (n,) - criterion (float): Minimum function value associated with the - solution. - n_iterations (int): Number of iterations the algorithm ran before - termination. - success (bool): Boolean indicating whether a solution has been found - before reaching maxiter.
- - """ - - ( - x_candidate, - f_candidate, - gradient_unprojected, - hessian_bounds_inactive, - trustregion_radius, - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, - inactive_bounds, - converged, - convergence_reason, - ) = _take_preliminary_gradient_descent_step_and_check_for_solution( - model_gradient, - model_hessian, - lower_bounds, - upper_bounds, - x_candidate, - maxiter_gradient_descent, - gtol_abs, - gtol_rel, - gtol_scaled, - ) - - for niter in range(maxiter + 1): - if converged: - break - - x_old = x_candidate - f_old = f_candidate - accept_step = False - - while not accept_step and not converged: - gradient_bounds_inactive = gradient_unprojected[inactive_bounds] - hessian_bounds_inactive = _find_hessian_submatrix_where_bounds_inactive( - model_hessian, inactive_bounds - ) - ( - conjugate_gradient_step, - conjugate_gradient_step_inactive_bounds, - cg_step_norm, - ) = _compute_conjugate_gradient_step( - x_candidate, - gradient_bounds_inactive, - hessian_bounds_inactive, - lower_bounds, - upper_bounds, - inactive_bounds=inactive_bounds, - active_lower_bounds=active_lower_bounds, - active_upper_bounds=active_upper_bounds, - active_fixed_bounds=active_fixed_bounds, - trustregion_radius=trustregion_radius, - conjugate_gradient_method=conjugate_gradient_method, - gtol_abs_conjugate_gradient=gtol_abs_conjugate_gradient, - gtol_rel_conjugate_gradient=gtol_rel_conjugate_gradient, - default_radius=100.00, - min_radius=1e-10, - max_radius=1e10, - ) - - x_unbounded = x_candidate + conjugate_gradient_step - x_candidate = _apply_bounds_to_x_candidate( - x_unbounded, lower_bounds, upper_bounds - ) - - predicted_reduction = ( - _compute_predicted_reduction_from_conjugate_gradient_step( - conjugate_gradient_step, - conjugate_gradient_step_inactive_bounds, - gradient_unprojected, - gradient_bounds_inactive, - hessian_bounds_inactive, - inactive_bounds, - ) - ) - - f_candidate = _evaluate_model_criterion( - x_candidate, model_gradient, model_hessian - ) - actual_reduction = f_old - f_candidate - - trustregion_radius_old = trustregion_radius - ( - trustregion_radius, - accept_step, - ) = _update_trustregion_radius_conjugate_gradient( - f_candidate, - predicted_reduction, - actual_reduction, - cg_step_norm, - trustregion_radius, - min_radius=1e-10, - max_radius=1e10, - eta1=1.0e-4, - eta2=0.25, - eta3=0.50, - eta4=0.90, - alpha1=0.25, - alpha2=0.50, - alpha3=1.00, - alpha4=2.00, - alpha5=4.00, - ) - - if accept_step: - gradient_unprojected = model_gradient + model_hessian @ x_candidate - - ( - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, - inactive_bounds, - ) = _get_information_on_active_bounds( - x_candidate, - gradient_unprojected, - lower_bounds, - upper_bounds, - ) - else: - x_candidate = x_old - f_candidate = f_old - - if trustregion_radius == trustregion_radius_old: - converged = True - break - - converged, convergence_reason = _check_for_convergence( - x_candidate, - f_candidate, - gradient_unprojected, - model_gradient, - lower_bounds, - upper_bounds, - converged, - convergence_reason, - niter, - maxiter=maxiter, - gtol_abs=gtol_abs, - gtol_rel=gtol_rel, - gtol_scaled=gtol_scaled, - ) - return x_candidate, f_candidate, niter, converged, convergence_reason - - -@njit -def _take_preliminary_gradient_descent_step_and_check_for_solution( - model_gradient, - model_hessian, - lower_bounds, - upper_bounds, - x_candidate, - maxiter_gradient_descent, - gtol_abs, - gtol_rel, - gtol_scaled, -): - """Take a preliminary gradient descent step and check if we 
found a solution. - - Args: - model_gradient (np.ndarray): 1d array of shape (n,) with linear terms of the - main model. - model_hessian (np.ndarray): 2d array of shape (n,n) with square terms of - the main model - lower_bounds (np.ndarray): 1d array of shape (n,) with lower bounds - for the parameter vector x. - upper_bounds (np.ndarray): 1d array of shape (n,) with upper bounds - for the parameter vector x. - x_candidate (np.ndarray): Initial guess for the solution of the subproblem. - maxiter_gradient_descent (int): Maximum number of iterations in performing - gradient descent step - gtol_abs (float): Convergence tolerance for the absolute gradient norm. - gtol_rel (float): Convergence tolerance for the relative gradient norm. - gtol_scaled (float): Convergence tolerance for the scaled gradient norm. - - Returns: - x_candidate (np.ndarray): Candidate for solution vector of shape (n,). - criterion_candidate (float): Candidate value for solution criterion. - gradient_unprojected (np.ndarray): - - """ - - default_radius = 100.0 - min_radius = 1e-10 - max_radius = 1e10 - theta = 0.25 - mu1 = 0.35 - mu2 = 0.50 - gamma1 = 0.0625 - gamma2 = 0.5 - gamma3 = 2.0 - gamma4 = 5.0 - - converged = False - convergence_reason = 0 - - criterion_candidate = _evaluate_model_criterion( - x_candidate, model_gradient, model_hessian - ) - - ( - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, - inactive_bounds, - ) = _get_information_on_active_bounds( - x_candidate, - model_gradient, - lower_bounds, - upper_bounds, - ) - - gradient_unprojected = model_gradient + model_hessian @ x_candidate - gradient_projected = _project_gradient_onto_feasible_set( - gradient_unprojected, inactive_bounds - ) - converged, convergence_reason = _check_for_convergence( - x_candidate=x_candidate, - f_candidate=criterion_candidate, - gradient_candidate=gradient_unprojected, - model_gradient=model_gradient, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - converged=converged, - reason=convergence_reason, - niter=None, - maxiter=None, - gtol_abs=gtol_abs, - gtol_rel=gtol_rel, - gtol_scaled=gtol_scaled, - ) - - if converged: - hessian_inactive = model_hessian - trustregion_radius = default_radius - else: - hessian_inactive = _find_hessian_submatrix_where_bounds_inactive( - model_hessian, inactive_bounds - ) - - ( - x_candidate_gradient_descent, - f_min_gradient_descent, - step_size_gradient_descent, - trustregion_radius, - radius_lower_bound, - ) = _perform_gradient_descent_step( - x_candidate=x_candidate, - f_candidate_initial=criterion_candidate, - gradient_projected=gradient_projected, - hessian_inactive=hessian_inactive, - model_gradient=model_gradient, - model_hessian=model_hessian, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - inactive_bounds=inactive_bounds, - maxiter_steepest_descent=maxiter_gradient_descent, - default_radius=default_radius, - theta=theta, - mu1=mu1, - mu2=mu2, - gamma1=gamma1, - gamma2=gamma2, - gamma3=gamma3, - gamma4=gamma4, - ) - - if f_min_gradient_descent < criterion_candidate: - criterion_candidate = f_min_gradient_descent - - x_unbounded = ( - x_candidate_gradient_descent - - step_size_gradient_descent * gradient_projected - ) - x_candidate = _apply_bounds_to_x_candidate( - x_unbounded, lower_bounds, upper_bounds - ) - - gradient_unprojected = model_gradient + model_hessian @ x_candidate - ( - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, - inactive_bounds, - ) = _get_information_on_active_bounds( - x_candidate, - gradient_unprojected, - 
lower_bounds, - upper_bounds, - ) - - gradient_projected = _project_gradient_onto_feasible_set( - gradient_unprojected, inactive_bounds - ) - hessian_inactive = _find_hessian_submatrix_where_bounds_inactive( - model_hessian, inactive_bounds - ) - - converged, convergence_reason = _check_for_convergence( - x_candidate=x_candidate, - f_candidate=criterion_candidate, - gradient_candidate=gradient_projected, - model_gradient=model_gradient, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - converged=converged, - reason=convergence_reason, - niter=None, - maxiter=None, - gtol_abs=gtol_abs, - gtol_rel=gtol_rel, - gtol_scaled=gtol_scaled, - ) - - if not converged: - trustregion_radius = min( - max(min_radius, max(trustregion_radius, radius_lower_bound)), max_radius - ) - - return ( - x_candidate, - criterion_candidate, - gradient_unprojected, - hessian_inactive, - trustregion_radius, - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, - inactive_bounds, - converged, - convergence_reason, - ) - - -@njit -def _compute_conjugate_gradient_step( - x_candidate, - gradient_inactive, - hessian_inactive, - lower_bounds, - upper_bounds, - inactive_bounds, - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, - trustregion_radius, - conjugate_gradient_method, - gtol_abs_conjugate_gradient, - gtol_rel_conjugate_gradient, - default_radius, - min_radius, - max_radius, -): - """Compute the bounded Conjugate Gradient trust-region step. - - Args: - x_candidate (np.ndarray): Candidate solution vector of parameters of len n. - gradient_inactive (np.ndarray): Model gradient where parameter bounds are - inactive. The length depends on the number of inactive bounds. - hessian_inactive (np.ndarray): Model hessian where parameter bounds are - inactive. The shape depends on the number of inactive bounds. - lower_bounds (np.ndarray): 1d array of parameter lower bounds, of length n. - upper_bounds (np.ndarray): 1d array of parameter upper bounds, of length n. - inactive_bounds (np.ndarray): 1d array of indices where parameter bounds are - inactive. - active_lower_bounds (np.ndarray): 1d array of indices where lower bounds of - parameters are active. - active_upper_bounds (np.ndarray): 1d array of indices where upper bounds of - parameters are active. - active_fixed_bounds (np.ndarray): 1d array of indices where parameters are - fixed because their lower and upper bounds coincide. - trustregion_radius (float): Radius of the trust region. - conjugate_gradient_method (str): The method used in the trust region - minimization problem. - gtol_abs_conjugate_gradient (float): Convergence tolerance for the absolute - gradient norm. - gtol_rel_conjugate_gradient (float): Convergence tolerance for the relative - gradient norm. - default_radius (float): Default trust-region radius. - min_radius (float): Lower bound on the trust-region radius. - max_radius (float): Upper bound on the trust-region radius. - - Returns: - conjugate_gradient_step (np.ndarray): Conjugate gradient step, of length n, with - bounds applied to it. - step_inactive (np.ndarray): Conjugate gradient step, of length n, without bounds - applied to it. - step_norm (float): Norm of the conjugate gradient step.
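Example (added for illustration, not part of the original module): a minimal
dense-numpy sketch of the Steihaug-Toint conjugate gradient idea behind the
jitted ``minimize_trust_stcg_fast``; all names and tolerances here are
hypothetical.

    import numpy as np

    def trust_region_cg_sketch(model_gradient, model_hessian, radius, gtol=1e-8):
        # minimize g @ s + 0.5 * s @ H @ s subject to ||s|| <= radius
        s = np.zeros_like(model_gradient)
        r = model_gradient.copy()  # gradient of the model at s
        if np.linalg.norm(r) < gtol:
            return s
        d = -r
        for _ in range(2 * len(model_gradient)):
            h_d = model_hessian @ d
            curvature = d @ h_d
            if curvature <= 0:
                # negative curvature: follow d to the boundary
                return s + _positive_root(s, d, radius) * d
            alpha = (r @ r) / curvature
            if np.linalg.norm(s + alpha * d) >= radius:
                # full step leaves the region: stop at the boundary
                return s + _positive_root(s, d, radius) * d
            s = s + alpha * d
            r_new = r + alpha * h_d
            if np.linalg.norm(r_new) < gtol:
                break
            d = -r_new + ((r_new @ r_new) / (r @ r)) * d
            r = r_new
        return s

    def _positive_root(s, d, radius):
        # positive t that solves ||s + t * d|| == radius
        a, b, c = d @ d, 2 * (s @ d), s @ s - radius**2
        return (-b + np.sqrt(b**2 - 4 * a * c)) / (2 * a)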
- """ - conjugate_gradient_step = np.zeros(len(x_candidate)) - - if not inactive_bounds.any(): - # Save some computation and return an adjusted zero step - step_inactive = _apply_bounds_to_x_candidate( - x_candidate, lower_bounds, upper_bounds - ) - step_norm = np.linalg.norm(step_inactive) - - conjugate_gradient_step = _apply_bounds_to_conjugate_gradient_step( - step_inactive, - x_candidate, - lower_bounds, - upper_bounds, - inactive_bounds, - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, - ) - - else: - if conjugate_gradient_method == "cg": - step_inactive = minimize_trust_cg_fast( - gradient_inactive, - hessian_inactive, - trustregion_radius, - gtol_abs=gtol_abs_conjugate_gradient, - gtol_rel=gtol_rel_conjugate_gradient, - ) - step_norm = np.linalg.norm(step_inactive) - elif conjugate_gradient_method == "steihaug_toint": - step_inactive = minimize_trust_stcg_fast( - gradient_inactive, - hessian_inactive, - trustregion_radius, - ) - step_norm = np.linalg.norm(step_inactive) - elif conjugate_gradient_method == "trsbox": - step_inactive = minimize_trust_trsbox_fast( - gradient_inactive, - hessian_inactive, - trustregion_radius, - lower_bounds=lower_bounds[inactive_bounds], - upper_bounds=upper_bounds[inactive_bounds], - ) - step_norm = np.linalg.norm(step_inactive) - else: - raise ValueError( - "Invalid method: {conjugate_gradient_method}. " - "Must be one of cg, steihaug_toint, trsbox." - ) - - if trustregion_radius == 0: - if step_norm > 0: - # Accept - trustregion_radius = min(max(min_radius, step_norm), max_radius) - else: - # Re-solve - trustregion_radius = min(max(default_radius, min_radius), max_radius) - if conjugate_gradient_method == "cg": - step_inactive = minimize_trust_cg_fast( - gradient_inactive, - hessian_inactive, - trustregion_radius, - gtol_abs=gtol_abs_conjugate_gradient, - gtol_rel=gtol_rel_conjugate_gradient, - ) - step_norm = np.linalg.norm(step_inactive) - elif conjugate_gradient_method == "steihaug_toint": - step_inactive = minimize_trust_stcg_fast( - gradient_inactive, - hessian_inactive, - trustregion_radius, - ) - step_norm = np.linalg.norm(step_inactive) - elif conjugate_gradient_method == "trsbox": - step_inactive = minimize_trust_trsbox_fast( - gradient_inactive, - hessian_inactive, - trustregion_radius, - lower_bounds=lower_bounds[inactive_bounds], - upper_bounds=upper_bounds[inactive_bounds], - ) - step_norm = np.linalg.norm(step_inactive) - - if step_norm == 0: - raise ValueError("Initial direction is zero.") - - conjugate_gradient_step = _apply_bounds_to_conjugate_gradient_step( - step_inactive, - x_candidate, - lower_bounds, - upper_bounds, - inactive_bounds, - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, - ) - - return ( - conjugate_gradient_step, - step_inactive, - step_norm, - ) - - -@njit -def _compute_predicted_reduction_from_conjugate_gradient_step( - conjugate_gradient_step, - conjugate_gradient_step_inactive, - gradient_unprojected, - gradient_inactive, - hessian_inactive, - inactive_bounds, -): - """Compute predicted reduction induced by the Conjugate Gradient step. - - Args: - conjugate_gradient_step (np.ndarray): Conjugate gradient step,of lenght n, with - bounds applied to it. - conjugate_gradient_step_inactive (np.ndarray): Conjugate gradient step,of - length n, without bounds applied to it. - gradient_unprojected (np.ndarray): Model gradient of len n. - gradient_inactive (np.ndarray): Model gradient on indices where parameter - bounds are inactive. 
- hessian_inactive (np.ndarray): Model hessian on indices where parameter bounds - are inactive. - inactive_bounds (np.ndarray): 1d array of indices where parameter bounds - are inactive. - - Returns: - predicted_reduction (float): Predicted reduction in criterion function. - - """ - active_bounds = ~inactive_bounds - if active_bounds.any(): - # Projection changed the step, so we have to recompute the step - # and the predicted reduction. Leave the trust-region radius unchanged. - cg_step_recomp = conjugate_gradient_step[inactive_bounds] - gradient_inactive_recomp = gradient_unprojected[inactive_bounds] - - predicted_reduction = _evaluate_model_criterion( - cg_step_recomp, gradient_inactive_recomp, hessian_inactive - ) - else: - # Step did not change, so we can just recover the - # pre-computed prediction - predicted_reduction = _evaluate_model_criterion( - conjugate_gradient_step_inactive, - gradient_inactive, - hessian_inactive, - ) - predicted_reduction = -predicted_reduction - - return predicted_reduction - - -@njit -def _perform_gradient_descent_step( - x_candidate, - f_candidate_initial, - gradient_projected, - hessian_inactive, - model_gradient, - model_hessian, - lower_bounds, - upper_bounds, - inactive_bounds, - maxiter_steepest_descent, - default_radius, - theta, - mu1, - mu2, - gamma1, - gamma2, - gamma3, - gamma4, -): - """Perform gradient descent step and update trust-region radius.""" - f_min = f_candidate_initial - gradient_norm = np.linalg.norm(gradient_projected) - - trustregion_radius = default_radius - radius_lower_bound = 0 - step_size_accepted = 0 - - for _ in range(maxiter_steepest_descent): - x_old = x_candidate - - step_size_candidate = trustregion_radius / gradient_norm - x_candidate = x_old - step_size_candidate * gradient_projected - - x_candidate = _apply_bounds_to_x_candidate( - x_candidate, lower_bounds, upper_bounds - ) - f_candidate = _evaluate_model_criterion( - x_candidate, model_gradient, model_hessian - ) - - x_diff = x_candidate - x_old - - if f_candidate < f_min: - f_min = f_candidate - step_size_accepted = step_size_candidate - - x_inactive = x_diff[inactive_bounds] - square_terms = x_inactive.T @ hessian_inactive @ x_inactive - - predicted_reduction = trustregion_radius * ( - gradient_norm - - 0.5 * trustregion_radius * square_terms / (gradient_norm**2) - ) - actual_reduction = f_candidate_initial - f_candidate - - ( - trustregion_radius, - radius_lower_bound, - ) = _update_trustregion_radius_and_gradient_descent( - trustregion_radius, - radius_lower_bound, - predicted_reduction, - actual_reduction, - gradient_norm, - theta, - mu1, - mu2, - gamma1, - gamma2, - gamma3, - gamma4, - ) - - return ( - x_candidate, - f_min, - step_size_accepted, - trustregion_radius, - radius_lower_bound, - ) - - -@njit -def _update_trustregion_radius_conjugate_gradient( - f_candidate, - predicted_reduction, - actual_reduction, - x_norm_cg, - trustregion_radius, - min_radius, - max_radius, - alpha1, - alpha2, - alpha3, - alpha4, - alpha5, - eta1, - eta2, - eta3, - eta4, -): - """Update the trust-region radius based on predicted and actual reduction.""" - accept_step = False - - if predicted_reduction < 0 or ~np.isfinite(predicted_reduction): - # Reject and start over - trustregion_radius = alpha1 * min(trustregion_radius, x_norm_cg) - - else: - if ~np.isfinite(actual_reduction): - trustregion_radius = alpha1 * min(trustregion_radius, x_norm_cg) - else: - if abs(actual_reduction) <= max(1, abs(f_candidate) * EPSILON) and abs( - predicted_reduction - ) <= max(1, abs(f_candidate) *
EPSILON): - kappa = 1 - else: - kappa = actual_reduction / predicted_reduction - - if kappa < eta1: - # Reject the step - trustregion_radius = alpha1 * min(trustregion_radius, x_norm_cg) - else: - accept_step = True - - # Update the trust-region radius only if the computed step is at the - # trust-radius boundary - if x_norm_cg == trustregion_radius: - if kappa < eta2: - # Marginal bad step - trustregion_radius = alpha2 * trustregion_radius - elif kappa < eta3: - # Reasonable step - trustregion_radius = alpha3 * trustregion_radius - elif kappa < eta4: - trustregion_radius = alpha4 * trustregion_radius - else: - # Very good step - trustregion_radius = alpha5 * trustregion_radius - - trustregion_radius = min(max(trustregion_radius, min_radius), max_radius) - return trustregion_radius, accept_step - - -@njit -def _get_information_on_active_bounds( - x, - gradient_unprojected, - lower_bounds, - upper_bounds, -): - """Return boolean arrays indicating whether bounds at indices are active or not.""" - active_upper = np.zeros(len(x)).astype("bool") - active_lower = np.zeros(len(x)).astype("bool") - active_fixed = np.zeros(len(x)).astype("bool") - inactive = np.ones(len(x)).astype("bool") - for i in range(len(x)): - if (x[i] <= lower_bounds[i]) & (gradient_unprojected[i] > 0): - active_lower[i] = True - inactive[i] = False - elif (x[i] >= upper_bounds[i]) & (gradient_unprojected[i] < 0): - active_upper[i] = True - inactive[i] = False - elif lower_bounds[i] == upper_bounds[i]: - active_fixed[i] = True - inactive[i] = False - return active_lower, active_upper, active_fixed, inactive - - -@njit -def _find_hessian_submatrix_where_bounds_inactive(initial_hessian, inactive_bounds): - """Find the submatrix of the initial hessian where bounds are inactive.""" - hessian_inactive = initial_hessian[:, inactive_bounds][inactive_bounds, :] - return hessian_inactive - - -@njit -def _check_for_convergence( - x_candidate, - f_candidate, - gradient_candidate, - model_gradient, - lower_bounds, - upper_bounds, - converged, - reason, - niter, - maxiter, - gtol_abs, - gtol_rel, - gtol_scaled, -): - """Check if we have found a solution.""" - direction_fischer_burmeister = _get_fischer_burmeister_direction_vector( - x_candidate, gradient_candidate, lower_bounds, upper_bounds - ) - gradient_norm = np.linalg.norm(direction_fischer_burmeister) - gradient_norm_initial = np.linalg.norm(model_gradient) - - if gradient_norm < gtol_abs: - converged = True - reason = 1 - elif f_candidate != 0 and abs(gradient_norm / f_candidate) < gtol_rel: - converged = True - reason = 2 - elif ( - gradient_norm_initial != 0 - and gradient_norm / gradient_norm_initial < gtol_scaled - ): - converged = True - reason = 3 - elif gradient_norm_initial != 0 and gradient_norm == 0 and gtol_scaled == 0: - converged = True - reason = 4 - elif f_candidate <= -np.inf: - converged = True - reason = 5 - elif niter is not None and niter == maxiter: - reason = 6 - - return converged, reason - - -@njit -def _apply_bounds_to_x_candidate(x, lower_bounds, upper_bounds, bound_tol=0): - """Apply upper and lower bounds to the candidate vector.""" - for i in range(len(x)): - if x[i] <= lower_bounds[i] + bound_tol: - x[i] = lower_bounds[i] - elif x[i] >= upper_bounds[i] - bound_tol: - x[i] = upper_bounds[i] - return x - - -@njit -def _project_gradient_onto_feasible_set(gradient_unprojected, inactive_bounds): - """Project gradient onto feasible set, where search directions unconstrained.""" - gradient_projected = np.zeros(len(gradient_unprojected)) - 
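# Example (hypothetical values, added for illustration): with
# inactive_bounds = np.array([True, False, True]) and
# gradient_unprojected = np.array([1.0, 2.0, 3.0]), the projected
# gradient is [1.0, 0.0, 3.0]; components that sit at an active bound
# are zeroed so the search direction never pushes against a binding
# constraint.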
gradient_projected[inactive_bounds] = gradient_unprojected[inactive_bounds] - - return gradient_projected - - -@njit -def _apply_bounds_to_conjugate_gradient_step( - step_inactive, - x_candidate, - lower_bounds, - upper_bounds, - inactive_bounds, - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, -): - """Apply lower and upper bounds to the Conjugate Gradient step.""" - cg_step = np.zeros(len(x_candidate)) - cg_step[inactive_bounds] = step_inactive - - if active_lower_bounds.any(): - x_active_lower = x_candidate[active_lower_bounds] - lower_bound_active = lower_bounds[active_lower_bounds] - - cg_step[active_lower_bounds] = lower_bound_active - x_active_lower - - if active_upper_bounds.any(): - x_active_upper = x_candidate[active_upper_bounds] - upper_bound_active = upper_bounds[active_upper_bounds] - - cg_step[active_upper_bounds] = upper_bound_active - x_active_upper - - if active_fixed_bounds.any(): - cg_step[active_fixed_bounds] = 0 - - return cg_step - - -@njit -def _update_trustregion_radius_and_gradient_descent( - trustregion_radius, - radius_lower_bound, - predicted_reduction, - actual_reduction, - gradient_norm, - theta, - mu1, - mu2, - gamma1, - gamma2, - gamma3, - gamma4, -): - """Update the trust-region radius and its upper bound.""" - if np.abs(actual_reduction) <= EPSILON and np.abs(predicted_reduction) <= EPSILON: - kappa = 1 - else: - kappa = actual_reduction / predicted_reduction - - tau_1 = ( - theta - * gradient_norm - * trustregion_radius - / ( - theta * gradient_norm * trustregion_radius - + (1 - theta) * predicted_reduction - - actual_reduction - ) - ) - tau_2 = ( - theta - * gradient_norm - * trustregion_radius - / ( - theta * gradient_norm * trustregion_radius - - (1 + theta) * predicted_reduction - + actual_reduction - ) - ) - - tau_min = min(tau_1, tau_2) - tau_max = max(tau_1, tau_2) - - if np.abs(kappa - 1) <= mu1: - # Great agreement - radius_lower_bound = max(radius_lower_bound, trustregion_radius) - - if tau_max < 1: - tau = gamma3 - elif tau_max > gamma4: - tau = gamma4 - else: - tau = tau_max - - elif np.abs(kappa - 1) <= mu2: - # Good agreement - radius_lower_bound = max(radius_lower_bound, trustregion_radius) - - if tau_max < gamma2: - tau = gamma2 - elif tau_max > gamma3: - tau = gamma3 - else: - tau = tau_max - - else: - # Not good agreement - if tau_min > 1: - tau = gamma2 - elif tau_max < gamma1: - tau = gamma1 - elif (tau_min < gamma1) and (tau_max >= 1): - tau = gamma1 - elif ( - (tau_1 >= gamma1) and (tau_1 < 1.0) and ((tau_2 < gamma1) or (tau_2 >= 1.0)) - ): - tau = tau_1 - elif ( - (tau_2 >= gamma1) and (tau_2 < 1.0) and ((tau_1 < gamma1) or (tau_2 >= 1.0)) - ): - tau = tau_2 - else: - tau = tau_max - - trustregion_radius = trustregion_radius * tau - - return trustregion_radius, radius_lower_bound - - -@njit -def _get_fischer_burmeister_direction_vector(x, gradient, lower_bounds, upper_bounds): - """Compute the constrained direction vector via the Fischer-Burmeister function.""" - direction = np.zeros(len(x)) - for i, (x_, g_, l_, u_) in enumerate(zip(x, gradient, lower_bounds, upper_bounds)): - fischer_scalar = _get_fischer_burmeister_scalar(u_ - x_, -g_) - fischer_scalar = _get_fischer_burmeister_scalar(fischer_scalar, x_ - l_) - - if l_ == u_: - direction[i] = l_ - x_ - else: - direction[i] = fischer_scalar - return direction - - -@njit -def _get_fischer_burmeister_scalar(a, b): - """Get the value of the Fischer-Burmeister function for two scalar inputs. - - This method was suggested by Bob Vanderbei. 
Since the Fischer-Burmeister function - is symmetric, the order of the scalar inputs does not matter. - - Args: - a (float): First input. - b (float): Second input. - - Returns: - float: Value of the Fischer-Burmeister function for inputs a and b. - - """ - if a + b <= 0: - fischer_burmeister = np.sqrt(a**2 + b**2) - (a + b) - else: - fischer_burmeister = -2 * a * b / (np.sqrt(a**2 + b**2) + (a + b)) - - return fischer_burmeister - - -@njit -def _evaluate_model_criterion( - x, - gradient, - hessian, -): - """Evaluate the criterion function value of the main model. - - Args: - x (np.ndarray): Parameter vector of shape (n,). - gradient (np.ndarray): Gradient of shape (n,) for which the main model - shall be evaluated. - hessian (np.ndarray): Hessian of shape (n, n) for which the main model - shall be evaluated. - - Returns: - float: Criterion value of the main model. - - """ - return gradient.T @ x + 0.5 * x.T @ hessian @ x diff --git a/src/estimagic/optimization/subsolvers/gqtpar_fast.py b/src/estimagic/optimization/subsolvers/gqtpar_fast.py deleted file mode 100644 index 26e4a8da0..000000000 --- a/src/estimagic/optimization/subsolvers/gqtpar_fast.py +++ /dev/null @@ -1,668 +0,0 @@ -"""Auxiliary functions for the quadratic GQTPAR trust-region subsolver.""" -import numpy as np -from numba import njit -from scipy.linalg import cho_solve, solve_triangular -from scipy.linalg.lapack import dpotrf as compute_cholesky_factorization - - -def gqtpar_fast(model, x_candidate, *, k_easy=0.1, k_hard=0.2, maxiter=200): - """Solve the quadratic trust-region subproblem via a nearly exact iterative method. - - This subproblem solver is mainly based on Conn et al. (2000) "Trust region methods" - (:cite:`Conn2000`), pp. 169-200. - - But ideas from Nocedal and Wright (2006) "Numerical optimization" - (:cite:`Nocedal2006`), pp. 83-91, who implement a similar algorithm, - were also used. - - The original algorithm was developed by More and Sorensen (1983) (:cite:`More1983`) - and is known as "GQTPAR". - - The vector x* is a global solution to the quadratic subproblem: - - min_x f + g @ x + 0.5 * x.T @ H @ x, - - if and only if ||x|| <= trustregion_radius - and if there is a scalar lambda >= 0, such that: - - 1) (H + lambda * I(n)) x* = -g - 2) lambda (trustregion_radius - ||x*||) = 0 - 3) H + lambda * I is positive definite - - where g denotes the gradient and H the hessian of the quadratic model, - respectively. - - k_easy and k_hard are stopping criteria for the iterative subproblem solver. - See pp. 194-197 in :cite:`Conn2000` for a more detailed description. - - Args: - model (NamedTuple): NamedTuple containing the parameters of the main model, i.e. - - ``linear_terms``, a np.ndarray of shape (n,) and - - ``square_terms``, a np.ndarray of shape (n,n). - x_candidate (np.ndarray): Initial guess for the solution of the subproblem. - k_easy (float): Stopping criterion for the "easy" case. - k_hard (float): Stopping criterion for the "hard" case. - maxiter (int): Maximum number of iterations to perform. If reached, - terminate. - - Returns: - (dict): Result dictionary containing the following keys: - - ``x`` (np.ndarray): Solution vector of the subproblem of shape (n,) - - ``criterion`` (float): Minimum function value associated with the - solution. - - """ - hessian_already_factorized = False - model_gradient = model.linear_terms - model_hessian = model.square_terms - - # Small floating point number signaling that for vectors smaller - # than that backward substitution is not reliable. - # See Golub, G. H., Van Loan, C. F.
(2013), "Matrix computations", p.165. - zero_threshold = ( - model_hessian.shape[0] * np.finfo(float).eps * _norm(model_hessian, np.Inf) - ) - stopping_criteria = { - "k_easy": k_easy, - "k_hard": k_hard, - } - - gradient_norm = _norm(model_gradient, -1) - ( - lambda_candidate, - lambda_lower_bound, - lambda_upper_bound, - ) = _get_initial_guess_for_lambdas(model_gradient, model_hessian) - - converged = False - - for _niter in range(maxiter): - if hessian_already_factorized: - hessian_already_factorized = False - else: - ( - hessian_plus_lambda, - hessian_upper_triangular, - factorization_info, - ) = _add_lambda_and_factorize_hessian(model_hessian, lambda_candidate) - - if factorization_info == 0 and gradient_norm > zero_threshold: - ( - x_candidate, - hessian_plus_lambda, - hessian_already_factorized, - lambda_candidate, - lambda_lower_bound, - lambda_upper_bound, - converged, - ) = _find_new_candidate_and_update_parameters( - model_gradient, - model_hessian, - hessian_upper_triangular, - hessian_plus_lambda, - hessian_already_factorized, - lambda_candidate, - lambda_lower_bound, - lambda_upper_bound, - stopping_criteria, - converged, - ) - - elif factorization_info == 0 and gradient_norm <= zero_threshold: - ( - x_candidate, - lambda_candidate, - lambda_lower_bound, - lambda_upper_bound, - converged, - ) = _check_for_interior_convergence_and_update( - x_candidate, - hessian_upper_triangular, - lambda_candidate, - lambda_lower_bound, - lambda_upper_bound, - stopping_criteria, - converged, - ) - - else: - ( - lambda_candidate, - lambda_lower_bound, - ) = _update_lambdas_when_factorization_unsuccessful( - hessian_upper_triangular, - hessian_plus_lambda, - lambda_candidate, - lambda_lower_bound, - lambda_upper_bound, - factorization_info, - ) - - if converged: - break - - f_min = ( - model_gradient.T @ x_candidate - + 0.5 * x_candidate.T @ model_hessian @ x_candidate - ) - result = { - "x": x_candidate, - "criterion": f_min, - "n_iterations": _niter, - "success": converged, - } - - return result - - -@njit -def _get_initial_guess_for_lambdas(model_gradient, model_hessian): - """Return good initial guesses for lambda, its lower and upper bound. - - The values are chosen accordingly to the guidelines on - section 7.3.8 (p. 192) from :cite:`Conn2000`. - - Args: - model_gradient (np.ndarray): 1d array, of len n, of linear terms of the - surrogate model. - model_hessian (np.ndarray): 2d array, of shape (n,n), of square terms of the - surrogate model. - - Returns: - lambda_candidate (float): initial guess for damping factor - lambda_lower_bound (float): initial guess for the lower bound of the damping - factor. - lambda_upper_bound(float): initial guess for the upper bound of the damping - factor. 
- - """ - gradient_norm = _norm(model_gradient, -1.0) - model_hessian = model_hessian - - hessian_infinity_norm = _norm(model_hessian, np.Inf) - hessian_frobenius_norm = _norm(model_hessian, -1.0) - - hessian_gershgorin_lower, hessian_gershgorin_upper = _compute_gershgorin_bounds( - model_hessian - ) - - lambda_lower_bound = max( - 0, - -min(np.diag(model_hessian)), - gradient_norm - - min(hessian_gershgorin_upper, hessian_frobenius_norm, hessian_infinity_norm), - ) - lambda_upper_bound = max( - 0, - gradient_norm - + min(-hessian_gershgorin_lower, hessian_frobenius_norm, hessian_infinity_norm), - ) - - if lambda_lower_bound == 0: - lambda_candidate = 0 - else: - lambda_candidate = _get_new_lambda_candidate( - lower_bound=lambda_lower_bound, upper_bound=lambda_upper_bound - ) - - return lambda_candidate, lambda_lower_bound, lambda_upper_bound - - -def _add_lambda_and_factorize_hessian(model_hessian, lambda_candidate): - """Add lambda to hessian and factorize it into its upper triangular matrix. - - Args: - model_hessian (np.ndarray): 2d array, of shape (n,n), of square terms of the - surrogate model. - lambda_candidate (float): dampig factor. - Returns: - hessian_plus_lambda (np.ndarray): The square terms of the main model - plus the identity matrix times lambda. 2d array of shape (n, n). - hessian_upper_triangular (np.ndarray): Factorization of the hessian from the - main model into its upper triangular matrix. The diagonal is filled - and the lower lower triangular contains zeros. - factorization_info (int): success flag returned by scipy.dpotrf - - """ - hessian_plus_lambda = model_hessian + lambda_candidate * _identity( - model_hessian.shape[0] - ) - hessian_upper_triangular, factorization_info = compute_cholesky_factorization( - hessian_plus_lambda, - lower=False, - overwrite_a=False, - clean=True, - ) - - return hessian_plus_lambda, hessian_upper_triangular, factorization_info - - -def _find_new_candidate_and_update_parameters( - model_gradient, - model_hessian, - hessian_upper_triangular, - hessian_plus_lambda, - hessian_already_factorized, - lambda_candidate, - lambda_lower_bound, - lambda_upper_bound, - stopping_criteria, - converged, -): - """Find new candidate vector and update transformed hessian and lambdas.""" - x_candidate = cho_solve( - (hessian_upper_triangular, False), - -model_gradient, - ) - x_norm = _norm(x_candidate, -1.0) - - if x_norm <= 1 and lambda_candidate == 0: - converged = True - - w = solve_triangular( - hessian_upper_triangular, - x_candidate, - trans="T", - ) - w_norm = _norm(w, -1.0) - - newton_step = _compute_newton_step(lambda_candidate, x_norm, w_norm) - - if x_norm < 1: - ( - x_candidate, - hessian_plus_lambda, - hessian_already_factorized, - lambda_new_candidate, - lambda_new_lower_bound, - lambda_new_upper_bound, - converged, - ) = _update_candidate_and_parameters_when_candidate_within_trustregion( - x_candidate, - model_hessian, - hessian_upper_triangular, - hessian_plus_lambda, - hessian_already_factorized, - lambda_candidate, - lambda_lower_bound, - newton_step, - stopping_criteria, - converged, - ) - - else: - if abs(x_norm - 1) <= stopping_criteria["k_easy"]: - converged = True - lambda_new_candidate = newton_step - lambda_new_lower_bound = lambda_candidate - lambda_new_upper_bound = lambda_upper_bound - return ( - x_candidate, - hessian_plus_lambda, - hessian_already_factorized, - lambda_new_candidate, - lambda_new_lower_bound, - lambda_new_upper_bound, - converged, - ) - - -def _check_for_interior_convergence_and_update( - x_candidate, - 
hessian_upper_triangular, - lambda_candidate, - lambda_lower_bound, - lambda_upper_bound, - stopping_criteria, - converged, -): - """Check for interior convergence, update candidate vector and lambdas.""" - if lambda_candidate == 0: - x_candidate = np.zeros(len(x_candidate)) - converged = True - - s_min, z_min = _estimate_smallest_singular_value(hessian_upper_triangular) - step_len = 2 - - if step_len**2 * s_min**2 <= stopping_criteria["k_hard"] * lambda_candidate: - x_candidate = step_len * z_min - converged = True - - lambda_lower_bound = max(lambda_lower_bound, lambda_upper_bound - s_min**2) - lambda_new_candidate = _get_new_lambda_candidate( - lower_bound=lambda_lower_bound, upper_bound=lambda_candidate - ) - return ( - x_candidate, - lambda_new_candidate, - lambda_lower_bound, - lambda_candidate, - converged, - ) - - -def _update_lambdas_when_factorization_unsuccessful( - hessian_upper_triangular, - hessian_plus_lambda, - lambda_candidate, - lambda_lower_bound, - lambda_upper_bound, - factorization_info, -): - """Update lambdas in the case that the factorization of the hessian is not successful.""" - delta, v = _compute_terms_to_make_leading_submatrix_singular( - hessian_upper_triangular, - hessian_plus_lambda, - factorization_info, - ) - v_norm = _norm(v, -1.0) - - lambda_lower_bound = max(lambda_lower_bound, lambda_candidate + delta / v_norm**2) - lambda_new_candidate = _get_new_lambda_candidate( - lower_bound=lambda_lower_bound, upper_bound=lambda_upper_bound - ) - return lambda_new_candidate, lambda_lower_bound - - -@njit -def _get_new_lambda_candidate(lower_bound, upper_bound): - """Update current lambda so that it lies within its bounds. - - Args: - lower_bound (float): lower bound of the current candidate damping factor. - upper_bound (float): upper bound of the current candidate damping factor. - - Returns: - float: New candidate for the damping factor lambda. - - """ - lambda_new_candidate = max( - np.sqrt(max(0, lower_bound * upper_bound)), - lower_bound + 0.01 * (upper_bound - lower_bound), - ) - - return lambda_new_candidate - - -@njit -def _compute_gershgorin_bounds(model_hessian): - """Compute upper and lower Gershgorin bounds for a square matrix. - - The Gershgorin bounds are the upper and lower bounds for the - eigenvalues of the square hessian matrix (i.e. the square terms of - the main model). See :cite:`Conn2000`. - - Args: - model_hessian (np.ndarray): 2d array, of shape (n,n), with square terms of the - surrogate model. - - Returns: - lower_gershgorin (float): Lower Gershgorin bound. - upper_gershgorin (float): Upper Gershgorin bound. - - """ - hessian_diag = np.diag(model_hessian) - hessian_diag_abs = np.abs(hessian_diag) - hessian_row_sums = np.sum(np.abs(model_hessian), axis=1) - - lower_gershgorin = np.min(hessian_diag + hessian_diag_abs - hessian_row_sums) - upper_gershgorin = np.max(hessian_diag - hessian_diag_abs + hessian_row_sums) - - return lower_gershgorin, upper_gershgorin - - -@njit -def _compute_newton_step(lambda_candidate, p_norm, w_norm): - """Compute the Newton step. - - Args: - lambda_candidate (float): Damping factor. - p_norm (float): Frobenius (i.e. L2-norm) of the candidate vector. - w_norm (float): Frobenius (i.e. L2-norm) of vector w, which is the solution - to the following triangular system: U.T w = p. - - Returns: - float: Newton step computed according to formula (4.44) p.87 - from Nocedal and Wright (2006).
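Example (added for illustration, hypothetical numbers): one update of the
formula on a 2x2 problem, using a lower Cholesky factor ``L`` so that
solving ``L w = p`` matches the module's ``U.T w = p``.

    import numpy as np

    hessian = np.array([[1.0, 0.0], [0.0, 4.0]])
    gradient = np.array([3.0, 4.0])
    lam = 2.0  # current damping candidate

    shifted = hessian + lam * np.eye(2)
    factor = np.linalg.cholesky(shifted)     # lower triangular L
    p = np.linalg.solve(shifted, -gradient)  # candidate step
    w = np.linalg.solve(factor, p)           # solves L w = p

    p_norm, w_norm = np.linalg.norm(p), np.linalg.norm(w)
    lam_new = lam + (p_norm / w_norm) ** 2 * (p_norm - 1)
    # lam_new is about 2.72, already close to the root of
    # ||p(lambda)|| = 1, which lies near 2.73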
- - """ - return lambda_candidate + (p_norm / w_norm) ** 2 * (p_norm - 1) - - -def _update_candidate_and_parameters_when_candidate_within_trustregion( - x_candidate, - model_hessian, - hessian_upper_triangular, - hessian_plus_lambda, - hessian_already_factorized, - lambda_candidate, - lambda_lower_bound, - newton_step, - stopping_criteria, - converged, -): - """Update candidate vector, hessian, and lambdas when x outside trust-region.""" - - s_min, z_min = _estimate_smallest_singular_value(hessian_upper_triangular) - step_len = _compute_smallest_step_len_for_candidate_vector(x_candidate, z_min) - - quadratic_term = x_candidate.T @ hessian_plus_lambda @ x_candidate - - relative_error = (step_len**2 * s_min**2) / (quadratic_term + lambda_candidate) - if relative_error <= stopping_criteria["k_hard"]: - x_candidate = x_candidate + step_len * z_min - converged = True - - lambda_new_lower_bound = max(lambda_lower_bound, lambda_candidate - s_min**2) - - hessian_plus_lambda = model_hessian + newton_step * _identity(len(x_candidate)) - _, factorization_unsuccessful = compute_cholesky_factorization( - hessian_plus_lambda, - lower=False, - overwrite_a=False, - clean=True, - ) - - if factorization_unsuccessful == 0: - hessian_already_factorized = True - lambda_new_candidate = newton_step - else: - lambda_new_lower_bound = max(lambda_new_lower_bound, newton_step) - lambda_new_candidate = _get_new_lambda_candidate( - lower_bound=lambda_new_lower_bound, upper_bound=lambda_candidate - ) - - lambda_new_upper_bound = lambda_candidate - - return ( - x_candidate, - hessian_plus_lambda, - hessian_already_factorized, - lambda_new_candidate, - lambda_new_lower_bound, - lambda_new_upper_bound, - converged, - ) - - -@njit -def _compute_smallest_step_len_for_candidate_vector(x_candidate, z_min): - """Compute the smallest step length for the candidate vector. - - Choose step_length with the smallest magnitude. - The reason for this choice is explained at p. 6 in :cite:`More1983`, - just before the formula for tau. - - Args: - x_candidate (np.ndarray): Candidate vector of shape (n,). - z_min (float): Smallest singular value of the hessian matrix. - - Returns: - float: Step length with the smallest magnitude. - - """ - a = z_min @ z_min - b = 2 * x_candidate.T @ z_min - c = x_candidate.T @ x_candidate - 1 - ta, tb = np.roots(np.array([a, b, c])) - if abs(ta) <= abs(tb): - step_len = ta - else: - step_len = tb - return step_len - - -def _compute_terms_to_make_leading_submatrix_singular( - hessian_upper_triangular, hessian_plus_lambda, k -): - """Compute terms that make the leading submatrix of the hessian singular. - - The "hessian" here refers to the matrix - - H + lambda * I(n), - - where H is the initial hessian, lambda is the current damping factor, - I the identity matrix, and m the number of rows/columns of the symmetric - hessian matrix. - - Args: - hessian_upper_triangular (np.ndarray) Upper triangular matrix resulting of an - incomplete Cholesky decomposition of the hessian matrix. - hessian_plus_lambda (np.ndarray): Symmetric k by k hessian matrix, which is not - positive definite. - k (int): Positive integer such that the leading k by k submatrix from - hessian is the first non-positive definite leading submatrix. - - Returns: - Tuple: - - delta(float): Amount that should be added to the element (k, k) of - the leading k by k submatrix of the hessian to make it singular. - - v (np.ndarray): A vector such that ``v.T B v = 0``. Where B is the - hessian after ``delta`` is added to its element (k, k). 
- - """ - hessian_plus_lambda = hessian_plus_lambda - upper_triangular = hessian_upper_triangular - - delta = ( - np.sum(upper_triangular[: k - 1, k - 1] ** 2) - - hessian_plus_lambda[k - 1, k - 1] - ) - - v = np.zeros(len(hessian_plus_lambda)) - v[k - 1] = 1 - - if k != 1: - v[: k - 1] = solve_triangular( - upper_triangular[: k - 1, : k - 1], - -upper_triangular[: k - 1, k - 1], - ) - - return delta, v - - -@njit -def _estimate_condition(u): - """Return largest possible solution w to the system u.T w = e. - - u is an upper triangular matrix, and components of e are selected from {+1, -1}. - - Args: - u (np.ndarray): Upper triangular matrix of shape (n,n). - Returns: - w (np.ndarray): 1d array of len n. - - """ - u = np.atleast_2d(u) - - if u.shape[0] != u.shape[1]: - raise ValueError("A square triangular matrix should be provided.") - - # A vector `e` with components selected from {+1, -1} - # is selected so that the solution `w` to the system - # `U.T w = e` is as large as possible. Implementation - # based on algorithm 3.5.1, p. 142, from reference [2] - # adapted for lower triangular matrix. - m = u.shape[0] - p = np.zeros(m) - w = np.zeros(m) - - # Implemented according to: Golub, G. H., Van Loan, C. F. (2013). - # "Matrix computations". Forth Edition. JHU press. pp. 140-142. - for k in range(m): - wp = (1 - p[k]) / u.T[k, k] - wm = (-1 - p[k]) / u.T[k, k] - pp = p[k + 1 :] + u.T[k + 1 :, k] * wp - pm = p[k + 1 :] + u.T[k + 1 :, k] * wm - - if abs(wp) + _norm(pp, 1) >= abs(wm) + _norm(pm, 1): - w[k] = wp - p[k + 1 :] = pp - else: - w[k] = wm - p[k + 1 :] = pm - return w - - -def _estimate_smallest_singular_value(upper_triangular): - """Estimate the smallest singular vlue and the correspondent right singular vector. - - Given an upper triangular matrix `u`, performs in O(n**2) operations and returns - estimated values of smalles singular value and the correspondent right singular - vector. - - Based on estimate_smallest_singular_value from scipy.optimize._trustregion_exact, - jitting some calculations in a separate function and calling them here. - - Args: - upper_triangular (np.ndarray) : Square upper triangular matrix of shape (n,n) - - Returns: - s_min (float): Estimated smallest singular value of the provided matrix. - z_min (np.ndarray): Estimatied right singular vector. - - Notes: - The procedure is based on [1] and is done in two steps. First, it finds - a vector ``e`` with components selected from {+1, -1} such that the - solution ``w`` from the system ``U.T w = e`` is as large as possible. - Next it estimate ``U v = w``. The smallest singular value is close - to ``norm(w)/norm(v)`` and the right singular vector is close - to ``v/norm(v)``. - The estimation will be better more ill-conditioned is the matrix. - - References: - .. [1] Cline, A. K., Moler, C. B., Stewart, G. W., Wilkinson, J. H. - An estimate for the condition number of a matrix. 1979. - SIAM Journal on Numerical Analysis, 16(2), 368-375. - - """ - w = _estimate_condition(upper_triangular) - - # The system `U v = w` is solved using backward substitution. 
- v = solve_triangular(upper_triangular, w) - - v_norm = _norm(v, -1.0) - w_norm = _norm(w, -1.0) - - # Smallest singular value - s_min = w_norm / v_norm - - # Associated vector - z_min = v / v_norm - - return s_min, z_min - - -@njit -def _norm(a, order): - """A wrapper to jit np.linalg.norm.""" - if order == -1: - out = np.linalg.norm(a) - else: - out = np.linalg.norm(a, order) - return out - - -@njit -def _identity(dim): - """A wrapper to jit np.eye.""" - return np.eye(dim) diff --git a/src/estimagic/optimization/tranquilo.py b/src/estimagic/optimization/tranquilo.py new file mode 100644 index 000000000..6574d936c --- /dev/null +++ b/src/estimagic/optimization/tranquilo.py @@ -0,0 +1,29 @@ +from estimagic.config import IS_TRANQUILO_INSTALLED + + +if IS_TRANQUILO_INSTALLED: + from tranquilo.tranquilo import _tranquilo + from functools import partial + from estimagic.decorators import mark_minimizer + + tranquilo = mark_minimizer( + func=partial(_tranquilo, functype="scalar"), + name="tranquilo", + primary_criterion_entry="value", + needs_scaling=True, + is_available=True, + is_global=False, + ) + + tranquilo_ls = mark_minimizer( + func=partial(_tranquilo, functype="least_squares"), + primary_criterion_entry="root_contributions", + name="tranquilo_ls", + needs_scaling=True, + is_available=True, + is_global=False, + ) + + __all__ = ["tranquilo", "tranquilo_ls"] +else: + __all__ = [] diff --git a/src/estimagic/optimization/tranquilo/__init__.py b/src/estimagic/optimization/tranquilo/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/estimagic/optimization/tranquilo/acceptance_decision.py b/src/estimagic/optimization/tranquilo/acceptance_decision.py deleted file mode 100644 index d2303320b..000000000 --- a/src/estimagic/optimization/tranquilo/acceptance_decision.py +++ /dev/null @@ -1,244 +0,0 @@ -"""Functions that decide what is the next accepted point, given a candidate. - -Decision functions can simply decide whether or not the candidate is accepted but can -also do own function evaluations and decide to accept a different point. - -""" -from typing import NamedTuple - -import numpy as np - -from estimagic.optimization.tranquilo.acceptance_sample_size import ( - get_acceptance_sample_sizes, -) -from estimagic.optimization.tranquilo.get_component import get_component -from estimagic.optimization.tranquilo.options import AcceptanceOptions - - -def get_acceptance_decider(acceptance_decider, acceptance_options): - func_dict = { - "classic": _accept_classic, - "naive_noisy": accept_naive_noisy, - "noisy": accept_noisy, - } - - out = get_component( - name_or_func=acceptance_decider, - func_dict=func_dict, - component_name="acceptance_decider", - user_options=acceptance_options, - default_options=AcceptanceOptions(), - ) - - return out - - -def _accept_classic( - subproblem_solution, - state, - history, - *, - wrapped_criterion, - min_improvement, -): - """Do a classic acceptance step for a trustregion algorithm. - - Args: - subproblem_solution (SubproblemResult): Result of the subproblem solution. - state (State): Namedtuple containing the trustregion, criterion value of - previously accepted point, indices of model points, etc. - wrapped_criterion (callable): The criterion function. - min_improvement (float): Minimum improvement required to accept a point. 
- - Returns: - AcceptanceResult - - """ - out = _accept_simple( - subproblem_solution=subproblem_solution, - state=state, - history=history, - wrapped_criterion=wrapped_criterion, - min_improvement=min_improvement, - n_evals=1, - ) - return out - - -def accept_naive_noisy( - subproblem_solution, - state, - history, - *, - wrapped_criterion, - min_improvement, -): - """Do a naive noisy acceptance step, averaging over a fixed number of points.""" - out = _accept_simple( - subproblem_solution=subproblem_solution, - state=state, - history=history, - wrapped_criterion=wrapped_criterion, - min_improvement=min_improvement, - n_evals=5, - ) - return out - - -def _accept_simple( - subproblem_solution, - state, - history, - *, - wrapped_criterion, - min_improvement, - n_evals, -): - """Do a classic acceptance step for a trustregion algorithm. - - Args: - subproblem_solution (SubproblemResult): Result of the subproblem solution. - state (State): Namedtuple containing the trustregion, criterion value of - previously accepted point, indices of model points, etc. - wrapped_criterion (callable): The criterion function. - min_improvement (float): Minimum improvement required to accept a point. - - Returns: - AcceptanceResult - - """ - candidate_x = subproblem_solution.x - - candidate_index = history.add_xs(candidate_x) - - wrapped_criterion({candidate_index: n_evals}) - - candidate_fval = np.mean(history.get_fvals(candidate_index)) - - actual_improvement = -(candidate_fval - state.fval) - - rho = calculate_rho( - actual_improvement=actual_improvement, - expected_improvement=subproblem_solution.expected_improvement, - ) - - is_accepted = actual_improvement >= min_improvement - - res = _get_acceptance_result( - candidate_x=candidate_x, - candidate_fval=candidate_fval, - candidate_index=candidate_index, - rho=rho, - is_accepted=is_accepted, - old_state=state, - ) - - return res - - -def accept_noisy( - subproblem_solution, - state, - noise_variance, - history, - *, - wrapped_criterion, - min_improvement, - power_level, - confidence_level, - n_min, - n_max, -): - candidate_x = subproblem_solution.x - candidate_index = history.add_xs(candidate_x) - existing_n1 = len(history.get_fvals(state.index)) - - n_1, n_2 = get_acceptance_sample_sizes( - sigma=np.sqrt(noise_variance), - existing_n1=existing_n1, - expected_improvement=subproblem_solution.expected_improvement, - power_level=power_level, - confidence_level=confidence_level, - n_min=n_min, - n_max=n_max, - ) - - eval_info = { - state.index: n_1, - candidate_index: n_2, - } - - wrapped_criterion(eval_info) - - current_fval = history.get_fvals(state.index).mean() - candidate_fval = history.get_fvals(candidate_index).mean() - - actual_improvement = -(candidate_fval - current_fval) - - rho = calculate_rho( - actual_improvement=actual_improvement, - expected_improvement=subproblem_solution.expected_improvement, - ) - - is_accepted = actual_improvement >= min_improvement - - res = _get_acceptance_result( - candidate_x=candidate_x, - candidate_fval=candidate_fval, - candidate_index=candidate_index, - rho=rho, - is_accepted=is_accepted, - old_state=state, - ) - - return res - - -class AcceptanceResult(NamedTuple): - x: np.ndarray - fval: float - index: int - rho: float - accepted: bool - step_length: float - relative_step_length: float - candidate_index: int - candidate_x: np.ndarray - - -def _get_acceptance_result( - candidate_x, - candidate_fval, - candidate_index, - rho, - is_accepted, - old_state, -): - x = candidate_x if is_accepted else old_state.x - fval = 
candidate_fval if is_accepted else old_state.fval - index = candidate_index if is_accepted else old_state.index - step_length = np.linalg.norm(x - old_state.x, ord=2) - relative_step_length = step_length / old_state.trustregion.radius - - out = AcceptanceResult( - x=x, - fval=fval, - index=index, - rho=rho, - accepted=is_accepted, - step_length=step_length, - relative_step_length=relative_step_length, - candidate_index=candidate_index, - candidate_x=candidate_x, - ) - return out - - -def calculate_rho(actual_improvement, expected_improvement): - if expected_improvement == 0 and actual_improvement > 0: - rho = np.inf - elif expected_improvement == 0: - rho = -np.inf - else: - rho = actual_improvement / expected_improvement - return rho diff --git a/src/estimagic/optimization/tranquilo/acceptance_sample_size.py b/src/estimagic/optimization/tranquilo/acceptance_sample_size.py deleted file mode 100644 index 53f5b0208..000000000 --- a/src/estimagic/optimization/tranquilo/acceptance_sample_size.py +++ /dev/null @@ -1,69 +0,0 @@ -import numpy as np -from scipy.stats import norm - - -def get_acceptance_sample_sizes( - sigma, - existing_n1, - expected_improvement, - power_level, - confidence_level, - n_min, - n_max, -): - n1_raw, n2_raw = _get_optimal_sample_sizes( - sd_1=sigma, - sd_2=sigma, - existing_n1=existing_n1, - minimal_effect_size=np.clip(expected_improvement, 1e-8, np.inf), - power_level=power_level, - significance_level=1 - confidence_level, - ) - - n1 = int(np.ceil(np.clip(n1_raw, 0, max(0, n_max - existing_n1)))) - n2 = int(np.ceil(np.clip(n2_raw, n_min, n_max))) - return n1, n2 - - -def _get_optimal_sample_sizes( - sd_1, sd_2, existing_n1, minimal_effect_size, power_level, significance_level -): - """Return missing sample sizes. - - Args: - sd_1 (float): Standard deviation of the first group. - sd_2 (float): Standard deviation of the second group. - existing_n1 (int): Number of samples in the first group. - minimal_effect_size (float): Minimal effect size. - power_level (float): Power level. - significance_level (float): Significance level. - - Returns: - tuple: Missing sample sizes. - - """ - factor = _compute_factor(minimal_effect_size, power_level, significance_level) - - n1_optimal = (sd_1 * (sd_2 + sd_1)) * factor - n2_optimal = (sd_2 * (sd_2 + sd_1)) * factor - - if existing_n1 <= n1_optimal: - n1 = n1_optimal - existing_n1 - n2 = n2_optimal - else: - n1 = 0 - n2 = sd_2**2 * (factor ** (-1) - sd_1**2 / existing_n1) ** (-1) - - return n1, n2 - - -def _compute_factor(minimal_effect_size, power_level, significance_level): - # avoid division by zero warning; will be clipped later - if minimal_effect_size == 0: - factor = np.inf - else: - factor = ( - (norm.ppf(1 - significance_level) + norm.ppf(power_level)) - / minimal_effect_size - ) ** 2 - return factor diff --git a/src/estimagic/optimization/tranquilo/adjust_radius.py b/src/estimagic/optimization/tranquilo/adjust_radius.py deleted file mode 100644 index 5972f4fa5..000000000 --- a/src/estimagic/optimization/tranquilo/adjust_radius.py +++ /dev/null @@ -1,42 +0,0 @@ -import numpy as np - - -def adjust_radius(radius, rho, step_length, options): - """Adjust the trustregion radius based on relative improvement and stepsize. - - This is just a slight generalization of the pounders radius adjustment. With default - options it yields the same result. - - Noise handling is not built-in here. It will be achieved by calling the - function with a noise-adjusted rho. - - Args: - radius (float): The current trust-region radius. 
- rho (float): Actual over expected improvement between the last two accepted - parameter vectors. - step_length (float): Length of the step between the last two accepted - parameter vectors. - options (NamedTuple): Options for radius management. - - Returns: - float: The updated radius. - - """ - is_large_step = step_length / radius >= options.large_step - - if rho >= options.rho_increase and is_large_step: - new_radius = radius * options.expansion_factor - elif rho >= options.rho_decrease: - new_radius = radius - else: - new_radius = radius * options.shrinking_factor - - if np.isfinite(options.max_radius_to_step_ratio): - max_radius = np.min( - [options.max_radius, step_length * options.max_radius_to_step_ratio] - ) - else: - max_radius = options.max_radius - - new_radius = np.clip(new_radius, options.min_radius, max_radius) - - return new_radius diff --git a/src/estimagic/optimization/tranquilo/aggregate_models.py b/src/estimagic/optimization/tranquilo/aggregate_models.py deleted file mode 100644 index 3de7c6408..000000000 --- a/src/estimagic/optimization/tranquilo/aggregate_models.py +++ /dev/null @@ -1,152 +0,0 @@ -from functools import partial - -import numpy as np - -from estimagic.optimization.tranquilo.models import ScalarModel - - -def get_aggregator(aggregator): - """Get a function that aggregates a VectorModel into a ScalarModel. - - Args: - aggregator (str): Name of an aggregator. - - Returns: - callable: The partialled aggregator that only depends on vector_model. - - """ - built_in_aggregators = { - "identity": aggregator_identity, - "sum": aggregator_sum, - "information_equality_linear": aggregator_information_equality_linear, - "least_squares_linear": aggregator_least_squares_linear, - } - - if aggregator in built_in_aggregators: - _aggregator = built_in_aggregators[aggregator] - else: - raise ValueError( - f"Invalid aggregator: {aggregator}. Must be one of " - f"{list(built_in_aggregators)} or a callable." - ) - - out = partial(_aggregate_models_template, aggregator=_aggregator) - return out - - -def _aggregate_models_template(vector_model, aggregator): - """Aggregate a VectorModel into a ScalarModel. - - Args: - vector_model (VectorModel): The VectorModel to aggregate. - aggregator (callable): The function that does the actual aggregation. - - Returns: - ScalarModel: The aggregated model - - """ - intercept, linear_terms, square_terms = aggregator(vector_model) - scalar_model = ScalarModel( - intercept=intercept, - linear_terms=linear_terms, - square_terms=square_terms, - shift=vector_model.shift, - scale=vector_model.scale, - ) - return scalar_model - - -def aggregator_identity(vector_model): - """Aggregate quadratic VectorModel using identity function. - - This aggregation is useful if the underlying maximization problem is a scalar - problem. To get a second-order main model vector_model must be a second-order - model. - - Assumptions - ----------- - 1. functype: scalar - 2. model_type: quadratic - - """ - n_params = vector_model.linear_terms.size - intercept = float(vector_model.intercepts) - linear_terms = vector_model.linear_terms.flatten() - if vector_model.square_terms is None: - square_terms = np.zeros((n_params, n_params)) - else: - square_terms = vector_model.square_terms.reshape(n_params, n_params) - return intercept, linear_terms, square_terms - - -def aggregator_sum(vector_model): - """Aggregate quadratic VectorModel using sum function. - - This aggregation is useful if the underlying maximization problem is a likelihood - problem.
That is, the criterion is the sum of residuals, which allows us to sum - up the coefficients of the residual model to get the main model. The main model will - only be a second-order model if the residual model is a second-order model. - - Assumptions - ----------- - 1. functype: likelihood - 2. model_type: quadratic - - """ - vm_intercepts = vector_model.intercepts - intercept = vm_intercepts.sum(axis=0) - linear_terms = vector_model.linear_terms.sum(axis=0) - square_terms = vector_model.square_terms.sum(axis=0) - return intercept, linear_terms, square_terms - - -def aggregator_least_squares_linear(vector_model): - """Aggregate linear VectorModel assuming a least_squares functype. - - This aggregation is useful if the underlying maximization problem is a least-squares - problem. We can then simply plug-in a linear model for the residuals into the - least-squares formulae to get a second-order main model. - - Assumptions - ----------- - 1. functype: least_squares - 2. model_type: linear - - References - ---------- - See section 2.1 of :cite:`Cartis2018` for further information. - - """ - vm_linear_terms = vector_model.linear_terms - vm_intercepts = vector_model.intercepts - - intercept = vm_intercepts @ vm_intercepts - linear_terms = 2 * np.sum(vm_linear_terms * vm_intercepts.reshape(-1, 1), axis=0) - square_terms = 2 * vm_linear_terms.T @ vm_linear_terms - - return intercept, linear_terms, square_terms - - -def aggregator_information_equality_linear(vector_model): - """Aggregate linear VectorModel using the Fisher information equality. - - This aggregation is useful if the underlying maximization problem is a likelihood - problem. Given a linear model for the likelihood contributions we get an estimate of - the scores. Using the Fisher-Information-Equality we estimate the average Hessian - using the scores. - - Assumptions - ----------- - 1. functype: likelihood - 2. model_type: linear - - """ - vm_linear_terms = vector_model.linear_terms - vm_intercepts = vector_model.intercepts - - fisher_information = vm_linear_terms.T @ vm_linear_terms - - intercept = vm_intercepts.sum(axis=0) - linear_terms = vm_linear_terms.sum(axis=0) - square_terms = -fisher_information / 2 - - return intercept, linear_terms, square_terms diff --git a/src/estimagic/optimization/tranquilo/bounds.py b/src/estimagic/optimization/tranquilo/bounds.py deleted file mode 100644 index f73275421..000000000 --- a/src/estimagic/optimization/tranquilo/bounds.py +++ /dev/null @@ -1,28 +0,0 @@ -from dataclasses import dataclass, replace - -import numpy as np - - -@dataclass(frozen=True) -class Bounds: - """Parameter bounds.""" - - lower: np.ndarray - upper: np.ndarray - - def __post_init__(self): - # cannot use standard __setattr__ because it is frozen - super().__setattr__("has_any", _any_finite(self.lower, self.upper)) - - # make it behave like a NamedTuple - def _replace(self, **kwargs): - return replace(self, **kwargs) - - -def _any_finite(lb, ub): - out = False - if lb is not None and np.isfinite(lb).any(): - out = True - if ub is not None and np.isfinite(ub).any(): - out = True - return out diff --git a/src/estimagic/optimization/tranquilo/clustering.py b/src/estimagic/optimization/tranquilo/clustering.py deleted file mode 100644 index a697ac5c4..000000000 --- a/src/estimagic/optimization/tranquilo/clustering.py +++ /dev/null @@ -1,75 +0,0 @@ -import numpy as np -from numba import njit -from scipy.spatial.distance import pdist, squareform - - -def cluster(x, epsilon, shape="sphere"): - """Find clusters in x. 
- - A cluster is a set of points that are all within a radius - of epsilon around the central point of the cluster. - - Args: - x (np.ndarray): 2d numpy array of shape (n, d) with n points in - d-dimensional space. - epsilon (float): Proximity radius that determines the size of clusters. - shape (str): One of "sphere" or "cube". This is the shape of the clusters. - If "sphere", the distances between the points are calculated with an l2 norm. - If "cube", they are calculated with an infinity norm. - - Returns: - np.ndarray: 1d integer numpy array containing the cluster of each point. - np.ndarray: 1d integer numpy array containing the centers of each cluster. - - """ - if shape == "sphere": - dists = squareform(pdist(x)) - else: - raise NotImplementedError() - - labels, centers = _cluster(dists, epsilon) - return labels, centers - - -@njit -def _cluster(dists, epsilon): - n_points = len(dists) - labels = np.full(n_points, -1) - centers = np.full(n_points, -1) - n_labeled = 0 - cluster_counter = 0 - - while n_labeled < n_points: - # find best centerpoint among remaining points - - # provoke an index error if we forget to set this later - candidate_center = 2 * n_points - max_n_neighbors = 0 - for i in range(n_points): - if labels[i] < 0: - n_neighbors = 0 - for j in range(n_points): - if labels[j] < 0 and j != i and dists[i, j] <= epsilon: - n_neighbors += 1 - if n_neighbors == 0: - labels[i] = cluster_counter - centers[cluster_counter] = i - cluster_counter += 1 - n_labeled += 1 - elif n_neighbors > max_n_neighbors: - max_n_neighbors = n_neighbors - candidate_center = i - - # if not all points are labeled, we can be sure a cluster center - # was found - if n_labeled < n_points: - i = candidate_center - for j in range(n_points): - if labels[j] < 0 and dists[i, j] <= epsilon: - labels[j] = cluster_counter - n_labeled += 1 - - centers[cluster_counter] = i - cluster_counter += 1 - - return labels, centers[:cluster_counter] diff --git a/src/estimagic/optimization/tranquilo/estimate_variance.py b/src/estimagic/optimization/tranquilo/estimate_variance.py deleted file mode 100644 index c4503323d..000000000 --- a/src/estimagic/optimization/tranquilo/estimate_variance.py +++ /dev/null @@ -1,61 +0,0 @@ -"""Estimate the variance or covariance matrix of the noise in the objective function.""" - - -import numpy as np - -from estimagic.optimization.tranquilo.get_component import get_component -from estimagic.optimization.tranquilo.history import History -from estimagic.optimization.tranquilo.region import Region -from estimagic.optimization.tranquilo.options import VarianceEstimatorOptions - - -def get_variance_estimator(fitter, user_options): - func_dict = { - "classic": _estimate_variance_classic, - } - - out = get_component( - name_or_func=fitter, - func_dict=func_dict, - component_name="variance estimator", - user_options=user_options, - default_options=VarianceEstimatorOptions(), - ) - - return out - - -def _estimate_variance_classic( - trustregion: Region, - history: History, - model_type: str, - max_distance_factor: float, - min_n_evals: int, -): - all_indices = history.get_x_indices_in_region( - trustregion._replace(radius=trustregion.radius * max_distance_factor) - ) - - n_evals = {idx: len(history.get_fvals(idx)) for idx in all_indices} - - # make sure we keep at least one sample from which we can estimate a variance - cutoff = min(max(n_evals.values()), min_n_evals) - - valid_indices = [idx for idx in all_indices if n_evals[idx] >= cutoff] - weights = np.array([n_ for idx, n_ in n_evals.items() if idx in
valid_indices]) - weights = weights / weights.sum() - - if model_type == "scalar": - samples = list(history.get_fvals(valid_indices).values()) - out = 0.0 - for weight, sample in zip(weights, samples): - out += weight * np.var(sample, ddof=1) - else: - samples = list(history.get_fvecs(valid_indices).values()) - - dim = samples[0].shape[1] - out = np.zeros((dim, dim)) - for weight, sample in zip(weights, samples): - out += weight * np.cov(sample, rowvar=False, ddof=1) - - return out diff --git a/src/estimagic/optimization/tranquilo/filter_points.py b/src/estimagic/optimization/tranquilo/filter_points.py deleted file mode 100644 index 39658b45a..000000000 --- a/src/estimagic/optimization/tranquilo/filter_points.py +++ /dev/null @@ -1,129 +0,0 @@ -import numpy as np -import scipy - -from estimagic.optimization.tranquilo.clustering import cluster -from estimagic.optimization.tranquilo.get_component import get_component -from estimagic.optimization.tranquilo.volume import get_radius_after_volume_scaling -from estimagic.optimization.tranquilo.options import FilterOptions - - -def get_sample_filter(sample_filter="keep_all", user_options=None): - """Get filter function with partialled options. - - The filter function is applied to points inside the current trustregion before - additional points are sampled. - - The resulting function only takes an array of shape n_points, n_params as argument. - - Args: - sample_filter (str or callable): The name of a built in filter or a function - with the filter interface. - user_options (dict or namedtuple): Additional options for the filter. - - Returns: - callable: The filter - - """ - built_in_filters = { - "discard_all": discard_all, - "keep_all": keep_all, - "clustering": keep_cluster_centers, - "drop_excess": drop_excess, - } - - out = get_component( - name_or_func=sample_filter, - component_name="sample_filter", - func_dict=built_in_filters, - user_options=user_options, - default_options=FilterOptions(), - ) - - return out - - -def discard_all(state): - return state.x.reshape(1, -1), np.array([state.index]) - - -def keep_all(xs, indices): - return xs, indices - - -def drop_excess(xs, indices, state, target_size): - n_to_drop = max(0, len(xs) - target_size) - - if n_to_drop: - xs, indices = drop_worst_points(xs, indices, state, n_to_drop) - - return xs, indices - - -def drop_worst_points(xs, indices, state, n_to_drop): - """Drop the worst points from xs and indices. - - As long as there are points outside the trustregion, drop the point that is furthest - away from the trustregion center. - - If all points are inside the trustregion, find the two points that are closest to - each other. If one of them is the center, drop the other one. If none is the center, - drop the one that is closer to the center. - - This reflects that we want to have points as far out as possible as long as they are - inside the trustregion. - - The control flow is a bit complicated to avoid unnecessary or repeated computations - of distances and pairwise distances. 
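- 
-     Illustration (editor's sketch, not part of the original tests; assumes a
-     minimal ``state`` that only carries ``x``, ``index`` and
-     ``trustregion.radius``):
- 
-         import numpy as np
-         from types import SimpleNamespace
- 
-         state = SimpleNamespace(
-             x=np.zeros(2), index=0, trustregion=SimpleNamespace(radius=1.0)
-         )
-         xs = np.array([[0.0, 0.0], [0.1, 0.0], [3.0, 0.0]])
-         # the last point lies outside the radius-1 trustregion, so it is the
-         # first to be dropped; only then would the close pair be thinned out
-         new_xs, new_indices = drop_worst_points(xs, np.arange(3), state, n_to_drop=1)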
- - """ - n_dropped = 0 - - if n_dropped < n_to_drop: - dists = np.linalg.norm(xs - state.x, axis=1) - - while n_dropped < n_to_drop and (dists > state.trustregion.radius).any(): - drop_index = np.argmax(dists) - xs = np.delete(xs, drop_index, axis=0) - indices = np.delete(indices, drop_index) - dists = np.delete(dists, drop_index, axis=0) - n_dropped += 1 - - if n_dropped < n_to_drop: - pdists = scipy.spatial.distance.squareform(scipy.spatial.distance.pdist(xs)) - pdists[np.diag_indices_from(pdists)] = np.inf - - while n_dropped < n_to_drop: - i, j = np.unravel_index(np.argmin(pdists), pdists.shape) - - if indices[i] == state.index: - drop_index = j - elif indices[j] == state.index: - drop_index = i - else: - drop_index = i if dists[i] < dists[j] else j - - xs = np.delete(xs, drop_index, axis=0) - indices = np.delete(indices, drop_index) - dists = np.delete(dists, drop_index, axis=0) - pdists = np.delete(pdists, drop_index, axis=0) - pdists = np.delete(pdists, drop_index, axis=1) - n_dropped += 1 - - return xs, indices - - -def keep_cluster_centers( - xs, indices, state, target_size, strictness=1e-10, shape="sphere" -): - dim = xs.shape[1] - scaling_factor = strictness / target_size - cluster_radius = get_radius_after_volume_scaling( - radius=state.trustregion.radius, - dim=dim, - scaling_factor=scaling_factor, - ) - _, centers = cluster(x=xs, epsilon=cluster_radius, shape=shape) - - # do I need to make sure trustregion center is in there? - out = xs[centers], indices[centers] - return out diff --git a/src/estimagic/optimization/tranquilo/fit_models.py b/src/estimagic/optimization/tranquilo/fit_models.py deleted file mode 100644 index 32f0b74ce..000000000 --- a/src/estimagic/optimization/tranquilo/fit_models.py +++ /dev/null @@ -1,507 +0,0 @@ -from functools import partial - -import numpy as np -from numba import njit -from scipy.linalg import qr_multiply - -from estimagic.optimization.tranquilo.get_component import get_component -from estimagic.optimization.tranquilo.handle_infinity import get_infinity_handler -from estimagic.optimization.tranquilo.options import FitterOptions -from estimagic.optimization.tranquilo.models import ( - VectorModel, - add_models, - move_model, - n_second_order_terms, -) - - -def get_fitter( - fitter, - fitter_options=None, - model_type=None, - residualize=None, - infinity_handling=None, -): - """Get a fit-function with partialled options. - - Args: - fitter (str or callable): Name of a fit method or a fit method. Arguments need - to be, in order, - - x (np.ndarray): Data points. - - y (np.ndarray): Corresponding function evaluations at data points. - - weighs (np.ndarray): Weights for the data points. - - model_type (str): Type of model to be fitted. - - fitter_options (dict): Options for the fit method. The following are supported: - - l2_penalty_linear (float): Penalty that is applied to all linear terms. - - l2_penalty_square (float): Penalty that is applied to all square terms, - that is the quadratic and interaction terms. - - model_type (str): Type of the model that is fitted. The following are supported: - - "linear": Only linear effects and intercept. - - "quadratic": Fully quadratic model. - - residualize (bool): If True, the model is fitted to the residuals of the old - model. This introduces momentum when the coefficients are penalized. - - infinity_handling (str): How to handle infinite values in the data. Currently - supported: {"relative"}. See `handle_infinty.py`. - - Returns: - callable: The partialled fit method that only depends on x and y. 
- - """ - built_in_fitters = { - "ols": fit_ols, - "ridge": fit_ridge, - "powell": fit_powell, - "tranquilo": fit_tranquilo, - } - - mandatory_arguments = ["x", "y", "model_type"] - - _raw_fitter = get_component( - name_or_func=fitter, - component_name="fitter", - func_dict=built_in_fitters, - default_options=FitterOptions(), - user_options=fitter_options, - mandatory_signature=mandatory_arguments, - ) - - clip_infinite_values = get_infinity_handler(infinity_handling) - - fitter = partial( - _fitter_template, - fitter=_raw_fitter, - model_type=model_type, - clip_infinite_values=clip_infinite_values, - residualize=residualize, - ) - - return fitter - - -def _fitter_template( - x, - y, - region, - old_model, - weights=None, - fitter=None, - model_type=None, - clip_infinite_values=None, - residualize=False, -): - """Fit a model to data. - - Args: - x (np.ndarray): Array of shape (n_samples, n_params) of x-values, - rescaled such that the trust region becomes a hypercube from -1 to 1. - y (np.ndarray): Array of shape (n_samples, n_residuals) with function - evaluations that have been centered around the function value at the - trust region center. - fitter (callable): Fit method. The first argument of any fit method needs to be - ``x``, second ``y`` and third ``model_type``. - model_type (str): Type of the model that is fitted. The following are supported: - - "linear": Only linear effects and intercept. - - "quadratic": Fully quadratic model. - - Returns: - VectorModel or ScalarModel: Results container. - - """ - _, n_params = x.shape - n_residuals = y.shape[1] - - y_clipped = clip_infinite_values(y) - x_unit = region.map_to_unit(x) - - if residualize: - old_model_moved = move_model(old_model, region) - y_clipped = y_clipped - old_model_moved.predict(x_unit).reshape(y_clipped.shape) - - coef = fitter(x=x_unit, y=y_clipped, weights=weights, model_type=model_type) - - # results processing - intercepts, linear_terms, square_terms = np.split(coef, (1, n_params + 1), axis=1) - intercepts = intercepts.flatten() - - # construct final square terms - if model_type == "quadratic": - square_terms = _reshape_square_terms_to_hess( - square_terms, n_params, n_residuals - ) - else: - square_terms = None - - results = VectorModel( - intercepts, - linear_terms, - square_terms, - shift=region.effective_center, - scale=region.effective_radius, - ) - - if residualize: - results = add_models(results, old_model_moved) - - return results - - -def fit_ols(x, y, weights, model_type): - """Fit a linear model using ordinary least squares. - - Args: - x (np.ndarray): Array of shape (n_samples, n_params) of x-values, - rescaled such that the trust region becomes a hypercube from -1 to 1. - y (np.ndarray): Array of shape (n_samples, n_residuals) with function - evaluations that have been centered around the function value at the - trust region center. - model_type (str): Type of the model that is fitted. The following are supported: - - "linear": Only linear effects and intercept. - - "quadratic": Fully quadratic model. - - Returns: - np.ndarray: The model coefficients. - - """ - features = _build_feature_matrix(x, model_type) - features_w, y_w = _add_weighting(features, y, weights) - coef = _fit_ols(features_w, y_w) - - return coef - - -def _fit_ols(x, y): - """Fit a linear model using least-squares. - - Args: - x (np.ndarray): Array of shape (n, p) of x-values. - y (np.ndarray): Array of shape (n, k) of y-values. - - Returns: - coef (np.ndarray): Array of shape (p, k) of coefficients. 
- - """ - coef, *_ = np.linalg.lstsq(x, y, rcond=None) - coef = coef.T - - return coef - - -def fit_tranquilo(x, y, weights, model_type, p_intercept, p_linear, p_square): - """Fit a linear model using ordinary least squares. - - The difference to fit_ols is that the linear terms are penalized less strongly - when the system is underdetermined. - - Args: - x (np.ndarray): Array of shape (n_samples, n_params) of x-values, - rescaled such that the trust region becomes a hypercube from -1 to 1. - y (np.ndarray): Array of shape (n_samples, n_residuals) with function - evaluations that have been centered around the function value at the - trust region center. - model_type (str): Type of the model that is fitted. The following are supported: - - "linear": Only linear effects and intercept. - - "quadratic": Fully quadratic model. - - Returns: - np.ndarray: The model coefficients. - - """ - features = _build_feature_matrix(x, model_type) - features_w, y_w = _add_weighting(features, y, weights) - - n_params = x.shape[1] - n_features = features.shape[1] - - factor = np.array( - [1 / p_intercept] - + [1 / p_linear] * n_params - + [1 / p_square] * (n_features - 1 - n_params) - ) - - coef_raw = _fit_ols(features_w * factor, y_w) - coef = coef_raw * factor - - return coef - - -def fit_ridge( - x, - y, - weights, - model_type, - l2_penalty_linear, - l2_penalty_square, -): - """Fit a linear model using Ridge regression. - - Args: - x (np.ndarray): Array of shape (n_samples, n_params) of x-values, rescaled such - that the trust region becomes a hypercube from -1 to 1. - y (np.ndarray): Array of shape (n_samples, n_residuals) with function - evaluations that have been centered around the function value at the trust - region center. - model_type (str): Type of the model that is fitted. The following are supported: - - "linear": Only linear effects and intercept. - - "quadratic": Fully quadratic model. - l2_penalty_linear (float): Penalty that is applied to all linear terms. - l2_penalty_square (float): Penalty that is applied to all square terms, that is - the quadratic and interaction terms. - - Returns: - np.ndarray: The model coefficients. - - """ - features = _build_feature_matrix(x, model_type) - - features_w, y_w = _add_weighting(features, y, weights) - - # create penalty array - n_params = x.shape[1] - cutoffs = (1, n_params + 1) - - penalty = np.zeros(features.shape[1]) - penalty[: cutoffs[0]] = 0 - penalty[cutoffs[0] : cutoffs[1]] = l2_penalty_linear - penalty[cutoffs[1] :] = l2_penalty_square - - coef = _fit_ridge(features_w, y_w, penalty) - - return coef - - -def _fit_ridge(x, y, penalty): - """Fit a linear model using ridge regression. - - Args: - x (np.ndarray): Array of shape (n, p) of x-values. - y (np.ndarray): Array of shape (n, k) of y-values. - penalty (np.ndarray): Array of shape (p, ) of penalty values. - - Returns: - np.ndarray: Array of shape (p, k) of coefficients. - - """ - a = x.T @ x - b = x.T @ y - - coef, *_ = np.linalg.lstsq(a + np.diag(penalty), b, rcond=None) - coef = coef.T - - return coef - - -def fit_powell(x, y, model_type): - """Fit a model, switching between penalized and unpenalized fitting. - - For: - - n + 1 points: Fit ols with linear feature matrix. - - n + 2 <= n + 0.5 * n * (n + 1) points, i.e. until one less than a - just identified quadratic model: Fit pounders. - - else: Fit ols with quadratic feature matrix. - - - Args: - x (np.ndarray): Array of shape (n_samples, n_params) of x-values, - rescaled such that the trust region becomes a hypercube from -1 to 1. 
- y (np.ndarray): Array of shape (n_samples, n_residuals) with function - evaluations that have been centered around the function value at the - trust region center. - model_type (str): Type of the model that is fitted. The following are supported: - - "linear": Only linear effects and intercept. - - "quadratic": Fully quadratic model. - - Returns: - np.ndarray: The model coefficients. - - """ - n_samples, n_params = x.shape - - _switch_to_linear = n_samples <= n_params + 1 - - _n_just_identified = n_params + 1 - if model_type == "quadratic": - _n_just_identified += n_second_order_terms(n_params) - - if _switch_to_linear: - coef = fit_ols(x, y, weights=None, model_type="linear") - n_resid, n_present = coef.shape - padding = np.zeros((n_resid, _n_just_identified - n_present)) - coef = np.hstack([coef, padding]) - elif n_samples >= _n_just_identified: - coef = fit_ols(x, y, weights=None, model_type=model_type) - else: - coef = _fit_minimal_frobenius_norm_of_hessian(x, y) - - return coef - - -def _fit_minimal_frobenius_norm_of_hessian(x, y): - """Fit a quadratic model using the Powell fitting method. - - The solution represents the quadratic whose Hessian matrix is of - minimum Frobenius norm. This has been popularized by Powell and is used in - many optimizers, e.g. bobyqa and pounders. - - For a mathematical exposition, see :cite:`Wild2008`, p. 3-5. - - This method should only be called if the number of samples is larger than what - is needed to identify the parameters of a linear model but smaller than what - is needed to identify the parameters of a quadratic model. Most of the time, - the sample size is 2n + 1. - - Args: - x (np.ndarray): Array of shape (n_samples, n_params) of x-values, - rescaled such that the trust region becomes a hypercube from -1 to 1. - y (np.ndarray): Array of shape (n_samples, n_residuals) with function - evaluations that have been centered around the function value at the - trust region center. - - Returns: - np.ndarray: The model coefficients.
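- 
-     Example (editor's note on the sample-size window in which this fitter
-     applies, here for n_params = 3):
- 
-         n_params = 3
-         n_linear = n_params + 1  # 4 samples: linear model just identified
-         n_quadratic = n_linear + n_params * (n_params + 1) // 2  # 10: quadratic
-         # this fitter is used for 5 to 9 samples, e.g. 2 * n_params + 1 = 7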
- - """ - n_samples, n_params = x.shape - - _n_too_few = n_params + 1 - _n_too_many = n_params + n_params * (n_params + 1) // 2 + 1 - - if n_samples <= _n_too_few: - raise ValueError("Too few points for minimum frobenius fitting.") - if n_samples >= _n_too_many: - raise ValueError("Too may points for minimum frobenius fitting") - - n_poly_features = n_second_order_terms(n_params) - - ( - m_mat, - n_mat, - z_mat, - n_z_mat, - ) = _get_feature_matrices_minimal_frobenius_norm_of_hessian(x) - - coef = _get_current_fit_minimal_frobenius_norm_of_hessian( - y=y, - m_mat=m_mat, - n_mat=n_mat, - z_mat=z_mat, - n_z_mat=n_z_mat, - n_params=n_params, - n_poly_features=n_poly_features, - ) - - return coef - - -def _get_current_fit_minimal_frobenius_norm_of_hessian( - y, - m_mat, - n_mat, - z_mat, - n_z_mat, - n_params, - n_poly_features, -): - n_residuals = y.shape[1] - offset = 0 - - coeffs_linear = np.empty((n_residuals, 1 + n_params)) - coeffs_square = np.empty((n_residuals, n_poly_features)) - - n_z_mat_square = n_z_mat.T @ n_z_mat - - for k in range(n_residuals): - z_y_vec = np.dot(z_mat.T, y[:, k]) - coeffs_first_stage, *_ = np.linalg.lstsq( - np.atleast_2d(n_z_mat_square), np.atleast_1d(z_y_vec), rcond=None - ) - - coeffs_second_stage = np.atleast_2d(n_z_mat) @ coeffs_first_stage - - rhs = y[:, k] - n_mat @ coeffs_second_stage - - alpha, *_ = np.linalg.lstsq(m_mat, rhs[: n_params + 1], rcond=None) - coeffs_linear[k, :] = alpha[offset : (n_params + 1)] - - coeffs_square[k] = coeffs_second_stage - - coef = np.concatenate((coeffs_linear, coeffs_square), axis=1) - - return np.atleast_2d(coef) - - -def _get_feature_matrices_minimal_frobenius_norm_of_hessian(x): - n_samples, n_params = x.shape - - intercept = np.ones((n_samples, 1)) - features = np.concatenate((intercept, _quadratic_features(x)), axis=1) - m_mat, n_mat = np.split(features, (n_params + 1,), axis=1) - - m_mat_pad = np.zeros((n_samples, n_samples)) - m_mat_pad[:, : n_params + 1] = m_mat - - n_z_mat, _ = qr_multiply( - m_mat_pad, - n_mat.T, - ) - - z_mat, _ = qr_multiply( - m_mat_pad, - np.eye(n_samples), - ) - - return ( - m_mat[: n_params + 1, : n_params + 1], - n_mat, - z_mat[:, n_params + 1 : n_samples], - n_z_mat[:, n_params + 1 : n_samples], - ) - - -def _build_feature_matrix(x, model_type): - raw = x if model_type == "linear" else _quadratic_features(x) - intercept = np.ones((len(x), 1)) - features = np.concatenate((intercept, raw), axis=1) - return features - - -def _reshape_square_terms_to_hess(square_terms, n_params, n_residuals): - idx1, idx2 = np.triu_indices(n_params) - hess = np.zeros((n_residuals, n_params, n_params), dtype=np.float64) - hess[:, idx1, idx2] = square_terms - hess = hess + np.triu(hess).transpose(0, 2, 1) - - return hess - - -@njit -def _quadratic_features(x): - # Create fully quadratic features without intercept - n_samples, n_params = x.shape - n_poly_terms = n_second_order_terms(n_params) - - poly_terms = np.empty((n_poly_terms, n_samples), np.float64) - xt = x.T - - idx = 0 - for i in range(n_params): - j_start = i - for j in range(j_start, n_params): - poly_terms[idx] = xt[i] * xt[j] - idx += 1 - out = np.concatenate((xt, poly_terms), axis=0) - return out.T - - -def _add_weighting(x, y, weights=None): - # weight the data in order to get weighted fitting from fitters that do not support - # weights. 
Inspired by: https://stackoverflow.com/a/52452833 - n_samples = len(x) - if weights is not None: - _root_weights = np.sqrt(weights).reshape(n_samples, 1) - y = y * _root_weights - x = x * _root_weights - return x, y diff --git a/src/estimagic/optimization/tranquilo/geometry.py b/src/estimagic/optimization/tranquilo/geometry.py deleted file mode 100644 index de1d6aa94..000000000 --- a/src/estimagic/optimization/tranquilo/geometry.py +++ /dev/null @@ -1,24 +0,0 @@ -import numpy as np - - -def log_d_quality_calculator(sample, trustregion): - """Logarithm of the d-optimality criterion. - - For a data sample x the log_d_criterion is defined as log(det(x.T @ x)). If the - determinant is zero the function returns -np.inf. Before computation the sample is - mapped into unit space. - - Args: - sample (np.ndarray): The data sample, shape = (n, p). - trustregion (Region): Trustregion. See module region.py. - - Returns: - float: The criterion value. - - """ - points = trustregion.map_to_unit(sample) - n_samples, n_params = points.shape - xtx = points.T @ points - det = np.linalg.det(xtx / n_samples) - out = n_params * np.log(n_samples) + np.log(det) - return out diff --git a/src/estimagic/optimization/tranquilo/get_component.py b/src/estimagic/optimization/tranquilo/get_component.py deleted file mode 100644 index 54bdbe850..000000000 --- a/src/estimagic/optimization/tranquilo/get_component.py +++ /dev/null @@ -1,231 +0,0 @@ -import functools -import inspect -import warnings -from functools import partial - -from estimagic.utilities import propose_alternatives -from estimagic.optimization.tranquilo.options import update_option_bundle - - -def get_component( - name_or_func, - component_name, - func_dict=None, - default_options=None, - user_options=None, - redundant_option_handling="ignore", - redundant_argument_handling="ignore", - mandatory_signature=None, -): - """Process a function that represents an interchangeable component of tranquilo. - - The function is either a built in function or a user provided function. In all - cases we run some checks that the signature of the function is correct and then - partial all static options into the function. - - Args: - name_or_func (str or callable): Name of a function or function. - component_name (str): Name of the component. Used in error messages. Examples - would be "subsolver" or "model". - func_dict (dict): Dict with function names as keys and functions as values. - default_options (NamedTuple): Default options as a dict or NamedTuple. The - default options will be updated by the user options. - user_options (NamedTuple, Dict or None): User options as a dict or NamedTuple. - The default options will be updated by the user options. - redundant_option_handling (str): How to handle redundant options. Can be - "warn", "raise" or "ignore". Default "ignore". - redundant_argument_handling (str): How to handle redundant arguments passed - to the processed function at runtime. Can be "warn", "raise" or "ignore". - Default "ignore". - mandatory_signature (list): List or tuple of arguments that must be in the - signature of all functions in `func_dict`. These can be options or - arguments. Otherwise, a ValueError is raised. - - Returns: - callable: The processed function.
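- 
-     Example (editor's sketch with a hypothetical user-provided component):
- 
-         from typing import NamedTuple
- 
-         class Options(NamedTuple):
-             stepsize: float = 0.5
- 
-         def my_solver(x, stepsize):
-             return x - stepsize
- 
-         solve = get_component(
-             name_or_func=my_solver,
-             component_name="subsolver",
-             default_options=Options(),
-             user_options={"stepsize": 0.1},
-         )
-         solve(x=1.0)  # returns 0.9; stepsize was partialled in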
- - """ - - _func, _name = _get_function_and_name( - name_or_func=name_or_func, - component_name=component_name, - func_dict=func_dict, - ) - - _all_arguments = list(inspect.signature(_func).parameters) - - _valid_options = _get_valid_options( - default_options=default_options, - user_options=user_options, - signature=_all_arguments, - name=_name, - component_name=component_name, - redundant_option_handling=redundant_option_handling, - ) - - _fail_if_mandatory_argument_is_missing( - mandatory_arguments=mandatory_signature, - signature=_all_arguments, - name=_name, - component_name=component_name, - ) - - _partialled = partial(_func, **_valid_options) - - if redundant_argument_handling == "raise": - out = _partialled - else: - out = _add_redundant_argument_handling( - func=_partialled, - signature=_all_arguments, - warn=redundant_argument_handling == "warn", - ) - - return out - - -def _get_function_and_name(name_or_func, component_name, func_dict): - """Get the function and its name. - - Args: - name_or_func (str or callable): Name of a function or function. - component_name (str): Name of the component. Used in error messages. Examples - would be "subsolver" or "model". - func_dict (dict): Dict with function names as keys and functions as values. - - Returns: - tuple: The function and its name. - - """ - func_dict = {} if func_dict is None else func_dict - if isinstance(name_or_func, str): - if name_or_func in func_dict: - _func = func_dict[name_or_func] - _name = name_or_func - else: - _proposal = propose_alternatives(name_or_func, list(func_dict)) - msg = ( - f"If {component_name} is a string, it must be one of the built in " - f"{component_name}s. Did you mean: {_proposal}?" - ) - raise ValueError(msg) - elif callable(name_or_func): - _func = name_or_func - _name = _func.__name__ - else: - raise TypeError("name_or_func must be a string or a callable.") - - return _func, _name - - -def _get_valid_options( - default_options, - user_options, - signature, - name, - component_name, - redundant_option_handling, -): - """Get the options that are valid for the function. - - Args: - default_options (NamedTuple): Default options as a dict or NamedTuple. The - default options will be updated by the user options. - user_options (NamedTuple, Dict or None): User options as a dict or NamedTuple. - The default options will be updated by the user options. - signature (list): List of arguments that are present in the signature. - name (str): Name of the function. - component_name (str): Name of the component. Used in error messages. Examples - would be "subsolver" or "model". - redundant_option_handling (str): How to handle redundant options. Can be - - Returns: - dict: Valid options. - - """ - _options = update_option_bundle(default_options, user_options=user_options) - _options = _options._asdict() - - _valid_options = {k: v for k, v in _options.items() if k in signature} - _redundant_options = {k: v for k, v in _options.items() if k not in signature} - - if redundant_option_handling == "warn" and _redundant_options: - msg = ( - f"The following options are not supported by the {component_name} {name} " - f"and will be ignored: {list(_redundant_options)}." - ) - warnings.warn(msg) - - elif redundant_option_handling == "raise" and _redundant_options: - msg = ( - f"The following options are not supported by the {component_name} {name}: " - f"{list(_redundant_options)}." 
- ) - raise ValueError(msg) - - return _valid_options - - -def _fail_if_mandatory_argument_is_missing( - mandatory_arguments, signature, name, component_name -): - """Check if any mandatory arguments are missing in the signature of the function. - - Args: - mandatory_arguments (list): List of mandatory arguments. - signature (list): List of arguments that are present in the signature. - name (str): Name of the function. - component_name (str): Name of the component. Used in error messages. Examples - would be "subsolver" or "model". - - Returns: - None - - Raises: - ValueError: If any mandatory arguments are missing in the signature of the - function. - - """ - mandatory_arguments = [] if mandatory_arguments is None else mandatory_arguments - - _missing = [arg for arg in mandatory_arguments if arg not in signature] - - if _missing: - msg = ( - f"The following mandatory arguments are missing in the signature of the " - f"{component_name} {name}: {_missing}." - ) - raise ValueError(msg) - - -def _add_redundant_argument_handling(func, signature, warn): - """Allow func to be called with arguments that are not in the signature. - - Args: - func (callable): The function to be wrapped. - signature (list): List of arguments that are supported by func. - warn (bool): Whether to warn about redundant arguments. - - Returns: - callable: The wrapped function. - - """ - - @functools.wraps(func) - def _wrapper_add_redundant_argument_handling(*args, **kwargs): - _kwargs = {**dict(zip(signature[: len(args)], args)), **kwargs} - - _redundant = {k: v for k, v in _kwargs.items() if k not in signature} - _valid = {k: v for k, v in _kwargs.items() if k in signature} - - if warn and _redundant: - msg = ( - f"The following arguments are not supported by the function " - f"{func.__name__} and will be ignored: {_redundant}." - ) - warnings.warn(msg) - - out = func(**_valid) - return out - - return _wrapper_add_redundant_argument_handling diff --git a/src/estimagic/optimization/tranquilo/handle_infinity.py b/src/estimagic/optimization/tranquilo/handle_infinity.py deleted file mode 100644 index c250b64f2..000000000 --- a/src/estimagic/optimization/tranquilo/handle_infinity.py +++ /dev/null @@ -1,49 +0,0 @@ -import numpy as np - - -def get_infinity_handler(infinity_handler): - if isinstance(infinity_handler, str): - built_in_handlers = {"relative": clip_relative} - infinity_handler = built_in_handlers[infinity_handler] - elif not callable(infinity_handler): - raise TypeError("infinity_handler must be a string or callable.") - - return infinity_handler - - -def clip_relative(fvecs): - """Clip infinities at a value that is relative to worst finite value. - - Args: - fvecs (np.ndarray): 2d numpy array of shape n_samples, n_residuals. - - - Returns: - np.ndarray: Array of same shape as fvecs with finite values. - - """ - _mask = np.isfinite(fvecs) - - _mins = np.min(fvecs, axis=0, where=_mask, initial=1e300) - _maxs = np.max(fvecs, axis=0, where=_mask, initial=-1e300) - - # abs is necessary because if all values are infinite, the diffs can switch sign - # due to the initial value in the masked min and max - _diff = _maxs - _mins - - # Due to the initial value of the masked min and max, the sign of the diff can - # be negative if all values are infinite. In that case we want to switch the - # signe of _diff, _mins and _maxs. 
- _signs = np.sign(_diff) - _diff *= _signs - _maxs *= _signs - _mins *= _signs - - _pos_penalty = _maxs + 2 * _diff + 1 - _neg_penalty = _mins - 2 * _diff - 1 - - out = np.nan_to_num( - fvecs, nan=_pos_penalty, posinf=_pos_penalty, neginf=_neg_penalty - ) - - return out diff --git a/src/estimagic/optimization/tranquilo/history.py b/src/estimagic/optimization/tranquilo/history.py deleted file mode 100644 index b2144df1f..000000000 --- a/src/estimagic/optimization/tranquilo/history.py +++ /dev/null @@ -1,261 +0,0 @@ -import numpy as np - - -class History: - """Container to save and retrieve history entries. - - These entries are: xs, fvecs and fvals. - - fvals don't need to be added explicitly, as they are computed internally whenever - new entries are added. - - """ - - def __init__(self, functype): - self.xs = None - self.fvecs = None - self.fvals = None - self.n_xs = 0 - self.n_fun = 0 - self.index_mapper = {} - - self.functype = functype - - if functype == "scalar": - self.aggregate = lambda x: x.flatten() - elif functype == "likelihood": - self.aggregate = lambda x: x.sum(axis=-1) - elif functype == "least_squares": - self.aggregate = lambda x: (x**2).sum(axis=-1) - else: - raise ValueError( - "functype must be 'scalar', 'likelihood' or 'least_squares'." - ) - - def add_xs(self, xs): - """Add new parameter vectors to the history and return their indices. - - Args: - xs (np.ndarray or list): 1d or 2d array or list of 1d arrays with - parameter vectors. - - Returns: - np.ndarray: 1d array with indices of the added xs. - - """ - is_single = np.ndim(xs) == 1 - - xs = np.atleast_2d(xs) - - n_new_points = len(xs) if xs.size != 0 else 0 - - if n_new_points == 0: - return [] - - self.xs = _add_entries_to_array(self.xs, xs, self.n_xs) - - x_indices = np.arange(self.n_xs, self.n_xs + n_new_points) - - for x_index in x_indices: - self.index_mapper[x_index] = [] - - self.n_xs += n_new_points - - if is_single: - x_indices = x_indices[0] - - return x_indices - - def add_evals(self, x_indices, evals): - """Add new function evaluations to the history. - - Args: - x_indices (int, list or np.ndarray): Indices of the xs at which the function - was evaluated. - evals (np.ndarray or list): 1d or 2d array or list of 1d arrays with - least-squares fvecs. - - """ - x_indices = np.atleast_1d(x_indices) - - if not (x_indices < self.n_xs).all(): - raise ValueError( - "You requested to store a function evaluation for an x vector that is " - "not in the history." - ) - - n_new_points = len(x_indices) - - if n_new_points == 0: - return - - if self.functype == "scalar": - fvecs = np.reshape(evals, (-1, 1)) - else: - fvecs = np.atleast_2d(evals) - - fvals = np.atleast_1d(self.aggregate(fvecs)) - - if n_new_points != len(fvecs): - raise ValueError() - - self.fvecs = _add_entries_to_array(self.fvecs, fvecs, self.n_fun) - self.fvals = _add_entries_to_array(self.fvals, fvals, self.n_fun) - - f_indices = np.arange(self.n_fun, self.n_fun + n_new_points) - - for x_index, f_index in zip(x_indices, f_indices): - self.index_mapper[x_index].append(f_index) - - self.n_fun += n_new_points - - def get_xs(self, x_indices=None): - """Retrieve xs from history. - - Args: - x_indices (int, slice or sequence): Specifies the subset of rows that will - be returned. Anything that can be used to index into a 1d numpy array - is allowed.
- - Returns: - np.ndarray: 1d or 2d array with parameter vectors - - """ - if isinstance(x_indices, np.ndarray): - x_indices = x_indices.astype(int) - - out = self.xs[: self.n_xs] - out = out[x_indices] if x_indices is not None else out - - return out - - def get_fvecs(self, x_indices): - """Retrieve fvecs from history. - - Args: - x_indices (int, slice or sequence): Specifies the subset of rows that will - be returned. Anything that can be used to index into a 1d numpy array - is allowed. - - Returns: - np.ndarray or dict: If x_indices is a scalar, a single array is returned. - Otherwise, a dictionary with x_indices as keys and arrays as values is - returned. - - """ - out = _extract_from_indices( - arr=self.fvecs[: self.n_fun], - mapper=self.index_mapper, - x_indices=x_indices, - n_xs=self.n_xs, - ) - return out - - def get_fvals(self, x_indices): - """Retrieve fvals from history. - - Args: - x_indices (int, slice or sequence): Specifies the subset of rows that will - be returned. Anything that can be used to index into a 1d numpy array - is allowed. - - Returns: - np.ndarray or dict: If x_indices is a scalar, a single array is returned. - Otherwise, a dictionary with x_indices as keys and arrays as values is - returned. - - """ - out = _extract_from_indices( - arr=self.fvals[: self.n_fun], - mapper=self.index_mapper, - x_indices=x_indices, - n_xs=self.n_xs, - ) - return out - - def get_model_data(self, x_indices, average=True): - if np.isscalar(x_indices): - x_indices = [x_indices] - - raw_xs = self.get_xs(x_indices) - raw_fvecs = self.get_fvecs(x_indices) - - if average: - fvecs = np.array([np.mean(fvec, axis=0) for fvec in raw_fvecs.values()]) - xs = raw_xs - else: - fvecs = np.vstack(list(raw_fvecs.values())) - n_obs = np.array([len(fvec) for fvec in raw_fvecs.values()]) - xs = np.repeat(raw_xs, n_obs, axis=0) - - return xs, fvecs - - def get_n_fun(self): - return self.n_fun - - def get_n_xs(self): - return self.n_xs - - def get_x_indices_in_region(self, region): - # early return if there are no entries - if self.get_n_fun() == 0: - return np.array([]) - xs = self.get_xs() - mask = np.linalg.norm(xs - region.center, axis=1) <= region.radius - out = np.arange(len(mask))[mask] - return out - - def __repr__(self): - return f"History for {self.functype} function with {self.n_fun} entries." - - -def _add_entries_to_array(arr, new, position): - if arr is None: - shape = 1_000 if new.ndim == 1 else (1_000, new.shape[1]) - arr = np.full(shape, np.nan) - - n_new_points = len(new) if new.size != 0 else 0 - - if len(arr) - position - n_new_points < 0: - n_extend = max(len(arr), n_new_points) - if arr.ndim == 2: - extension_shape = (n_extend, arr.shape[1]) - arr = np.vstack([arr, np.full(extension_shape, np.nan)]) - else: - arr = np.hstack([arr, np.full(n_extend, np.nan)]) - - arr[position : position + n_new_points] = new - - return arr - - -def _extract_from_indices(arr, mapper, x_indices, n_xs): - """Retrieve fvecs or fvals from history. - - Args: - arr (np.ndarray): 1d or 2d Array with function values. - mapper (dict): Maps x indices to f indices. - x_indices (None, int or np.ndarray): Specifies the subset of parameter - vectors for which the function values will be returned. - - Returns: - dict or np.ndarray: If x_indices is a scalar, a single array is returned. - Otherwise, a dictionary with x_indices as keys and arrays as values is - returned. 
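- 
-         Example (editor's sketch):
- 
-             import numpy as np
- 
-             history = History(functype="scalar")
-             indices = history.add_xs(np.arange(6).reshape(3, 2))
-             history.add_evals(indices, [1.0, 2.0, 3.0])
-             history.get_fvals(0)  # array([1.])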
- - """ - if isinstance(x_indices, np.ndarray): - x_indices = x_indices.astype(int) - - is_single = np.isscalar(x_indices) - if is_single: - x_indices = [x_indices] - - indices = np.arange(n_xs)[x_indices].tolist() - - out = {i: arr[mapper[i]] for i in indices} - - if is_single: - out = out[x_indices[0]] - - return out diff --git a/src/estimagic/optimization/tranquilo/models.py b/src/estimagic/optimization/tranquilo/models.py deleted file mode 100644 index 50b63da80..000000000 --- a/src/estimagic/optimization/tranquilo/models.py +++ /dev/null @@ -1,295 +0,0 @@ -from dataclasses import dataclass, replace -from typing import Union - -import numpy as np -from numba import njit - - -@dataclass(frozen=True) -class VectorModel: - intercepts: np.ndarray # shape (n_residuals,) - linear_terms: np.ndarray # shape (n_residuals, n_params) - square_terms: Union[ - np.ndarray, None - ] = None # shape (n_residuals, n_params, n_params) - - # scale and shift correspond to effective_radius and effective_center of the region - # on which the model was fitted - scale: Union[float, np.ndarray] = None - shift: np.ndarray = None - - def predict(self, x: np.ndarray) -> np.ndarray: - return _predict_vector(self, x) - - # make it behave like a NamedTuple - def _replace(self, **kwargs): - return replace(self, **kwargs) - - -@dataclass(frozen=True) -class ScalarModel: - intercept: float - linear_terms: np.ndarray # shape (n_params,) - square_terms: Union[np.ndarray, None] = None # shape (n_params, n_params) - - # scale and shift correspond to effective_radius and effective_center of the region - # on which the model was fitted - scale: Union[float, np.ndarray] = None - shift: np.ndarray = None - - def predict(self, x: np.ndarray) -> np.ndarray: - return _predict_scalar(self, x) - - # make it behave like a NamedTuple - def _replace(self, **kwargs): - return replace(self, **kwargs) - - -def _predict_vector(model: VectorModel, x_unit: np.ndarray) -> np.ndarray: - """Evaluate a VectorModel at x_unit. - - We utilize that a quadratic model can be written in the form: - - Equation 1: f(x) = a + x.T @ g + 0.5 * x.T @ H @ x, - - with symmetric H. Note that H = f''(x), while g = f'(x) - H @ x. If we consider a - polynomial expansion around x = 0, we therefore get g = f'(x). Hence, g, H can be - thought of as the gradient and Hessian. Note that here we consider the case of - f(x) being vector-valued. In this case the above equation holds for each entry of - f seperately. - - Args: - model (VectorModel): The aggregated model. Has entries: - - 'intercepts': corresponds to 'a' in the above equation - - 'linear_terms': corresponds to 'g' in the above equation - - 'square_terms': corresponds to 'H' in the above equation - x_unit (np.ndarray): New data. Has shape (n_params,) or (n_samples, n_params). - - Returns: - np.ndarray: Model evaluations, has shape (n_samples, n_residuals) if x is 2d - and (n_residuals,) if x is 1d. - - """ - is_flat_x = x_unit.ndim == 1 - - x = np.atleast_2d(x_unit) - - y = model.linear_terms @ x.T + model.intercepts.reshape(-1, 1) - - if model.square_terms is not None: - y += np.sum((x @ model.square_terms) * x, axis=2) / 2 - - if is_flat_x: - out = y.flatten() - else: - out = y.T.reshape(len(x_unit), -1) - - return out - - -def add_models(model1, model2): - """Add two models. - - Args: - model1 (Union[ScalarModel, VectorModel]): The first model. - model2 (Union[ScalarModel, VectorModel]): The second model. - - Returns: - Union[ScalarModel, VectorModel]: The sum of the two models. 
- - """ - if type(model1) != type(model2): - raise TypeError("Models must be of the same type.") - - if not np.allclose(model1.shift, model2.shift): - raise ValueError("Models must have the same shift.") - - if not np.allclose(model1.scale, model2.scale): - raise ValueError("Models must have the same scale.") - - new = {} - if isinstance(model1, ScalarModel): - new["intercept"] = model1.intercept + model2.intercept - else: - new["intercepts"] = model1.intercepts + model2.intercepts - - new["linear_terms"] = model1.linear_terms + model2.linear_terms - - if model1.square_terms is not None: - assert model2.square_terms is not None - new["square_terms"] = model1.square_terms + model2.square_terms - - out = replace(model1, **new) - return out - - -def move_model(model, new_region): - """Move a model to a new region. - - Args: - model (Union[ScalarModel, VectorModel]): The model to move. - new_region (Region): The new region. - - Returns: - Union[ScalarModel, VectorModel]: The moved model. - - """ - # undo old scaling - out = _scale_model(model, factor=1 / model.scale) - - # shift the center - shift = new_region.effective_center - model.shift - if isinstance(model, ScalarModel): - out = _shift_scalar_model(out, shift=shift) - else: - out = _shift_vector_model(out, shift=shift) - - # apply new scaling - new_scale = new_region.effective_radius - out = _scale_model(out, factor=new_scale) - return out - - -def _scale_model(model, factor): - """Scale a scalar or vector model to a new radius. - - Args: - model (Union[ScalarModel, VectorModel]): The model to scale. - factor (Union[float, np.ndarray]): The scaling factor. - - Returns: - Union[ScalarModel, VectorModel]: The scaled model. - - """ - new_g = model.linear_terms * factor - new_h = None if model.square_terms is None else model.square_terms * factor**2 - - out = model._replace( - linear_terms=new_g, - square_terms=new_h, - scale=model.scale * factor, - ) - return out - - -def _shift_scalar_model(model, shift): - """Shift a scalar model to a new center. - - Args: - model (ScalarModel): The model to shift. - shift (np.ndarray): The shift. - - Returns: - ScalarModel: The shifted model. - - """ - new_c = model.predict(shift) - new_g = model.linear_terms + model.square_terms @ shift - - out = model._replace( - intercept=new_c, - linear_terms=new_g, - shift=model.shift + shift, - ) - return out - - -def _shift_vector_model(model, shift): - """Shift a vector model to a new center. - - Args: - model (VectorModel): The model to shift. - shift (np.ndarray): The shift. - - Returns: - VectorModel: The shifted model. - - """ - new_c = model.predict(shift) - - new_g = model.linear_terms - - if model.square_terms is not None: - new_g += shift @ model.square_terms - - out = model._replace( - intercepts=new_c, - linear_terms=new_g, - shift=model.shift + shift, - ) - return out - - -def _predict_scalar(model: ScalarModel, x_unit: np.ndarray) -> np.ndarray: - """Evaluate a ScalarModel at x_unit. - - We utilize that a quadratic model can be written in the form: - - Equation 1: f(x) = a + x.T @ g + 0.5 * x.T @ H @ x, - - with symmetric H. Note that H = f''(x), while g = f'(x) - H @ x. If we consider a - polynomial expansion around x = 0, we therefore get g = f'(x). Hence, g, H can be - thought of as the gradient and Hessian. - - Args: - model (ScalarModel): The aggregated model. 
Has entries: - - 'intercept': corresponds to 'a' in the above equation - - 'linear_terms': corresponds to 'g' in the above equation - - 'square_terms': corresponds to 'H' in the above equation - x_unit (np.ndarray): New data. Has shape (n_params,) or (n_samples, - n_params). - - Returns: - np.ndarray or float: Model evaluations, an array with shape (n_samples,) if x - is 2d and a float otherwise. - - """ - is_flat_x = x_unit.ndim == 1 - - x = np.atleast_2d(x_unit) - - y = x @ model.linear_terms + model.intercept - - if model.square_terms is not None: - y += np.sum((x @ model.square_terms) * x, axis=1) / 2 - - if is_flat_x: - out = y.flatten()[0] - else: - out = y.flatten() - - return out - - -def n_free_params(dim, model_type): - """Number of free parameters in a model specified by name or model_info.""" - out = dim + 1 - if model_type in ("linear", "quadratic"): - if model_type == "quadratic": - out += n_second_order_terms(dim) - else: - raise ValueError() - return out - - -@njit -def n_second_order_terms(dim): - """Number of free second order terms in a quadratic model.""" - return dim * (dim + 1) // 2 - - -@njit -def n_interactions(dim): - """Number of free interaction terms in a quadratic model.""" - return dim * (dim - 1) // 2 - - -def is_second_order_model(model_or_info): - """Check if a model has any second order terms.""" - if isinstance(model_or_info, str): - out = model_or_info == "quadratic" - elif isinstance(model_or_info, (ScalarModel, VectorModel)): - out = model_or_info.square_terms is not None - else: - raise TypeError() - return out diff --git a/src/estimagic/optimization/tranquilo/options.py b/src/estimagic/optimization/tranquilo/options.py deleted file mode 100644 index 651f19100..000000000 --- a/src/estimagic/optimization/tranquilo/options.py +++ /dev/null @@ -1,210 +0,0 @@ -from typing import NamedTuple -from estimagic.optimization.tranquilo.models import n_free_params - -import numpy as np - - -def get_default_radius_options(x): - return RadiusOptions(initial_radius=0.1 * np.max(np.abs(x))) - - -def get_default_batch_size(n_cores): - return n_cores - - -def get_default_acceptance_decider(noisy): - return "noisy" if noisy else "classic" - - -def get_default_sample_size(model_type, x): - if model_type == "quadratic": - out = 2 * len(x) + 1 - else: - out = len(x) + 1 - - return out - - -def get_default_model_fitter(model_type, sample_size, x): - n_params = n_free_params(dim=len(x), model_type=model_type) - if model_type == "linear" or sample_size >= n_params: - fitter = "ols" - else: - fitter = "tranquilo" - return fitter - - -def get_default_residualize(model_fitter): - return model_fitter == "tranquilo" - - -def get_default_subsolver(bounds, cube_subsolver, sphere_subsolver): - return cube_subsolver if bounds.has_any else sphere_subsolver - - -def get_default_search_radius_factor(functype): - return 4.25 if functype == "scalar" else 5.0 - - -def get_default_model_type(functype): - return "quadratic" if functype == "scalar" else "linear" - - -def get_default_aggregator(functype, model_type): - if functype == "scalar" and model_type == "quadratic": - aggregator = "identity" - elif functype == "least_squares" and model_type == "linear": - aggregator = "least_squares_linear" - elif functype == "likelihood" and model_type == "linear": - aggregator = "information_equality_linear" - else: - allowed_combinations = { - "scalar": "quadratic", - "least_squares": "linear", - "likelihood": "linear", - } - raise NotImplementedError( - "The requested combination of functype and 
model_type is not supported. " - f"Allowed combinations are: {list(allowed_combinations.items())}." - ) - - return aggregator - - -def get_default_n_evals_at_start(noisy): - return 5 if noisy else 1 - - -class StopOptions(NamedTuple): - """Criteria for stopping without successful convergence.""" - - max_iter: int - max_eval: int - max_time: float - - -class ConvOptions(NamedTuple): - """Criteria for successful convergence.""" - - disable: bool - ftol_abs: float - gtol_abs: float - xtol_abs: float - ftol_rel: float - gtol_rel: float - xtol_rel: float - min_radius: float - - -class RadiusOptions(NamedTuple): - """Options for trust-region radius management.""" - - initial_radius: float - min_radius: float = 1e-6 - max_radius: float = 1e6 - rho_decrease: float = 0.1 - rho_increase: float = 0.1 - shrinking_factor: float = 0.5 - expansion_factor: float = 2.0 - large_step: float = 0.5 - max_radius_to_step_ratio: float = np.inf - - -class AcceptanceOptions(NamedTuple): - confidence_level: float = 0.8 - power_level: float = 0.8 - n_initial: int = 5 - n_min: int = 5 - n_max: int = 100 - min_improvement: float = 0.0 - - -class StagnationOptions(NamedTuple): - min_relative_step_keep: float = 0.125 - min_relative_step: float = 0.05 - sample_increment: int = 1 - max_trials: int = 1 - drop: bool = True - - -class SubsolverOptions(NamedTuple): - maxiter: int = 20 - maxiter_gradient_descent: int = 5 - conjugate_gradient_method: str = "cg" - gtol_abs: float = 1e-8 - gtol_rel: float = 1e-8 - gtol_scaled: float = 0.0 - gtol_abs_conjugate_gradient: float = 1e-8 - gtol_rel_conjugate_gradient: float = 1e-6 - k_easy: float = 0.1 - k_hard: float = 0.2 - - -class FitterOptions(NamedTuple): - l2_penalty_linear: float = 0.0 - l2_penalty_square: float = 0.1 - p_intercept: float = 0.05 - p_linear: float = 0.4 - p_square: float = 1.0 - - -class VarianceEstimatorOptions(NamedTuple): - max_distance_factor: float = 3.0 - min_n_evals: int = 3 - - -class FilterOptions(NamedTuple): - strictness: float = 1e-10 - shape: str = "sphere" - - -class SamplerOptions(NamedTuple): - distribution: str = None - hardness: float = 1 - algorithm: str = "scipy_lbfgsb" - algo_options: dict = None - criterion: str = None - n_points_randomsearch: int = 1 - return_info: bool = False - - -def update_option_bundle(default_options, user_options=None): - """Update default options with user options. - - The user option is converted to the type of the default option if possible. - - Args: - default_options (NamedTuple): Options that behave like a `typing.NamedTuple`, - i.e. have _fields as well as _asdict and _replace methods. - user_options (NamedTuple, Dict or None): User options as a dict or NamedTuple. - The default options will be updated by the user options. - - """ - if user_options is None: - return default_options - - # convert user options to dict - if not isinstance(user_options, dict): - user_options = user_options._asdict() - - # check that all user options are valid - invalid_fields = set(user_options) - set(default_options._fields) - if invalid_fields: - raise ValueError( - f"The following user options are not valid: {invalid_fields}. " - f"Valid options are {default_options._fields}." 
- ) - - # convert types if possible - typed = {} - for k, v in user_options.items(): - target_type = type(getattr(default_options, k)) - if isinstance(v, target_type): - typed[k] = v - else: - typed[k] = target_type(v) - - # update default options - out = default_options._replace(**typed) - - return out diff --git a/src/estimagic/optimization/tranquilo/poisedness.py b/src/estimagic/optimization/tranquilo/poisedness.py deleted file mode 100644 index 1359e0ddc..000000000 --- a/src/estimagic/optimization/tranquilo/poisedness.py +++ /dev/null @@ -1,211 +0,0 @@ -from functools import partial - -import numpy as np -from scipy.optimize import Bounds, NonlinearConstraint, minimize - - -def get_poisedness_constant(sample, shape="sphere"): - """Calculate the lambda poisedness constant of a sample. - - Note that the sample space is a trust-region with center 0 and radius 1. - It may be a (hyper-) sphere or cube. - - The implementation is based on :cite:`Conn2009`, Chapters 3 and 4. - - In general, if the sample is lambda-poised with a small lambda, where lambda >= 1, - the sample is said to have "good" geometry or to "span" the trust-region well. - As lambda grows, the system represented by these points becomes increasingly - linearly dependent. - - Formal definition: - A sample Y is said to be lambda-poised on a region of interest if it is linearly - independent and the Lagrange polynomials L(i) of points 1 through N in Y satisfy: - - lambda >= max_i max_x | L(i) | (1) - - i.e. for each point i in the sample, we maximize the absolute criterion value - of its Lagrange polynomial L(i); we then take the maximum over all these - criterion values as the lambda constant. - - When we compare different samples on the same trust-region, we are usually - interested in keeping the sample with the least lambda, so that (1) holds. - - - Args: - sample (np.ndarray): Array of shape (n_samples, n_params) containing the scaled - sample of points that lie within a trust-region with center 0 and radius 1. - shape (str): Geometric shape of the sample space. One of "sphere", "cube". - Default is "sphere". - - Returns: - tuple: - - lambda (float): The lambda poisedness constant. - - argmax (np.ndarray): 1d array of shape (n_params,) containing the - parameter vector that maximizes lambda. - - idx_max (int): Index relating to the position of the argmax in the sample. - - """ - n_params = sample.shape[1] - options = _get_minimize_options(shape, n_params) - - center = np.zeros(n_params) - lagrange_mat = _lagrange_poly_matrix(sample) - - lambda_ = 0 - idx_max = None - - for idx, poly in enumerate(lagrange_mat): - intercept = poly[0] - linear_terms = poly[1 : n_params + 1] - _coef_square_terms = poly[n_params + 1 :] - square_terms = _reshape_coef_to_square_terms(_coef_square_terms, n_params) - - neg_criterion = partial( - _eval_neg_absolute_value, - intercept=intercept, - linear_terms=linear_terms, - square_terms=square_terms, - ) - - result_max = minimize(fun=neg_criterion, x0=center, **options) - - critval = _eval_absolute_value( - result_max.x, intercept, linear_terms, square_terms - ) - - if critval > lambda_: - lambda_ = critval - argmax = result_max.x - idx_max = idx - - return lambda_, argmax, idx_max - - -def improve_poisedness(sample, shape="sphere", maxiter=5): - """Improve the lambda poisedness of the sample. - - The poisedness of the sample is improved in an incremental manner; replacing - one point at a time and reducing the upper bound on the absolute value of - the Lagrange polynomial.
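- 
-     Example (editor's sketch; the inner optimization in
-     ``get_poisedness_constant`` makes this comparatively slow, so only a tiny
-     sample is used):
- 
-         import numpy as np
- 
-         sample = np.array([[0.0, 0.0], [0.05, 0.0], [0.0, 1.0]])  # two close points
-         improved, lambdas = improve_poisedness(sample, maxiter=2)
-         # lambdas records the poisedness constant at each iteration and should
-         # decrease as badly placed points are replaced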
- - The implementation is based on algorithm 6.3 in :cite:`Conn2009`, - Chapter 6, p. 95 ff. - - Args: - sample (np.ndarray): Array of shape (n_samples, n_params). - shape (str): Geometric shape of the sample space. One of "sphere", "cube". - Default is "sphere". - maxiter (int): Maximum number of replacement iterations. Default is 5. - - Returns: - tuple: - - sample_improved (np.ndarray): Sample with improved poisedness. - - lambdas (list): History of lambdas. - - """ - sample_improved = sample.copy() - - lambdas = [] - - for _ in range(maxiter): - lambda_, argmax, idx_max = get_poisedness_constant( - sample=sample_improved, shape=shape - ) - - lambdas += [lambda_] - sample_improved[idx_max] = argmax - - return sample_improved, lambdas - - -def _lagrange_poly_matrix(sample): - """Construct matrix of Lagrange polynomials. - - See :cite:`Conn2009`, Chapter 4.2, p. 60. - - Args: - sample (np.ndarray): Array of shape (n_samples, n_params). - - Returns: - np.ndarray: Matrix of Lagrange polynomials of shape - (n_samples, 1 + n_params + n_params * (n_params + 1) // 2). - - """ - basis_mat = _scaled_polynomial_features(sample) - lagrange_mat = basis_mat @ np.linalg.pinv(basis_mat.T @ basis_mat) - - return lagrange_mat - - -def _scaled_polynomial_features(x): - """Construct intercept, linear terms, interactions, and scaled square terms. - - The square terms are scaled by 1 / 2. - - Args: - x (np.ndarray): Array of shape (n_samples, n_params). - - Returns: - np.ndarray: Intercept, linear terms, interactions and scaled square terms. - Has shape (n_samples, 1 + n_params + n_params * (n_params + 1) // 2). - - """ - n_samples, n_params = np.atleast_2d(x).shape - n_poly_terms = n_params * (n_params + 1) // 2 - - poly_terms = np.empty((n_poly_terms, n_samples), np.float64) - xt = x.T - - idx = 0 - for i in range(n_params): - poly_terms[idx] = 0.5 * xt[i] ** 2 - idx += 1 - - for j in range(i + 1, n_params): - poly_terms[idx] = xt[i] * xt[j] - idx += 1 - - intercept = np.ones((1, n_samples), x.dtype) - out = np.concatenate((intercept, xt, poly_terms), axis=0) - - return out.T - - -def _reshape_coef_to_square_terms(coef, n_params): - """Reshape square coefficients to matrix of square terms.""" - mat = np.empty((n_params, n_params)) - idx = -1 - - for j in range(n_params): - for i in range(j + 1): - idx += 1 - mat[i, j] = coef[idx] - mat[j, i] = coef[idx] - - return mat - - -def _get_minimize_options(shape, n_params): - """Get the minimizer options.""" - if shape == "sphere": - nonlinear_constraint = NonlinearConstraint(lambda x: np.linalg.norm(x), 0, 1) - options = {"method": "trust-constr", "constraints": [nonlinear_constraint]} - - elif shape == "cube": - bound_constraints = Bounds(-np.ones(n_params), np.ones(n_params)) - options = {"method": "trust-constr", "bounds": bound_constraints} - - else: - raise ValueError( - f"Invalid shape argument: {shape}. Must be one of sphere, cube."
- ) - - return options - - -def _eval_absolute_value(x, intercept, linear_terms, square_terms): - return np.abs(intercept + linear_terms.T @ x + 0.5 * x.T @ square_terms @ x) - - -def _eval_neg_absolute_value(x, intercept, linear_terms, square_terms): - return -_eval_absolute_value(x, intercept, linear_terms, square_terms) diff --git a/src/estimagic/optimization/tranquilo/process_arguments.py b/src/estimagic/optimization/tranquilo/process_arguments.py deleted file mode 100644 index 0c75d8cd9..000000000 --- a/src/estimagic/optimization/tranquilo/process_arguments.py +++ /dev/null @@ -1,314 +0,0 @@ -import numpy as np - -from estimagic.optimization.algo_options import ( - CONVERGENCE_RELATIVE_CRITERION_TOLERANCE, - CONVERGENCE_RELATIVE_GRADIENT_TOLERANCE, -) -from estimagic.optimization.tranquilo.acceptance_decision import get_acceptance_decider -from estimagic.optimization.tranquilo.aggregate_models import get_aggregator -from estimagic.optimization.tranquilo.bounds import Bounds -from estimagic.optimization.tranquilo.estimate_variance import get_variance_estimator -from estimagic.optimization.tranquilo.filter_points import get_sample_filter -from estimagic.optimization.tranquilo.fit_models import get_fitter -from estimagic.optimization.tranquilo.history import History -from estimagic.optimization.tranquilo.options import ( - ConvOptions, - StagnationOptions, - StopOptions, - get_default_acceptance_decider, - get_default_aggregator, - get_default_batch_size, - get_default_model_fitter, - get_default_residualize, - get_default_model_type, - get_default_n_evals_at_start, - get_default_radius_options, - get_default_sample_size, - get_default_search_radius_factor, - update_option_bundle, -) -from estimagic.optimization.tranquilo.region import Region -from estimagic.optimization.tranquilo.sample_points import get_sampler -from estimagic.optimization.tranquilo.solve_subproblem import get_subsolver -from estimagic.optimization.tranquilo.wrap_criterion import get_wrapped_criterion - - -def process_arguments( - # functype, will be partialled out - functype, - # problem description - criterion, - x, - lower_bounds=None, - upper_bounds=None, - *, - # basic options - noisy=False, - # convergence options - disable_convergence=False, - convergence_absolute_criterion_tolerance=0.0, - convergence_absolute_gradient_tolerance=0.0, - convergence_absolute_params_tolerance=0.0, - convergence_relative_criterion_tolerance=CONVERGENCE_RELATIVE_CRITERION_TOLERANCE, - convergence_relative_gradient_tolerance=CONVERGENCE_RELATIVE_GRADIENT_TOLERANCE, - convergence_relative_params_tolerance=1e-8, - convergence_min_trust_region_radius=0.0, - # stopping options - stopping_max_criterion_evaluations=2_000, - stopping_max_iterations=200, - stopping_max_time=np.inf, - # single advanced options - batch_evaluator="joblib", - n_cores=1, - batch_size=None, - sample_size=None, - model_type=None, - search_radius_factor=None, - n_evals_per_point=1, - n_evals_at_start=None, - seed=925408, - # bundled advanced options - radius_options=None, - stagnation_options=None, - # component names and related options - sampler="optimal_hull", - sampler_options=None, - sample_filter="keep_all", - sample_filter_options=None, - model_fitter=None, - model_fitter_options=None, - cube_subsolver="bntr_fast", - sphere_subsolver="gqtpar_fast", - subsolver_options=None, - acceptance_decider=None, - acceptance_decider_options=None, - variance_estimator="classic", - variance_estimator_options=None, - infinity_handler="relative", - residualize=None, -): - # 
process convergence options - conv_options = ConvOptions( - disable=bool(disable_convergence), - ftol_abs=float(convergence_absolute_criterion_tolerance), - gtol_abs=float(convergence_absolute_gradient_tolerance), - xtol_abs=float(convergence_absolute_params_tolerance), - ftol_rel=float(convergence_relative_criterion_tolerance), - gtol_rel=float(convergence_relative_gradient_tolerance), - xtol_rel=float(convergence_relative_params_tolerance), - min_radius=float(convergence_min_trust_region_radius), - ) - - # process stopping options - stop_options = StopOptions( - max_iter=int(stopping_max_iterations), - max_eval=int(stopping_max_criterion_evaluations), - max_time=float(stopping_max_time), - ) - - # process simple options with static defaults - x = _process_x(x) - noisy = _process_noisy(noisy) - n_cores = _process_n_cores(n_cores) - stagnation_options = update_option_bundle(StagnationOptions(), stagnation_options) - n_evals_per_point = int(n_evals_per_point) - sampling_rng = _process_seed(seed) - n_evals_at_start = _process_n_evals_at_start( - n_evals_at_start, - noisy, - ) - - # process options that depend on arguments with static defaults - search_radius_factor = _process_search_radius_factor(search_radius_factor, functype) - batch_size = _process_batch_size(batch_size, n_cores) - radius_options = update_option_bundle(get_default_radius_options(x), radius_options) - model_type = _process_model_type(model_type, functype) - acceptance_decider = _process_acceptance_decider(acceptance_decider, noisy) - - # process options that depend on arguments with dependent defaults - target_sample_size = _process_sample_size( - sample_size=sample_size, - model_type=model_type, - x=x, - ) - model_fitter = _process_model_fitter( - model_fitter, model_type=model_type, sample_size=target_sample_size, x=x - ) - residualize = _process_residualize(residualize, model_fitter=model_fitter) - - # initialize components - history = History(functype=functype) - history.add_xs(x) - evaluate_criterion = get_wrapped_criterion( - criterion=criterion, - batch_evaluator=batch_evaluator, - n_cores=n_cores, - history=history, - ) - _bounds = Bounds(lower_bounds, upper_bounds) - trustregion = Region( - center=x, - radius=radius_options.initial_radius, - bounds=_bounds, - ) - - sample_points = get_sampler(sampler, sampler_options) - - solve_subproblem = get_subsolver( - cube_solver=cube_subsolver, - sphere_solver=sphere_subsolver, - user_options=subsolver_options, - ) - - filter_points = get_sample_filter( - sample_filter=sample_filter, - user_options=sample_filter_options, - ) - - fit_model = get_fitter( - fitter=model_fitter, - fitter_options=model_fitter_options, - model_type=model_type, - infinity_handling=infinity_handler, - residualize=residualize, - ) - - aggregate_model = get_aggregator( - aggregator=get_default_aggregator(functype=functype, model_type=model_type), - ) - - estimate_variance = get_variance_estimator( - variance_estimator, - variance_estimator_options, - ) - - accept_candidate = get_acceptance_decider( - acceptance_decider, - acceptance_decider_options, - ) - - # put everything in a dict - out = { - "evaluate_criterion": evaluate_criterion, - "x": x, - "noisy": noisy, - "conv_options": conv_options, - "stop_options": stop_options, - "radius_options": radius_options, - "batch_size": batch_size, - "target_sample_size": target_sample_size, - "stagnation_options": stagnation_options, - "search_radius_factor": search_radius_factor, - "n_evals_per_point": n_evals_per_point, - "n_evals_at_start": 
n_evals_at_start, - "trustregion": trustregion, - "sampling_rng": sampling_rng, - "history": history, - "sample_points": sample_points, - "solve_subproblem": solve_subproblem, - "filter_points": filter_points, - "fit_model": fit_model, - "aggregate_model": aggregate_model, - "estimate_variance": estimate_variance, - "accept_candidate": accept_candidate, - } - - return out - - -def _process_x(x): - return np.asarray(x, dtype=np.float64) - - -def _process_noisy(noisy): - return bool(noisy) - - -def _process_n_cores(n_cores): - return int(n_cores) - - -def _process_batch_size(batch_size, n_cores): - if batch_size is None: - batch_size = get_default_batch_size(n_cores) - - elif batch_size < n_cores: - raise ValueError("batch_size must be at least as large as n_cores.") - - return int(batch_size) - - -def _process_sample_size(sample_size, model_type, x): - if sample_size is None: - out = get_default_sample_size(model_type=model_type, x=x) - elif callable(sample_size): - out = sample_size(x=x, model_type=model_type) - else: - out = int(sample_size) - return out - - -def _process_model_type(model_type, functype): - out = get_default_model_type(functype) if model_type is None else model_type - - if out not in ["linear", "quadratic"]: - raise ValueError("model_type must be either 'linear' or 'quadratic'.") - - return out - - -def _process_search_radius_factor(search_radius_factor, functype): - if search_radius_factor is None: - out = get_default_search_radius_factor(functype) - else: - out = float(search_radius_factor) - - if out <= 0: - raise ValueError("search_radius_factor must be positive.") - - return out - - -def _process_seed(seed): - return np.random.default_rng(seed) - - -def _process_acceptance_decider(acceptance_decider, noisy): - if acceptance_decider is None: - out = get_default_acceptance_decider(noisy) - else: - out = acceptance_decider - - return out - - -def _process_model_fitter(model_fitter, model_type, sample_size, x): - if model_fitter is None: - out = get_default_model_fitter(model_type, sample_size=sample_size, x=x) - else: - out = model_fitter - - return out - - -def _process_residualize(residualize, model_fitter): - if residualize is None: - out = get_default_residualize(model_fitter) - else: - if not isinstance(residualize, bool): - raise ValueError("residualize must be a boolean.") - out = residualize - - return out - - -def _process_n_evals_at_start(n_evals, noisy): - if n_evals is None: - out = get_default_n_evals_at_start(noisy) - else: - out = int(n_evals) - - if out < 1: - raise ValueError("n_initial_acceptance_evals must be non-negative.") - - return out diff --git a/src/estimagic/optimization/tranquilo/region.py b/src/estimagic/optimization/tranquilo/region.py deleted file mode 100644 index 48abe2bcb..000000000 --- a/src/estimagic/optimization/tranquilo/region.py +++ /dev/null @@ -1,152 +0,0 @@ -from dataclasses import dataclass, replace - -import numpy as np - -from estimagic.optimization.tranquilo.bounds import Bounds -from estimagic.optimization.tranquilo.volume import ( - get_radius_of_cube_with_volume_of_sphere, -) - - -@dataclass(frozen=True) -class Region: - """Trust region.""" - - center: np.ndarray - radius: float - bounds: Bounds = None - - def __post_init__(self): - shape = _get_shape(self.center, self.radius, self.bounds) - cube_bounds = _get_cube_bounds(self.center, self.radius, self.bounds, shape) - cube_center = _get_cube_center(cube_bounds) - effective_center = _get_effective_center(shape, self.center, cube_center) - effective_radius = 
_get_effective_radius(shape, self.radius, cube_bounds) - - # cannot use standard __setattr__ because it is frozen - super().__setattr__("shape", shape) - super().__setattr__("_cube_bounds", cube_bounds) - super().__setattr__("_cube_center", cube_center) - super().__setattr__("effective_center", effective_center) - super().__setattr__("effective_radius", effective_radius) - - @property - def cube_bounds(self) -> Bounds: - if self.shape == "sphere": - raise AttributeError( - "The trustregion is a sphere, and thus has no cube bounds." - ) - return self._cube_bounds - - @property - def cube_center(self) -> np.ndarray: - if self.shape == "sphere": - raise AttributeError( - "The trustregion is a sphere, and thus has no cube center." - ) - return self._cube_center - - def map_to_unit(self, x: np.ndarray) -> np.ndarray: - """Map points from the trustregion to the unit sphere or cube.""" - if self.shape == "sphere": - out = _map_to_unit_sphere(x, center=self.center, radius=self.radius) - else: - out = _map_to_unit_cube(x, cube_bounds=self.cube_bounds) - return out - - def map_from_unit(self, x: np.ndarray) -> np.ndarray: - """Map points from the unit sphere or cube to the trustregion.""" - if self.shape == "sphere": - out = _map_from_unit_sphere(x, center=self.center, radius=self.radius) - else: - cube_bounds = self.cube_bounds - out = _map_from_unit_cube(x, cube_bounds=cube_bounds) - # Bounds may not be satisfied exactly due to numerical inaccuracies. - out = np.clip(out, cube_bounds.lower, cube_bounds.upper) - return out - - # make it behave like a NamedTuple - def _replace(self, **kwargs): - return replace(self, **kwargs) - - -def _map_to_unit_cube(x, cube_bounds): - """Map points from the trustregion to the unit cube.""" - out = 2 * (x - cube_bounds.lower) / (cube_bounds.upper - cube_bounds.lower) - 1 - return out - - -def _map_to_unit_sphere(x, center, radius): - """Map points from the trustregion to the unit sphere.""" - out = (x - center) / radius - return out - - -def _map_from_unit_cube(x, cube_bounds): - """Map points from the unit cube to the trustregion.""" - out = (cube_bounds.upper - cube_bounds.lower) * (x + 1) / 2 + cube_bounds.lower - return out - - -def _map_from_unit_sphere(x, center, radius): - """Map points from the unit sphere to the trustregion.""" - out = x * radius + center - return out - - -def _get_shape(center, radius, bounds): - any_bounds_binding = _any_bounds_binding( - bounds=bounds, center=center, radius=radius - ) - return "cube" if any_bounds_binding else "sphere" - - -def _get_cube_bounds(center, radius, bounds, shape): - if shape == "cube": - radius = get_radius_of_cube_with_volume_of_sphere(radius, len(center)) - cube_bounds = _create_cube_bounds(center=center, radius=radius, bounds=bounds) - return cube_bounds - - -def _get_cube_center(cube_bounds): - cube_center = (cube_bounds.lower + cube_bounds.upper) / 2 - return cube_center - - -def _get_effective_center(shape, center, cube_center): - effective_center = center if shape == "sphere" else cube_center - return effective_center - - -def _get_effective_radius(shape, radius, cube_bounds): - if shape == "sphere": - effective_radius = radius - else: - effective_radius = (cube_bounds.upper - cube_bounds.lower) / 2 - return effective_radius - - -def _create_cube_bounds(center, radius, bounds): - """Get new bounds that define the intersection of the trustregion and the bounds.""" - lower_bounds = center - radius - upper_bounds = center + radius - - if bounds is not None and bounds.lower is not None: - lower_bounds = 
np.clip(lower_bounds, bounds.lower, np.inf) - - if bounds is not None and bounds.upper is not None: - upper_bounds = np.clip(upper_bounds, -np.inf, bounds.upper) - - return Bounds(lower=lower_bounds, upper=upper_bounds) - - -def _any_bounds_binding(bounds, center, radius): - """Check if any bound is binding, i.e. inside the trustregion.""" - out = False - if bounds is not None and bounds.has_any: - if bounds.lower is not None: - lower_binding = np.min(center - bounds.lower) <= radius - if bounds.upper is not None: - upper_binding = np.min(bounds.upper - center) <= radius - out = np.any(lower_binding) or np.any(upper_binding) - return out diff --git a/src/estimagic/optimization/tranquilo/rho_noise.py b/src/estimagic/optimization/tranquilo/rho_noise.py deleted file mode 100644 index 9df77335e..000000000 --- a/src/estimagic/optimization/tranquilo/rho_noise.py +++ /dev/null @@ -1,87 +0,0 @@ -import numpy as np - -from estimagic.optimization.tranquilo.acceptance_decision import calculate_rho - - -def simulate_rho_noise( - xs, - vector_model, - trustregion, - noise_cov, - model_fitter, - model_aggregator, - subsolver, - rng, - n_draws=100, - ignore_corelation=True, -): - """Simulate a rho that would obtain on average if there is no approximation error. - - This can be used to adjust the sample size in the presence of noise. - - Throughout this function the prefix true refers to what is considered as ground - truth for the purpose of the simulation. The prefix sim refers to the simulated - quantities. - - Args: - xs (np.ndarray): Sample of points on which surrogate models will be - fitted during the simulation. This sample is not scaled to the trustregion. - vector_model (VectorModel): A vector surrogate model that is taken as true model - for the simulation. In many cases this model was fitted on xs but this is - not a requirement. - trustregion (Region): The trustregion in which the optimization is performed. - noise_cov(np.ndarray): Covariance matrix of the noise. The noise is assumed to - be drawn from a multivariate normal distribution with mean zero and this - covariance matrix. - model_fitter (callable): A function that fits a model. - model_aggregator (callable): A function that aggregates a vector model to a - scalar model. - subsolver (callable): A function that solves the subproblem. - rng (np.random.Generator): Random number generator. - n_draws (int): Number of draws used to estimate the rho noise. - ignore_corelation (bool): If True, the noise is assumed to be uncorrelated and - only the diagonal entries of the covariance matrix are used. 
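    Returns:
        np.ndarray: The simulated rho values, one per noise draw; has shape
            (n_draws,) under the assumption that ``calculate_rho`` returns a
            scalar for each draw.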
- - """ - n_samples, n_params = xs.shape - n_residuals = len(noise_cov) - - x_unit = trustregion.map_to_unit(xs) - - true_fvecs = vector_model.predict(x_unit) - - true_scalar_model = model_aggregator(vector_model=vector_model) - - true_current_fval = true_scalar_model.predict(np.zeros(n_params)) - - if ignore_corelation: - noise_cov = np.diag(np.diag(noise_cov)) - - noise = rng.multivariate_normal( - mean=np.zeros(n_residuals), cov=noise_cov, size=n_draws * n_samples - ).reshape(n_draws, n_samples, n_residuals) - - rhos = [] - for draw in noise: - sim_fvecs = true_fvecs + draw - sim_vector_model = model_fitter( - xs, - sim_fvecs, - weights=None, - region=trustregion, - old_model=None, - ) - sim_scalar_model = model_aggregator(vector_model=sim_vector_model) - sim_sub_sol = subsolver(sim_scalar_model, trustregion) - - sim_candidate_fval = true_scalar_model.predict(sim_sub_sol.x_unit) - sim_actual_improvement = -(sim_candidate_fval - true_current_fval) - - sim_rho = calculate_rho( - actual_improvement=sim_actual_improvement, - expected_improvement=sim_sub_sol.expected_improvement, - ) - - rhos.append(sim_rho) - - return np.array(rhos) diff --git a/src/estimagic/optimization/tranquilo/sample_points.py b/src/estimagic/optimization/tranquilo/sample_points.py deleted file mode 100644 index b4d7a500c..000000000 --- a/src/estimagic/optimization/tranquilo/sample_points.py +++ /dev/null @@ -1,466 +0,0 @@ -from functools import partial - -import numpy as np -from scipy.spatial.distance import pdist -from scipy.special import gammainc, logsumexp - -import estimagic as em -from estimagic.optimization.tranquilo.get_component import get_component -from estimagic.optimization.tranquilo.options import SamplerOptions - - -def get_sampler(sampler, user_options=None): - """Get sampling function partialled options. - - Args: - sampler (str or callable): Name of a sampling method or sampling function. - The arguments of sampling functions need to be: ``trustregion``, - ``n_points``, ``rng``, ``existing_xs`` and ``bounds``. - Sampling functions need to return a dictionary with the entry "points" - (and arbitrary additional information). See ``reference_sampler`` for - details. - user_options (dict): Additional keyword arguments for the sampler. Options that - are not used by the sampler are ignored with a warning. If sampler is - 'hull_sampler' or 'optimal_hull_sampler' the user options must contain the - argument 'order', which is a positive integer. - - Returns: - callable: Function that depends on trustregion, n_points, existing_xs and - returns a new sample. - - """ - built_in_samplers = { - "random_interior": _interior_sampler, - "random_hull": _hull_sampler, - "optimal_hull": _optimal_hull_sampler, - } - - mandatory_args = [ - "trustregion", - "n_points", - "existing_xs", - "rng", - ] - - out = get_component( - name_or_func=sampler, - component_name="sampler", - func_dict=built_in_samplers, - user_options=user_options, - default_options=SamplerOptions(), - mandatory_signature=mandatory_args, - ) - - return out - - -def _interior_sampler( - trustregion, - n_points, - rng, - existing_xs=None, # noqa: ARG001 -): - """Random generation of trustregion points inside a ball or box. - - Args: - trustregion (Region): Trustregion. See module region.py. - n_points (int): how many new points to sample - rng (numpy.random.Generator): Random number generator. 
- existing_xs (np.ndarray or None): 2d numpy array in which each row is an - x vector at which the criterion function has already been evaluated, that - satisfies lower_bounds <= existing_xs <= upper_bounds. - - """ - if trustregion.shape == "sphere": - _sampler = _ball_sampler - else: - _sampler = _box_sampler - - out = _sampler( - trustregion=trustregion, - n_points=n_points, - rng=rng, - ) - return out - - -def _box_sampler( - trustregion, - n_points, - rng, -): - """Naive random generation of trustregion points inside a box. - - Args: - trustregion (Region): Trustregion. See module region.py. - n_points (int): how many new points to sample - rng (numpy.random.Generator): Random number generator. - existing_xs (np.ndarray or None): 2d numpy array in which each row is an - x vector at which the criterion function has already been evaluated, that - satisfies lower_bounds <= existing_xs <= upper_bounds. - - """ - n_params = len(trustregion.center) - bounds = trustregion.cube_bounds - points = rng.uniform( - low=bounds.lower, - high=bounds.upper, - size=(n_points, n_params), - ) - return points - - -def _ball_sampler( - trustregion, - n_points, - rng, -): - """Naive random generation of trustregion points inside a ball. - - Code is adapted from https://tinyurl.com/y3p2dz6b. - - Args: - trustregion (Region): Trustregion. See module region.py. - n_points (int): how many new points to sample - rng (numpy.random.Generator): Random number generator. - existing_xs (np.ndarray or None): 2d numpy array in which each row is an - x vector at which the criterion function has already been evaluated, that - satisfies lower_bounds <= existing_xs <= upper_bounds. - - """ - n_params = len(trustregion.center) - raw = rng.normal(size=(n_points, n_params)) - norm = np.linalg.norm(raw, axis=1, ord=2) - scale = gammainc(n_params / 2, norm**2 / 2) ** (1 / n_params) / norm - points = raw * scale.reshape(-1, 1) - out = trustregion.map_from_unit(points) - return out - - -def _hull_sampler( - trustregion, - n_points, - rng, - distribution, - existing_xs=None, # noqa: ARG001 -): - """Random generation of trustregion points on the hull of general sphere / cube. - - Points are sampled randomly on a hull of a sphere or cube. These points are then - mapped into the feasible region, which is defined by the intersection of the - trustregion and the bounds. - - Args: - trustregion (Region): Trustregion. See module region.py. - n_points (int): how many new points to sample - rng (numpy.random.Generator): Random number generator. - distribution (str): Distribution to use for initial sample before points are - projected onto unit hull. Must be in {'normal', 'uniform'}. - existing_xs (np.ndarray or None): 2d numpy array in which each row is an - x vector at which the criterion function has already been evaluated, that - satisfies lower_bounds <= existing_xs <= upper_bounds. - - """ - n_params = len(trustregion.center) - - if distribution is None: - distribution = "normal" if trustregion.shape == "sphere" else "uniform" - raw = _draw_from_distribution(distribution, rng=rng, size=(n_points, n_params)) - points = _project_onto_unit_hull(raw, trustregion_shape=trustregion.shape) - out = trustregion.map_from_unit(points) - return out - - -def _optimal_hull_sampler( - trustregion, - n_points, - rng, - distribution, - hardness, - algorithm, - algo_options, - criterion, - n_points_randomsearch, - return_info, - existing_xs=None, -): - """Optimal generation of trustregion points on the hull of general sphere / cube. 
- - Points are sampled optimally on a hull of a sphere or cube, where the criterion that - is maximized is the minimum distance of all pairs of points, except for pairs of - existing points. These points are then mapped into the feasible region, which is - defined by the intersection of the trustregion and the bounds. Instead of using a - hard minimum we return the soft minimum, whose accuracy we govern by the hardness - factor. For more information on the soft-minimum, see: - https://tinyurl.com/mrythbk4. - - Args: - trustregion (Region): Trustregion. See module region.py. - n_points (int): how many new points to sample - rng (numpy.random.Generator): Random number generator. - distribution (str): Distribution to use for initial sample before points are - projected onto unit hull. Must be in {'normal', 'uniform'}. - hardness (float): Positive scaling factor. As hardness tends to infinity the - soft minimum (logsumexp) approaches the hard minimum. Default is 1. A - detailed explanation is given in the docstring. - algorithm (str): Optimization algorithm. - algo_options (dict): Algorithm specific configuration of the optimization. See - :ref:`list_of_algorithms` for supported options of each algorithm. Default - sets ``stopping_max_iterations=2 * n_params + 5``. - criterion (str or None): "distance", "determinant" or None. - "distance": maximize the minimal distance between points, excluding - distances between existing points. This is a fast and relatively simple - optimization problem and yields the same points as "determinant" in - many circumstances. - "determinant": maximize the determinant of x'x, where x is the matrix - of points. This is known as d-optimality in the optimal design literature - and as Fekete points in the function approximation literature. This - criterion has the best theoretical properties but is very hard to - optimize. Thus the practical performance can be bad. - None: Use the "determinant" criterion if only one point is added and the - "distance" criterion if multiple points are added. - n_points_randomsearch (int): Number of random points from which to select - the best in terms of the Fekete criterion before starting the optimization. - Default is 1. - return_info (bool): Whether to also return a dict with diagnostic information - about the optimization. - existing_xs (np.ndarray or None): 2d numpy array in which each row is an - x vector at which the criterion function has already been evaluated, that - satisfies lower_bounds <= existing_xs <= upper_bounds. - - Returns: - - np.ndarray: Generated points. Has shape (n_points, len(trustregion.center)). - - dict: Information about the optimization. Only returned if ``return_info`` is - True. - - """ - n_params = len(trustregion.center) - - if n_points <= 0: - return np.array([]) - - if criterion is None: - criterion = "determinant" if n_points == 1 else "distance" - - algo_options = {} if algo_options is None else algo_options - if "stopping_max_iterations" not in algo_options: - algo_options["stopping_max_iterations"] = 2 * n_params + 5 - - if existing_xs is not None: - # map existing points into unit space for easier optimization - - existing_xs_unit = trustregion.map_to_unit(existing_xs) - - if criterion == "distance": - dist_to_center = np.linalg.norm(existing_xs_unit, axis=1) - not_centric = dist_to_center >= 0.1 - if not_centric.any(): - existing_xs_unit = existing_xs_unit[not_centric] - else: - existing_xs_unit = None - - else: - existing_xs_unit = None - - # Define criterion functions. "determinant" is the Fekete criterion and "distance" - # corresponds to an approximation of the Fekete criterion.
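    # Added note (not in the original code): for points stacked in a matrix X of
    # shape (n, d), the "determinant" criterion below is det(X.T @ X / n), while
    # the "distance" criterion is a logsumexp-smoothed version of
    # min_{i<j} ||x_i - x_j||**2. Maximizing either spreads the points over the hull.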
- criterion_kwargs = { - "existing_xs": existing_xs_unit, - "trustregion_shape": trustregion.shape, - "n_params": n_params, - } - - func_dict = { - "determinant": partial(_determinant_on_hull, **criterion_kwargs), - "distance": partial( - _minimal_pairwise_distance_on_hull, - **criterion_kwargs, - hardness=hardness, - ), - } - - # Select start params through random search - if distribution is None: - distribution = "normal" if trustregion.shape == "sphere" else "uniform" - - candidates = _draw_from_distribution( - distribution, rng=rng, size=(n_points_randomsearch, n_points, n_params) - ) - candidates = [ - _project_onto_unit_hull(_x, trustregion_shape=trustregion.shape) - for _x in candidates - ] - - if n_points_randomsearch == 1: - x0 = candidates[0] - else: - _fekete_criterion = [func_dict["determinant"](_x) for _x in candidates] - x0 = candidates[np.argmax(_fekete_criterion)] - - x0 = x0.flatten()  # flatten so that em.maximize uses fast path - - # With no existing points and a single new point there are zero pairs, so the - # pairwise distance criterion would raise an error; skip the optimization - if existing_xs_unit is None and n_points == 1: - opt_params = x0 - else: - res = em.maximize( - criterion=func_dict[criterion], - params=x0, - algorithm=algorithm, - lower_bounds=-np.ones_like(x0), - upper_bounds=np.ones_like(x0), - algo_options=algo_options, - ) - opt_params = res.params - - # Make sure the optimal sampling is actually better than the initial one with - # respect to the Fekete criterion. This could be violated if the surrogate - # criterion is not a good approximation or if the optimization fails. - start_fekete = func_dict["determinant"](x0) - end_fekete = func_dict["determinant"](opt_params) - - if start_fekete >= end_fekete: - opt_params = x0 - - points = _project_onto_unit_hull( - opt_params.reshape(-1, n_params), trustregion_shape=trustregion.shape - ) - points = trustregion.map_from_unit(points) - - # Collect additional information. Mostly used for testing. - info = { - "start_params": x0, - "opt_params": opt_params, - "start_fekete": start_fekete, - "opt_fekete": end_fekete, - } - - out = (points, info) if return_info else points - return out - - -# ====================================================================================== -# Helper functions -# ====================================================================================== - - -def _minimal_pairwise_distance_on_hull( - x, existing_xs, trustregion_shape, hardness, n_params -): - """Compute minimal pairwise distance of new and existing points. - - Instead of optimizing the distance of points in the feasible trustregion, this - criterion function leads to the maximization of the minimum distance of the points - in the unit space. These can then be mapped into the feasible trustregion. We do not - consider the distances between existing points. Instead of using a hard minimum we - return the soft minimum, whose accuracy we govern by the hardness factor. For more - information on the soft-minimum, see: https://tinyurl.com/mrythbk4. - - Args: - x (np.ndarray): Flattened 1d array of internal points. Each value is in [-1, 1]. - existing_xs (np.ndarray or None): 2d numpy array in which each row is an - x vector at which the criterion function has already been evaluated, that - satisfies -1 <= existing_xs <= 1. - trustregion_shape (str): Shape of the trustregion. Either "cube" or "sphere". - hardness (float): Positive scaling factor. As hardness tends to infinity the - soft minimum (logsumexp) approaches the hard minimum. Default is 1.
A - detailed explanation is given in the docstring. - n_params (int): Dimensionality of the problem. - - Returns: - float: The criterion value. - - """ - x = x.reshape(-1, n_params) - x = _project_onto_unit_hull(x, trustregion_shape=trustregion_shape) - - if existing_xs is not None: - sample = np.row_stack([x, existing_xs]) - n_existing_pairs = len(existing_xs) * (len(existing_xs) - 1) // 2 - slc = slice(0, -n_existing_pairs) if n_existing_pairs else slice(None) - else: - sample = x - slc = slice(None) - - dist = pdist(sample) ** 2 - - # drop distances between existing points. They could introduce flat spots. - dist = dist[slc] - - # soft minimum - crit_value = -logsumexp(-hardness * dist) - return crit_value - - -def _determinant_on_hull(x, existing_xs, trustregion_shape, n_params): - """Compute d-optimality criterion of new and existing points. - - Instead of optimizing the distance of points in the feasible trustregion, this - criterion function maximizes the determinant of x'x of the points in the unit - space, i.e. the d-optimality criterion. The optimal points can then be mapped - into the feasible trustregion. - - Args: - x (np.ndarray): Flattened 1d array of internal points. Each value is in [-1, 1]. - existing_xs (np.ndarray or None): 2d numpy array in which each row is an - x vector at which the criterion function has already been evaluated, that - satisfies -1 <= existing_xs <= 1. - trustregion_shape (str): Shape of the trustregion. Either "cube" or "sphere". - n_params (int): Dimensionality of the problem. - - Returns: - float: The criterion value. - - """ - x = x.reshape(-1, n_params) - n_samples = len(x) - - x = _project_onto_unit_hull(x, trustregion_shape=trustregion_shape) - - if existing_xs is not None: - sample = np.row_stack([x, existing_xs]) - else: - sample = x - - crit_value = np.linalg.det(sample.T @ sample / n_samples) - - return crit_value - - -def _draw_from_distribution(distribution, rng, size): - """Draw points from distribution. - - Args: - distribution (str): Distribution to use for initial sample before points are - projected onto unit hull. Must be in {'normal', 'uniform'}. - rng (np.random.Generator): Random number generator. - size (Union[int, tuple[int]]): Output shape. - - Returns: - np.ndarray: Randomly drawn points. - - """ - if distribution == "normal": - draw = rng.normal(size=size) - elif distribution == "uniform": - draw = rng.uniform(-1, 1, size=size) - else: - raise ValueError( - f"distribution is {distribution}, but needs to be in ('normal', 'uniform')." - ) - return draw - - -def _project_onto_unit_hull(x, trustregion_shape): - """Project points from the unit space onto the hull of a geometric figure. - - Args: - x (np.ndarray): 2d array of points to be projected. Each value is in [-1, 1]. - trustregion_shape (str): Shape of the trustregion: {'sphere', 'cube'}. - - Returns: - np.ndarray: The projected points.
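    Example (a minimal illustrative sketch, not part of the original docstring):

        import numpy as np

        x = np.array([[3.0, 4.0]])
        _project_onto_unit_hull(x, trustregion_shape="sphere")  # -> [[0.6, 0.8]]
        _project_onto_unit_hull(x, trustregion_shape="cube")    # -> [[0.75, 1.0]]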
- - """ - order = 2 if trustregion_shape == "sphere" else np.inf - norm = np.linalg.norm(x, axis=1, ord=order).reshape(-1, 1) - projected = x / norm - return projected diff --git a/src/estimagic/optimization/tranquilo/solve_subproblem.py b/src/estimagic/optimization/tranquilo/solve_subproblem.py deleted file mode 100644 index 58839a683..000000000 --- a/src/estimagic/optimization/tranquilo/solve_subproblem.py +++ /dev/null @@ -1,200 +0,0 @@ -from functools import partial -from typing import NamedTuple - -import numpy as np - -from estimagic.optimization.tranquilo.get_component import get_component -from estimagic.optimization.subsolvers.bntr import ( - bntr, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - bntr_fast, -) -from estimagic.optimization.subsolvers.gqtpar import ( - gqtpar, -) -from estimagic.optimization.subsolvers.gqtpar_fast import gqtpar_fast -from estimagic.optimization.tranquilo.wrapped_subsolvers import ( - slsqp_sphere, - solve_multistart, -) -from estimagic.optimization.tranquilo.options import SubsolverOptions - - -def get_subsolver(sphere_solver, cube_solver, user_options=None): - """Get an algorithm-function with partialled options. - - Args: - sphere_solver (str or callable): Name of a subproblem solver or a subproblem - solver, designed to solve the problem in the unit sphere. The first argument - of any subsolver needs to be ``model``. The second argument needs to be - ``x_candidate``, an initial guess for the solution in the unit space. - Moreover, subsolvers can have any number of additional keyword arguments. - cube_solver (str or callable): Name of a subproblem solver or a subproblem - solver, designed to solve the problem in the unit box. The first argument - of any subsolver needs to be ``model``. The second and third arguments have - to be ``lower_bounds`` and ``upper_bounds``. The fourth argument needs to be - ``x_candidate``, an initial guess for the solution in the unit space. - Moreover, subsolvers can have any number of additional keyword arguments. - user_options (dict): - Options for the subproblem solver. The following are supported: - - maxiter (int): Maximum number of iterations to perform when solving the - trust-region subproblem ("bntr" and "gqtpar"). - - maxiter_gradient_descent (int): Maximum number of gradient descent - iterations to perform ("bntr"). - - conjugate_gradient_method (str): Method for computing the conjugate - gradient step ("bntr"). - Available conjugate gradient methods are: - - "cg" - - "steihaug_toint" - - "trsbox" (default) - - gtol_abs (float): Convergence tolerance for the absolute gradient norm - in the trust-region subproblem ("bntr"). - - gtol_rel (float): Convergence tolerance for the relative gradient norm - in the trust-region subproblem ("bntr"). - - gtol_scaled (float): Convergence tolerance for the scaled gradient norm - in the trust-region subproblem ("bntr"). - - gtol_abs_conjugate_gradient (float): Convergence tolerance for the - absolute gradient norm in the conjugate gradient step ("bntr"). - - gtol_rel_conjugate_gradient (float): Convergence tolerance for the - relative gradient norm in the conjugate gradient step ("bntr"). - - k_easy (float): topping criterion for the "easy" case in the trust-region - subproblem ("gqtpar"). - - k_hard (float): Stopping criterion for the "hard" case in the trust-region - subproblem ("gqtpar"). - - Returns: - callable: The subsolver. 
- - """ - built_in_sphere_solvers = { - "gqtpar": gqtpar, - "gqtpar_fast": gqtpar_fast, - "slsqp_sphere": slsqp_sphere, - } - - built_in_cube_solvers = { - "bntr": bntr, - "bntr_fast": bntr_fast, - "multistart": solve_multistart, - } - - _sphere_subsolver = get_component( - name_or_func=sphere_solver, - component_name="sphere_solver", - func_dict=built_in_sphere_solvers, - default_options=SubsolverOptions(), - user_options=user_options, - mandatory_signature=["model", "x_candidate"], - ) - - _cube_subsolver = get_component( - name_or_func=cube_solver, - component_name="cube_solver", - func_dict=built_in_cube_solvers, - default_options=SubsolverOptions(), - user_options=user_options, - mandatory_signature=["model", "x_candidate", "lower_bounds", "upper_bounds"], - ) - - solver = partial( - _solve_subproblem_template, - sphere_solver=_sphere_subsolver, - cube_solver=_cube_subsolver, - ) - - return solver - - -def _solve_subproblem_template( - model, - trustregion, - sphere_solver, - cube_solver, -): - """Solve the quadratic subproblem. - - Args: - model (ScalarModel): The fitted model of which we want to find the minimum. - trustregion (Region): The trustregion on which the model was fitted. - sphere_solver (callable): Spherical subproblem solver, designed to solve the - problem in the unit sphere. The first argument of any subsolver needs to be - ``model``. The second argument needs to be ``x_candidate``, an initial guess - for the solution in the unit space. Moreover, subsolvers can have any number - of additional keyword arguments. - cube_solver (callable): Cubical subproblem solver, designed to solve the problem - in the unit box. The first argument of any subsolver needs to be ``model``. - The second and third arguments have to be ``lower_bounds`` and - ``upper_bounds``. The fourth argument needs to be ``x_candidate``, an - initial guess for the solution in the unit space. Moreover, subsolvers can - have any number of additional keyword arguments. - - - Returns: - SubproblemResult: Namedtuple with the following entries: - - "x" (np.ndarray): The optimal x in terms of the original parameter space. - - "expected_improvement" (float): The expected improvement at the solution. - The sign has already been flipped, i.e. large means more improvement. - - "n_iterations" (int): Number of iterations performed before termination. - - "success" (bool): Boolean indicating whether a solution has been found - before reaching maxiter. - - "x_unit" (np.ndarray): The optimal x in terms of the unit space. - - "shape" (str): Whether the trustregion was a sphere or a cube, which in - turn determines whether the sphere or cube solver was used. - - """ - old_x_unit = trustregion.map_to_unit(trustregion.center) - - solver = sphere_solver if trustregion.shape == "sphere" else cube_solver - - raw_result = solver( - model=model, - x_candidate=old_x_unit, - # bounds can be passed to both solvers because the functions returned by - # `get_component` ignore redundant arguments. - lower_bounds=-np.ones_like(old_x_unit), - upper_bounds=np.ones_like(old_x_unit), - ) - - if trustregion.shape == "cube": - raw_result["x"] = np.clip(raw_result["x"], -1.0, 1.0) - - # make sure expected improvement is calculated accurately in case of clipping and - # does not depend on whether the subsolver ignores intercepts or not. 
- fval_old = model.predict(old_x_unit) - fval_candidate = model.predict(raw_result["x"]) - - expected_improvement = -(fval_candidate - fval_old) - - # in case of negative expected improvement, we return the old point - if expected_improvement >= 0: - success = raw_result["success"] - x_unit = raw_result["x"] - x = trustregion.map_from_unit(raw_result["x"]) - else: - success = False - x_unit = old_x_unit - x = trustregion.center - expected_improvement = 0.0 - - result = SubproblemResult( - x=x, - expected_improvement=expected_improvement, - n_iterations=raw_result["n_iterations"], - success=success, - x_unit=x_unit, - shape=trustregion.shape, - ) - - return result - - -class SubproblemResult(NamedTuple): - """Result of the subproblem solver.""" - - x: np.ndarray - expected_improvement: float - n_iterations: int - success: bool - x_unit: np.ndarray - shape: str diff --git a/src/estimagic/optimization/tranquilo/tranquilo.py b/src/estimagic/optimization/tranquilo/tranquilo.py deleted file mode 100644 index 83844b1bf..000000000 --- a/src/estimagic/optimization/tranquilo/tranquilo.py +++ /dev/null @@ -1,467 +0,0 @@ -import functools -from functools import partial -from typing import NamedTuple - -import numpy as np - -from estimagic.decorators import mark_minimizer -from estimagic.optimization.tranquilo.adjust_radius import adjust_radius -from estimagic.optimization.tranquilo.filter_points import ( - drop_worst_points, -) -from estimagic.optimization.tranquilo.models import ( - ScalarModel, - VectorModel, -) -from estimagic.optimization.tranquilo.process_arguments import process_arguments -from estimagic.optimization.tranquilo.region import Region - - -# wrapping gives us the signature and docstring of process arguments -@functools.wraps(process_arguments) -def _tranquilo(*args, **kwargs): - internal_kwargs = process_arguments(*args, **kwargs) - return _internal_tranquilo(**internal_kwargs) - - -def _internal_tranquilo( - evaluate_criterion, - x, - noisy, - conv_options, - stop_options, - radius_options, - batch_size, - target_sample_size, - stagnation_options, - search_radius_factor, - n_evals_per_point, - n_evals_at_start, - trustregion, - sampling_rng, - history, - sample_points, - solve_subproblem, - filter_points, - fit_model, - aggregate_model, - estimate_variance, - accept_candidate, -): - eval_info = {0: n_evals_at_start} - evaluate_criterion(eval_info) - - _init_fvec = history.get_fvecs(0).mean(axis=0) - - _init_vector_model = VectorModel( - intercepts=_init_fvec, - linear_terms=np.zeros((len(_init_fvec), len(x))), - square_terms=np.zeros((len(_init_fvec), len(x), len(x))), - shift=trustregion.center, - scale=trustregion.radius, - ) - - _init_model = aggregate_model(_init_vector_model) - - state = State( - trustregion=trustregion, - model_indices=[0], - model=_init_model, - vector_model=_init_vector_model, - index=0, - x=x, - fval=np.mean(history.get_fvals(0)), - rho=np.nan, - accepted=True, - new_indices=[0], - old_indices_discarded=[], - old_indices_used=[], - candidate_index=0, - candidate_x=x, - ) - - states = [state] - - # ================================================================================== - # main optimization loop - # ================================================================================== - converged, msg = False, None - for _ in range(stop_options.max_iter): - # ============================================================================== - # find, filter and count points - # 
============================================================================== - - search_region = state.trustregion._replace( - radius=search_radius_factor * state.trustregion.radius - ) - - old_indices = history.get_x_indices_in_region(search_region) - - old_xs = history.get_xs(old_indices) - - model_xs, model_indices = filter_points( - xs=old_xs, - indices=old_indices, - state=state, - target_size=target_sample_size, - ) - - # ========================================================================== - # sample points if necessary and do simple iteration - # ========================================================================== - new_xs = sample_points( - trustregion=state.trustregion, - n_points=max(0, target_sample_size - len(model_xs)), - existing_xs=model_xs, - rng=sampling_rng, - ) - - new_indices = history.add_xs(new_xs) - - eval_info = {i: n_evals_per_point for i in new_indices} - - evaluate_criterion(eval_info) - - model_indices = _concatenate_indices(model_indices, new_indices) - - model_xs = history.get_xs(model_indices) - model_data = history.get_model_data( - x_indices=model_indices, - average=True, - ) - - vector_model = fit_model( - *model_data, - region=state.trustregion, - old_model=state.vector_model, - weights=None, - ) - - scalar_model = aggregate_model( - vector_model=vector_model, - ) - - sub_sol = solve_subproblem(model=scalar_model, trustregion=state.trustregion) - - _relative_step_length = ( - np.linalg.norm(sub_sol.x - state.x) / state.trustregion.radius - ) - - # ========================================================================== - # If we have enough points, drop points until the relative step length - # becomes large enough - # ========================================================================== - - if len(model_xs) > target_sample_size: - while ( - _relative_step_length < stagnation_options.min_relative_step_keep - and len(model_xs) > target_sample_size - ): - model_xs, model_indices = drop_worst_points( - xs=model_xs, - indices=model_indices, - state=state, - n_to_drop=1, - ) - - model_data = history.get_model_data( - x_indices=model_indices, - average=True, - ) - - vector_model = fit_model( - *model_data, - region=state.trustregion, - old_model=state.vector_model, - weights=None, - ) - - scalar_model = aggregate_model( - vector_model=vector_model, - ) - - sub_sol = solve_subproblem( - model=scalar_model, trustregion=state.trustregion - ) - - _relative_step_length = ( - np.linalg.norm(sub_sol.x - state.x) / state.trustregion.radius - ) - - # ========================================================================== - # If step length is still too small, replace the worst point with a new one - # ========================================================================== - - sample_counter = 0 - while _relative_step_length < stagnation_options.min_relative_step: - if stagnation_options.drop: - model_xs, model_indices = drop_worst_points( - xs=model_xs, - indices=model_indices, - state=state, - n_to_drop=stagnation_options.sample_increment, - ) - - new_xs = sample_points( - trustregion=state.trustregion, - n_points=stagnation_options.sample_increment, - existing_xs=model_xs, - rng=sampling_rng, - ) - - new_indices = history.add_xs(new_xs) - - eval_info = {i: n_evals_per_point for i in new_indices} - - evaluate_criterion(eval_info) - - model_indices = _concatenate_indices(model_indices, new_indices) - model_xs = history.get_xs(model_indices) - model_data = history.get_model_data( - x_indices=model_indices, - average=True, - ) - - vector_model = 
fit_model( - *model_data, - region=state.trustregion, - old_model=state.vector_model, - weights=None, - ) - - scalar_model = aggregate_model( - vector_model=vector_model, - ) - - sub_sol = solve_subproblem( - model=scalar_model, trustregion=state.trustregion - ) - - _relative_step_length = ( - np.linalg.norm(sub_sol.x - state.x) / state.trustregion.radius - ) - - sample_counter += 1 - if sample_counter >= stagnation_options.max_trials: - break - - # ============================================================================== - # fit noise model based on previous acceptance samples - # ============================================================================== - - if noisy: - scalar_noise_variance = estimate_variance( - trustregion=state.trustregion, - history=history, - model_type="scalar", - ) - else: - scalar_noise_variance = None - - # ============================================================================== - # acceptance decision - # ============================================================================== - - acceptance_result = accept_candidate( - subproblem_solution=sub_sol, - state=state, - wrapped_criterion=evaluate_criterion, - noise_variance=scalar_noise_variance, - history=history, - ) - - # ============================================================================== - # update state with information on this iteration - # ============================================================================== - - state = state._replace( - model_indices=model_indices, - model=scalar_model, - new_indices=np.setdiff1d(model_indices, old_indices), - old_indices_used=np.intersect1d(model_indices, old_indices), - old_indices_discarded=np.setdiff1d(old_indices, model_indices), - **acceptance_result._asdict(), - ) - - states.append(state) - - # ============================================================================== - # update state for beginning of next iteration - # ============================================================================== - - new_radius = adjust_radius( - radius=state.trustregion.radius, - rho=acceptance_result.rho, - step_length=acceptance_result.step_length, - options=radius_options, - ) - - new_trustregion = state.trustregion._replace( - center=acceptance_result.x, radius=new_radius - ) - - state = state._replace(trustregion=new_trustregion) - - # ============================================================================== - # convergence check - # ============================================================================== - - if acceptance_result.accepted and not conv_options.disable: - converged, msg = _is_converged(states=states, options=conv_options) - if converged: - break - - if history.get_n_fun() >= stop_options.max_eval: - converged = False - msg = "Maximum number of criterion evaluations reached." - break - - # ================================================================================== - # results processing - # ================================================================================== - res = { - "solution_x": state.x, - "solution_criterion": state.fval, - "states": states, - "message": msg, - "tranquilo_history": history, - } - - return res - - -class State(NamedTuple): - trustregion: Region - """The trustregion at the beginning of the iteration.""" - - # Information about the model used to make the acceptance decision in the iteration - model_indices: np.ndarray - """The indices of points used to build the current surrogate model `State.model`. 
- - The points can be retrieved through calling `history.get_xs(model_indices)`. - - """ - - model: ScalarModel - """The current surrogate model. - - The solution to the subproblem with this model as the criterion is stored as - `State.candidate_x`. - - """ - - vector_model: VectorModel - - # candidate information - candidate_index: int - """The index of the candidate point in the history. - - This corresponds to the index of the point in the history that solved the - subproblem. - - """ - - candidate_x: np.ndarray - """The candidate point. - - Is the same as `history.get_xs(candidate_index)`. - - """ - - # accepted parameters and function values at the end of the iteration - index: int - """The index of the accepted point in the history.""" - - x: np.ndarray - """The accepted point. - - Is the same as `history.get_xs(index)`. - - """ - - fval: np.ndarray # this is an estimate for noisy functions - """The function value at the accepted point. - - If `noisy=False` this is the same as `history.get_fval(index)`. Otherwise, this is - an average. - - """ - - # success information - rho: float - """The calculated rho in the current iteration.""" - - accepted: bool - """Whether the candidate point was accepted.""" - - # information on existing and new points - new_indices: np.ndarray - """The indices of new points generated for the model fitting in this iteration.""" - - old_indices_used: np.ndarray - """The indices of existing points used to build the model in this iteration.""" - - old_indices_discarded: np.ndarray - """The indices of existing points not used to build the model in this iteration.""" - - # information on step length - step_length: float = None - """The euclidian distance between `State.x` and `State.trustregion.center`.""" - - relative_step_length: float = None - """The step_length divided by the radius of the trustregion.""" - - -def _is_converged(states, options): - old, new = states[-2:] - - f_change_abs = np.abs(old.fval - new.fval) - f_change_rel = f_change_abs / max(np.abs(old.fval), 1) - x_change_abs = np.linalg.norm(old.x - new.x) - x_change_rel = np.linalg.norm((old.x - new.x) / np.clip(np.abs(old.x), 1, np.inf)) - g_norm_abs = np.linalg.norm(new.model.linear_terms) - g_norm_rel = g_norm_abs / max(g_norm_abs, 1) - - converged = True - if g_norm_rel <= options.gtol_rel: - msg = "Relative gradient norm smaller than tolerance." - elif g_norm_abs <= options.gtol_abs: - msg = "Absolute gradient norm smaller than tolerance." - elif f_change_rel <= options.ftol_rel: - msg = "Relative criterion change smaller than tolerance." - elif f_change_abs <= options.ftol_abs: - msg = "Absolute criterion change smaller than tolerance." - elif x_change_rel <= options.xtol_rel: - msg = "Relative params change smaller than tolerance." - elif x_change_abs <= options.xtol_abs: - msg = "Absolute params change smaller than tolerance." 
- else: - converged = False - msg = None - - return converged, msg - - -tranquilo = mark_minimizer( - func=partial(_tranquilo, functype="scalar"), - name="tranquilo", - primary_criterion_entry="value", - needs_scaling=True, - is_available=True, - is_global=False, -) - -tranquilo_ls = mark_minimizer( - func=partial(_tranquilo, functype="least_squares"), - primary_criterion_entry="root_contributions", - name="tranquilo_ls", - needs_scaling=True, - is_available=True, - is_global=False, -) - - -def _concatenate_indices(first, second): - first = np.atleast_1d(first).astype(int) - second = np.atleast_1d(second).astype(int) - return np.hstack((first, second)) diff --git a/src/estimagic/optimization/tranquilo/volume.py b/src/estimagic/optimization/tranquilo/volume.py deleted file mode 100644 index 1c092f84a..000000000 --- a/src/estimagic/optimization/tranquilo/volume.py +++ /dev/null @@ -1,81 +0,0 @@ -"""Functions to calculate volumes of hyperspheres and hypercubes. - -Hypercubes can be seen as hyperspheres when the distance from the center is calculated -in the infinity norm rather than the Euclidean norm. - -This is why we characterize hypercubes by their radius (half the side length). - -""" -import numpy as np -from scipy.special import gamma, loggamma - - -def get_radius_after_volume_scaling(radius, dim, scaling_factor): - out = radius * scaling_factor ** (1 / dim) - return out - - -def get_radius_of_sphere_with_volume_of_cube(cube_radius, dim, scaling_factor=None): - log_radius = ( - loggamma(dim / 2 + 1) / dim - - np.log(np.pi) / 2 - + np.log(2) - + np.log(cube_radius) - ) - if scaling_factor is not None: - log_radius += np.log(scaling_factor) / dim - out = np.exp(log_radius) - return out - - -def get_radius_of_cube_with_volume_of_sphere(sphere_radius, dim, scaling_factor=None): - log_radius = ( - np.log(np.pi) / 2 - + np.log(sphere_radius) - - np.log(2) - - loggamma(dim / 2 + 1) / dim - ) - if scaling_factor is not None: - log_radius += np.log(scaling_factor) / dim - out = np.exp(log_radius) - return out - - -def get_volume(radius, dim, shape): - if shape == "sphere": - out = _sphere_volume(radius, dim) - elif shape == "cube": - out = _cube_volume(radius, dim) - else: - raise ValueError(f"shape must be 'sphere' or 'cube', not: {shape}") - return out - - -def get_radius(volume, dim, shape): - if shape == "sphere": - out = _sphere_radius(volume, dim) - elif shape == "cube": - out = _cube_radius(volume, dim) - else: - raise ValueError(f"shape must be 'sphere' or 'cube', not: {shape}") - return out - - -def _sphere_volume(radius, dim): - vol = np.pi ** (dim / 2) * radius**dim / gamma(dim / 2 + 1) - return vol - - -def _cube_volume(radius, dim): - vol = (radius * 2) ** dim - return vol - - -def _sphere_radius(volume, dim): - radius = ((volume * gamma(dim / 2 + 1)) / (np.pi ** (dim / 2))) ** (1 / dim) - return radius - - -def _cube_radius(volume, dim): - radius = 0.5 * volume ** (1 / dim) - return radius diff --git a/src/estimagic/optimization/tranquilo/weighting.py b/src/estimagic/optimization/tranquilo/weighting.py deleted file mode 100644 index 1f655ff83..000000000 --- a/src/estimagic/optimization/tranquilo/weighting.py +++ /dev/null @@ -1,27 +0,0 @@ -from functools import partial - - -def get_sample_weighter(weighter, bounds): - """Get a function that calculates weights for points in a sample. - - The resulting function takes the following arguments: - - xs (np.ndarray): A 2d numpy array containing a sample. - - trustregion (Region): Trustregion. See module region.py.
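    The resulting function returns the weights for the points, or ``None`` (as
    the built-in ``no_weights`` does) to signal unweighted fitting.

    Example of a custom weighter (an illustrative sketch, not shipped with the
    package):

        import numpy as np

        def inverse_distance_weights(xs, trustregion, bounds):  # noqa: ARG001
            # weight points close to the trustregion center more heavily
            dists = np.linalg.norm(xs - trustregion.center, axis=1)
            return 1 / (1 + dists)

        weigh_points = get_sample_weighter(inverse_distance_weights, bounds)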
- - Args: - weighter (str) - bounds (Bounds) - - """ - if isinstance(weighter, str): - built_in_weighters = {"no_weights": no_weights} - weighter = built_in_weighters[weighter] - elif not callable(weighter): - raise TypeError("weighter must be a string or callable.") - - out = partial(weighter, bounds=bounds) - return out - - -def no_weights(xs, trustregion, bounds): # noqa: ARG001 - return None diff --git a/src/estimagic/optimization/tranquilo/wrap_criterion.py b/src/estimagic/optimization/tranquilo/wrap_criterion.py deleted file mode 100644 index 848571382..000000000 --- a/src/estimagic/optimization/tranquilo/wrap_criterion.py +++ /dev/null @@ -1,56 +0,0 @@ -import functools - -import numpy as np - -from estimagic.batch_evaluators import process_batch_evaluator - - -def get_wrapped_criterion(criterion, batch_evaluator, n_cores, history): - """Wrap the criterion function to do get parallelization and history handling. - - The wrapped criterion function takes a dict mapping x_indices to required numbers of - evaluations as only argument. It evaluates the criterion function in parallel and - saves the resulting function evaluations in the history. - - The wrapped criterion function does not return anything. - - """ - batch_evaluator = process_batch_evaluator(batch_evaluator) - - @functools.wraps(criterion) - def wrapper_criterion(eval_info): - if not isinstance(eval_info, dict): - raise ValueError("eval_info must be a dict.") - - if len(eval_info) == 0: - return - - x_indices = list(eval_info) - repetitions = list(eval_info.values()) - - xs = history.get_xs(x_indices) - xs = np.repeat(xs, repetitions, axis=0) - - arguments = list(xs) - - effective_n_cores = min(n_cores, len(arguments)) - - raw_evals = batch_evaluator( - criterion, - arguments=arguments, - n_cores=effective_n_cores, - ) - - # replace NaNs but keep infinite values. 
NaNs would be problematic in many - # places, infs are only a problem in model fitting and will be handled there - clipped_evals = [ - np.nan_to_num(critval, nan=np.inf, posinf=np.inf, neginf=-np.inf) - for critval in raw_evals - ] - - history.add_evals( - x_indices=np.repeat(x_indices, repetitions), - evals=clipped_evals, - ) - - return wrapper_criterion diff --git a/src/estimagic/optimization/tranquilo/wrapped_subsolvers.py b/src/estimagic/optimization/tranquilo/wrapped_subsolvers.py deleted file mode 100644 index b025bccd5..000000000 --- a/src/estimagic/optimization/tranquilo/wrapped_subsolvers.py +++ /dev/null @@ -1,94 +0,0 @@ -from functools import partial - -import numpy as np -from scipy.optimize import Bounds, NonlinearConstraint, minimize - -from estimagic.optimization.tiktak import draw_exploration_sample - - -def solve_multistart(model, x_candidate, lower_bounds, upper_bounds): - np.random.seed(12345) - start_values = draw_exploration_sample( - x=x_candidate, - lower=lower_bounds, - upper=upper_bounds, - n_samples=100, - sampling_distribution="uniform", - sampling_method="sobol", - seed=1234, - ) - - def crit(x): - return model.predict(x) - - bounds = Bounds(lower_bounds, upper_bounds) - - best_crit = np.inf - accepted_x = None - critvals = [] - for x in start_values: - res = minimize( - crit, - x, - method="L-BFGS-B", - bounds=bounds, - ) - if res.fun <= best_crit: - accepted_x = res.x - critvals.append(res.fun) - - return { - "x": accepted_x, - "std": np.std(critvals), - "n_iterations": None, - "success": None, - } - - -def slsqp_sphere(model, x_candidate): - crit, grad = get_crit_and_grad(model) - constraints = get_constraints() - - res = minimize( - crit, - x_candidate, - method="slsqp", - jac=grad, - constraints=constraints, - ) - - return { - "x": res.x, - "success": res.success, - "n_iterations": res.nit, - } - - -def get_crit_and_grad(model): - def _crit(x, c, g, h): - return c + x @ g + 0.5 * x @ h @ x - - def _grad(x, g, h): - return g + x @ h - - crit = partial(_crit, c=model.intercept, g=model.linear_terms, h=model.square_terms) - grad = partial(_grad, g=model.linear_terms, h=model.square_terms) - - return crit, grad - - -def get_constraints(): - def _constr_fun(x): - return x @ x - - def _constr_jac(x): - return 2 * x - - constr = NonlinearConstraint( - fun=_constr_fun, - lb=-np.inf, - ub=1, - jac=_constr_jac, - ) - - return (constr,) diff --git a/src/estimagic/visualization/visualize_tranquilo.py b/src/estimagic/visualization/visualize_tranquilo.py deleted file mode 100644 index 9b1a8f1c7..000000000 --- a/src/estimagic/visualization/visualize_tranquilo.py +++ /dev/null @@ -1,590 +0,0 @@ -from copy import deepcopy - -import numpy as np -import pandas as pd -import plotly.express as px -from numba import njit -from plotly import figure_factory as ff -from plotly import graph_objects as go -from plotly.subplots import make_subplots - -from estimagic.optimization.optimize_result import OptimizeResult -from estimagic.optimization.tranquilo.clustering import cluster -from estimagic.optimization.tranquilo.geometry import log_d_quality_calculator -from estimagic.optimization.tranquilo.volume import get_radius_after_volume_scaling - - -def visualize_tranquilo(results, iterations): - """Plot diagnostic information of optimization result in given iteration(s). 
- - Generates plots with sample points (trustregion or heatmap), criterion evaluations, - trustregion radii and other diagnostic information to compare different algorithms - at an iteration or different iterations for a given algorithm. - - Currently works for the following algorithms: `tranquilo`, `tranquilo_ls`, - `nag_pybobyqa` and `nag_dfols`. - - Args: - results (dict or OptimizeResult): An estimagic optimization result or a - dictionary with different estimagic optimization results. - iterations (int, list, tuple or dict): The iterations to compare the results - at. Can be an integer if we want to compare different results at the same - iteration, a list or tuple if we want to compare different iterations of - the same optimization result, or dictionary with the same keys as results - and with integer values if we want to compare different iterations of - different results. - Returns: - fig (plotly.Figure): Plotly figure that combines the following plots: - - sample points: plot with model points at current iteration and the - trust region, if number of parameters is not larger than 2, or - a heatmap of (absolute) correlations of sample points for higher - dimensional parameter spaces. - - distance plot: L2 and infinity norm-distances of model points from - the trustregion center. - - criterion plot: function evaluations with sample points and current - accepted point highlighted. - - rho plots: the ratio of expected and actual improvement in function - values at each iteration. - - radius plots: trustregion radii at each iteration. - - cluster plots: number of clusters relative to number of sample points - at each iteration. - - fekete criterion plots: the value of the fekete criterion at each - iteration. - - """ - results = deepcopy(results) - if isinstance(iterations, int): - iterations = {case: iterations for case in results} - results = {case: _process_results(results[case]) for case in results} - elif isinstance(results, OptimizeResult): - results = _process_results(results) - results = {f"iteration {i}": results for i in iterations} - iterations = {f"iteration {iteration}": iteration for iteration in iterations} - - cases = results.keys() - nrows = 8 - ncols = len(cases) - fig = make_subplots( - rows=nrows, - cols=ncols, - subplot_titles=list(cases), - horizontal_spacing=1 / (ncols * 6), - vertical_spacing=(1 / (nrows - 1)) / 4, - shared_yaxes=True, - ) - color_dict = { - "existing": "rgb(0,0,255)", - "new": "rgb(230,0,0)", - "discarded": "rgb(0,0,0)", - } - xl = [] - xu = [] - for i, case in enumerate(cases): - result = results[case] - iteration = iterations[case] - state = result.algorithm_output["states"][iteration] - params_history = np.array(result.history["params"]) - criterion_history = np.array(result.history["criterion"]) - fig = _plot_sample_points( - params_history, state, color_dict, fig, row=1, col=i + 1 - ) - fig = _plot_distances_from_center( - params_history, state, fig, rows=[2, 3], col=i + 1 - ) - xl.append(fig.get_subplot(row=2, col=i + 1).xaxis.range[0]) - xu.append(fig.get_subplot(row=2, col=i + 1).xaxis.range[1]) - fig = _plot_criterion( - criterion_history, state, color_dict, fig, row=4, col=i + 1 - ) - fig = _plot_rhos(result, fig, iteration=iteration, row=5, col=i + 1) - fig = _plot_radii(result, fig, iteration=iteration, row=6, col=i + 1) - fig = _plot_clusters_points_ratio(result, iteration, fig, row=7, col=i + 1) - fig = _plot_fekete_criterion(result, fig, iteration=iteration, row=8, col=i + 1) - fig.layout.annotations[i].update(y=1.015) - for r 
in [2, 3]: - for c in range(1, ncols + 1): - fig.update_xaxes(range=[min(xl) - 0.25, max(xu) + 0.25], row=r, col=c) - fig = _clean_legend_duplicates(fig) - fig.update_layout(height=400 * nrows, width=460 * ncols, template="plotly_white") - fig.update_yaxes( - showgrid=False, showline=True, linewidth=1, linecolor="black", zeroline=False - ) - fig.update_xaxes( - showgrid=False, showline=True, linewidth=1, linecolor="black", zeroline=False - ) - fig.update_layout(hovermode="x unified") - - return fig - - -def _plot_criterion(history, state, color_dict, fig, row, col): - fig.add_trace( - go.Scatter( - y=history, - x=np.arange(len(history)), - showlegend=False, - line_color="#C0C0C0", - name="Criterion", - mode="lines", - ), - row=row, - col=col, - ) - - fig.add_trace( - go.Scatter( - y=history[state.old_indices_used], - x=state.old_indices_used, - mode="markers", - marker_size=10, - name="existing ", - showlegend=False, - marker_color=color_dict["existing"], - opacity=0.6, - ), - col=col, - row=row, - ) - fig.add_trace( - go.Scatter( - y=history[state.new_indices], - x=state.new_indices, - mode="markers", - marker_size=10, - name="new ", - showlegend=False, - marker_color=color_dict["new"], - opacity=0.6, - ), - col=col, - row=row, - ) - fig.add_trace( - go.Scatter( - y=history[ - getattr(state, "old_indices_discarded", np.array([], dtype="int")) - ], - x=getattr(state, "old_indices_discarded", np.array([], dtype="int")), - mode="markers", - marker_size=10, - name="discarded ", - showlegend=False, - marker_color=color_dict["discarded"], - opacity=0.6, - ), - col=col, - row=row, - ) - fig.add_trace( - go.Scatter( - y=[history[state.index]], - x=[state.index], - mode="markers", - marker_size=12, - name="current index", - showlegend=False, - marker_color="red", - marker_symbol="star", - marker_line_color="black", - marker_line_width=1, - opacity=0.6, - ), - col=col, - row=row, - ) - fig.update_xaxes(title_text="Function evaluations", row=row, col=col) - if col == 1: - fig.update_yaxes(title_text="Criterion value", row=row, col=col) - return fig - - -def _plot_sample_points(history, state, color_dict, fig, row, col): - sample_points = _get_sample_points(state, history) - if state.x.shape[0] <= 2: - trustregion = state.trustregion - radius = trustregion.radius - center = trustregion.center - fig.add_shape( - type="circle", - xref="x", - yref="y", - x0=center[0] - radius, - y0=center[1] - radius, - x1=center[0] + radius, - y1=center[1] + radius, - line_width=0.5, - col=col, - row=row, - line_color="grey", - ) - - fig.add_traces( - px.scatter( - sample_points, - x=0, - y=1, - color="case", - color_discrete_map=color_dict, - opacity=0.7, - ).data, - cols=col, - rows=row, - ) - fig.update_traces( - marker_size=10, - marker_line_color="black", - marker_line_width=2, - col=col, - row=row, - ) - fig.update_yaxes(scaleanchor="x", scaleratio=1, col=col, row=row) - fig.update_xaxes(scaleanchor="y", scaleratio=1, col=col, row=row) - else: - params = [col for col in sample_points.columns if col != "case"] - corr = sample_points[params].corr().abs() - mask = np.zeros_like(corr, dtype=bool) - mask[np.tril_indices_from(mask, k=-1)] = True - corr = corr.where(mask) - fig.add_trace( - go.Heatmap( - z=corr, - x=corr.columns.values, - y=corr.index.values, - showscale=False, - colorscale="Magenta", - zmin=0, - zmax=1, - text=corr.to_numpy().round(2).tolist(), # xxxx, - texttemplate="%{text}", - ), - row=row, - col=col, - ) - fig.update_layout(yaxis_autorange="reversed") - fig.update_xaxes(tickmode="array", 
tickvals=corr.index.values, col=col, row=row) - fig.update_yaxes( - tickmode="array", tickvals=corr.columns.values, col=col, row=row - ) - return fig - - -def _plot_radii(res, fig, row, col, iteration): - radii = [state.trustregion.radius for state in res.algorithm_output["states"]] - traces = plot_line_with_lighlighted_point( - x=np.arange(len(radii)), y=radii, highlighted_point=iteration, name="Radius" - ) - fig.add_traces( - traces, - rows=row, - cols=col, - ) - fig.update_xaxes(title_text="Iteration", row=row, col=col) - if col == 1: - fig.update_yaxes(title_text="Radius", row=row, col=col) - return fig - - -def _plot_rhos(res, fig, row, col, iteration): - rhos = np.array([state.rho for state in res.algorithm_output["states"]]) - rhos[~pd.isna(rhos)] = np.clip(rhos[~pd.isna(rhos)], -1, 3) - traces = plot_line_with_lighlighted_point( - x=np.arange(len(rhos)), y=rhos, highlighted_point=iteration, name="Rho" - ) - fig.add_traces( - traces, - rows=row, - cols=col, - ) - fig.update_xaxes(title_text="Iteration", row=row, col=col) - if col == 1: - fig.update_yaxes(title_text="Rho", row=row, col=col) - return fig - - -def _plot_fekete_criterion(res, fig, row, col, iteration): - fekete = _get_fekete_criterion(res) - traces = plot_line_with_lighlighted_point( - x=np.arange(len(fekete)), y=fekete, highlighted_point=iteration, name="Fekete" - ) - fig.add_traces( - traces, - rows=row, - cols=col, - ) - fig.update_xaxes(title_text="Iteration", row=row, col=col) - if col == 1: - fig.update_yaxes(title_text="Fekete criterion", row=row, col=col) - return fig - - -def _plot_clusters_points_ratio(res, iteration, fig, row, col): - dim = res.params.shape[0] - history = np.array(res.history["params"]) - states = res.algorithm_output["states"] - colors = [ - "rgb(251,106,74)", - "rgb(203,24,29)", - "rgb(103,0,13)", - ] - for i, f in enumerate([1, 2, 10]): - ratios = [np.nan] - for state in states[1:]: - n_points = state.model_indices.shape[0] - points = history[state.model_indices] - scaling = 1 / (f * n_points) - radius = get_radius_after_volume_scaling( - state.trustregion.radius, dim, scaling - ) - _, centers = cluster(points, radius) - n_clusters = centers.shape[0] - ratios.append(n_clusters / n_points) - fig.add_trace( - go.Scatter( - y=ratios, - x=np.arange(len(ratios)), - mode="lines", - opacity=0.5, - line_color=colors[i], - line_width=1.5, - name=f"s={f}*n", - ), - col=col, - row=row, - ) - fig.add_trace( - go.Scatter( - y=[ratios[iteration]], - x=[iteration], - mode="markers", - marker_color=colors[i], - opacity=1, - marker_size=10, - name=f"s={f}*n", - showlegend=False, - ), - col=col, - row=row, - ) - fig.update_xaxes(title_text="Iteration", row=row, col=col) - if col == 1: - fig.update_yaxes(title_text="Cluster ratio", row=row, col=col) - return fig - - -def _plot_distances_from_center(history, state, fig, col, rows): - dist_sq = ( - np.linalg.norm( - history[state.model_indices] - state.trustregion.center, - axis=1, - ) - / state.trustregion.radius - ) - - dist_inf = ( - np.linalg.norm( - history[state.model_indices] - state.trustregion.center, - axis=1, - ord=np.inf, - ) - / state.trustregion.radius - ) - - for r, inputs in enumerate([dist_sq, dist_inf]): - data = ff.create_distplot( - [inputs], - show_curve=False, - show_rug=True, - group_labels=[""], - show_hist=False, - ).data - - data[0].update( - { - "yaxis": "y", - "y": [0] * len(inputs), - "showlegend": False, - "marker_size": 20, - } - ) - fig.add_traces(data, cols=col, rows=rows[r]) - - min_dist = min(dist_inf.min(), dist_sq.min()) - 
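# NOTE (editorial sketch, not part of the deleted module): the distances plotted
# by this function are measured in trust-region units, i.e. each norm is divided
# by the trust-region radius, so a value of 1.0 lies exactly on the boundary. A
# minimal, self-contained version of that computation; the names `points`,
# `center` and `radius` below are illustrative assumptions:
import numpy as np

points = np.array([[0.5, 0.0], [1.0, 1.0]])
center, radius = np.zeros(2), 2.0
d_l2 = np.linalg.norm(points - center, axis=1) / radius  # -> [0.25, 0.7071...]
d_inf = np.linalg.norm(points - center, axis=1, ord=np.inf) / radius  # -> [0.25, 0.5]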
max_dist = max(dist_inf.max(), dist_sq.max()) - - fig.update_xaxes( - title_text="L2 norm", range=[min_dist, max_dist], row=rows[0], col=col - ) - fig.update_xaxes( - title_text="Inf norm", range=[min_dist, max_dist], row=rows[1], col=col - ) - return fig - - -def _get_fekete_criterion(res): - states = res.algorithm_output["states"][1:] - history = np.array(res.history["params"]) - - out = [np.nan] + [ - log_d_quality_calculator( - sample=history[state.model_indices], - trustregion=state.trustregion, - ) - for state in states - ] - return out - - -def _get_sample_points(state, history): - current_points = history[state.model_indices] - discarded_points = history[ - getattr(state, "old_indices_discarded", np.array([], dtype="int")) - ] - df = pd.DataFrame( - data=np.vstack([current_points, discarded_points]), - index=np.hstack( - [ - state.model_indices, - getattr(state, "old_indices_discarded", np.array([], dtype="int")), - ] - ), - ) - df["case"] = np.nan - df.loc[state.new_indices, "case"] = "new" - df.loc[state.old_indices_used, "case"] = "existing" - df.loc[ - getattr(state, "old_indices_discarded", np.array([], dtype="int")), "case" - ] = "discarded" - return df - - -def plot_line_with_lighlighted_point(x, y, name, highlighted_point): - """Plot the line x, y and add a marker that highlights one data point. - - Args: - x (np.ndarray or list): 1d array or list of data for the x axis. - y (np.ndarray or list): 1d array or list of data for the y axis. - name (str): Name of the line trace. - highlighted_point (int): Index of the data point to highlight. In the rho, - radius and fekete plots, x holds iteration numbers, y the corresponding - values, and the highlighted point is the current iteration. - - Returns: - list: Two go.Scatter traces, the line and the highlighted point marker. - - """ - highlight_color = "#035096" - highlight_size = 10 - line_color = "#C0C0C0" - data = [ - go.Scatter( - y=y, x=x, mode="lines", line_color=line_color, name=name, showlegend=False - ), - go.Scatter( - x=[highlighted_point], - y=[y[highlighted_point]], - mode="markers", - marker_color=highlight_color, - marker_size=highlight_size, - name="current val", - showlegend=False, - ), - ] - - return data - - -def _clean_legend_duplicates(fig): - trace_names = set() - - def disable_legend_if_duplicate(trace): - if trace.name in trace_names: - # in this case the legend entry is a duplicate - trace.update(showlegend=False) - else: - trace_names.add(trace.name) - - fig.for_each_trace(disable_legend_if_duplicate) - return fig - - -def _process_results(result): - """Add model indices to states of optimization result.""" - result = deepcopy(result) - xs = np.array(result.history["params"]) - if result.algorithm in ["nag_pybobyqa", "nag_dfols"]: - for i in range(1, len(result.algorithm_output["states"])): - state = result.algorithm_output["states"][i] - result.algorithm_output["states"][i] = state._replace( - model_indices=_get_model_indices(xs, state), - new_indices=_get_model_indices(xs, state), - index=_find_index( - xs, - state.x, - )[0], - ) - elif result.algorithm in ["tranquilo", "tranquilo_ls"]: - pass - else: - raise NotImplementedError( - f"Diagnostic plots are not implemented for {result.algorithm}" - ) - return result - - -@njit -def _find_indices_in_trust_region(xs, center, radius): - """Get the row indices of all parameter vectors in a trust region.
- - This is for square trust regions, i.e. balls in term of an infinity norm. - - Args: - xs (np.ndarray): 2d numpy array where each row is a parameter vector. - center (np.ndarray): 1d numpy array that marks the center of the trust region. - radius (float): Radius of the trust region. - - Returns: - np.ndarray: The indices of parameters in the trust region. - - """ - n_obs, dim = xs.shape - out = np.zeros(n_obs).astype(np.int64) - success_counter = 0 - upper = center + radius - lower = center - radius - for i in range(n_obs): - success = True - for j in range(dim): - value = xs[i, j] - if not (lower[j] <= value <= upper[j]) or np.isnan(value): - success = False - continue - if success: - out[success_counter] = i - success_counter += 1 - - return out[:success_counter] - - -def _find_index(xs, point): - radius = 1e-100 - out = np.array([]) - while len(out) == 0: - out = _find_indices_in_trust_region(xs=xs, center=point, radius=radius) - radius = np.sqrt(radius) - if len(out) == 1: - return out - else: - return out[0] - - -def _get_model_indices(xs, state): - model_indices = np.array([]) - for point in state.model_points: - model_indices = np.concatenate([model_indices, _find_index(xs, point)]) - return model_indices.astype(int) diff --git a/tests/optimization/subsolvers/test_bntr_fast.py b/tests/optimization/subsolvers/test_bntr_fast.py deleted file mode 100644 index cf24d6216..000000000 --- a/tests/optimization/subsolvers/test_bntr_fast.py +++ /dev/null @@ -1,551 +0,0 @@ -import numpy as np -import pandas as pd -import pytest -from estimagic.config import TEST_FIXTURES_DIR -from estimagic.optimization.subsolvers.bntr import ( - ActiveBounds, - _update_trustregion_radius_and_gradient_descent, - bntr, -) -from estimagic.optimization.subsolvers.bntr import ( - _apply_bounds_to_conjugate_gradient_step as bounds_cg_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _apply_bounds_to_x_candidate as apply_bounds_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _compute_conjugate_gradient_step as cg_step_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _compute_predicted_reduction_from_conjugate_gradient_step as reduction_cg_step_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _evaluate_model_criterion as eval_criterion_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _find_hessian_submatrix_where_bounds_inactive as find_hessian_inact_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _get_fischer_burmeister_direction_vector as fb_vector_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _get_information_on_active_bounds as get_info_bounds_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _perform_gradient_descent_step as gradient_descent_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _project_gradient_onto_feasible_set as grad_feas_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _take_preliminary_gradient_descent_step_and_check_for_solution as pgd_orig, -) -from estimagic.optimization.subsolvers.bntr import ( - _update_trustregion_radius_conjugate_gradient as update_radius_cg_orig, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _apply_bounds_to_conjugate_gradient_step as bounds_cg_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _apply_bounds_to_x_candidate as apply_bounds_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _bntr_fast_jitted, -) -from 
estimagic.optimization.subsolvers.bntr_fast import ( - _compute_conjugate_gradient_step as cg_step_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _compute_predicted_reduction_from_conjugate_gradient_step as reduction_cg_step_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _evaluate_model_criterion as eval_criterion_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _find_hessian_submatrix_where_bounds_inactive as find_hessian_inact_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _get_fischer_burmeister_direction_vector as fb_vector_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _get_information_on_active_bounds as get_info_bounds_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _perform_gradient_descent_step as gradient_descent_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _project_gradient_onto_feasible_set as grad_feas_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _take_preliminary_gradient_descent_step_and_check_for_solution as pgd_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _update_trustregion_radius_and_gradient_descent as _update_trr_and_gd_fast, -) -from estimagic.optimization.subsolvers.bntr_fast import ( - _update_trustregion_radius_conjugate_gradient as update_radius_cg_fast, -) -from estimagic.optimization.tranquilo.models import ScalarModel -from numpy.testing import assert_array_almost_equal as aaae -from numpy.testing import assert_array_equal as aae - - -def test_eval_criterion(): - x_candidate = np.zeros(5) - linear_terms = np.arange(5).astype(float) - square_terms = np.arange(25).reshape(5, 5).astype(float) - assert eval_criterion_orig( - x_candidate, linear_terms, square_terms - ) == eval_criterion_fast(x_candidate, linear_terms, square_terms) - - -def test_get_info_on_active_bounds(): - x_candidate = np.array([-1.5, -1.5, 0, 1.5, 1.5]) - indices = np.arange(len(x_candidate)) - linear_terms = np.array([1, 1, 0, -1, -1]) - lower_bounds = -np.ones(5) - upper_bounds = np.ones(5) - info_orig = get_info_bounds_orig( - x_candidate, linear_terms, lower_bounds, upper_bounds - ) - ( - active_lower, - active_upper, - active_fixed, - inactive, - ) = get_info_bounds_fast(x_candidate, linear_terms, lower_bounds, upper_bounds) - aae(info_orig.lower, indices[active_lower]) - aae(info_orig.upper, indices[active_upper]) - aae(info_orig.fixed, indices[active_fixed]) - aae(info_orig.active, indices[~inactive]) - aae(info_orig.inactive, indices[inactive]) - - -def test_project_gradient_on_feasible_set(): - grad = np.arange(5).astype(float) - bounds_info = ActiveBounds( - inactive=np.array([0, 1, 2]), - ) - inactive = np.array([True, True, True, False, False]) - aae(grad_feas_orig(grad, bounds_info), grad_feas_fast(grad, inactive)) - - -def test_find_hessian_inactive_bounds(): - hessian = np.arange(25).reshape(5, 5).astype(float) - inactive = np.array([False, False, True, True, True]) - model = ScalarModel(square_terms=hessian, intercept=0, linear_terms=np.zeros(5)) - - bounds_info = ActiveBounds( - inactive=np.arange(5)[inactive], - ) - - aae( - find_hessian_inact_orig(model, bounds_info), - find_hessian_inact_fast(hessian, inactive), - ) - - -def test_fischer_burmeister_direction_vector(): - x = np.array([-1.5, -1.5, 0, 1.5, 1.5]) - grad = np.ones(5) - lb = -np.ones(5) - ub = np.ones(5) - aae(fb_vector_orig(x, grad, lb, ub), fb_vector_fast(x, grad, lb, ub)) - - -def 
test_apply_bounds_candidate_x(): - x = np.array([-1.5, -1.5, 0, 1.5, 1.5]) - lb = -np.ones(5) - ub = np.ones(5) - aae(apply_bounds_orig(x, lb, ub), apply_bounds_fast(x, lb, ub)) - - -@pytest.mark.slow() -def test_take_preliminary_gradient_descent_and_check_for_convergence(): - model_gradient = np.array( - [ - -5.71290e02, - -3.11506e03, - -8.18100e02, - 2.47760e02, - -1.26540e02, - ] - ) - model_hessian = np.array( - [ - [-619.23, -1229.2, 321.9, 106.98, -45.45], - [-1229.2, -668.95, -250.05, 165.77, -47.47], - [321.9, -250.05, -1456.88, -144.75, 900.99], - [106.98, 165.77, -144.75, 686.35, -3.51], - [-45.45, -47.47, 900.99, -3.51, -782.91], - ] - ) - model = ScalarModel( - linear_terms=model_gradient, square_terms=model_hessian, intercept=0 - ) - x_candidate = np.zeros(5) - lower_bounds = -np.ones(len(x_candidate)) - upper_bounds = np.ones(len(x_candidate)) - kwargs = { - "x_candidate": x_candidate, - "model": model, - "lower_bounds": lower_bounds, - "upper_bounds": upper_bounds, - "maxiter_gradient_descent": 5, - "gtol_abs": 1e-08, - "gtol_rel": 1e-08, - "gtol_scaled": 0, - } - kwargs_fast = { - "model_gradient": model_gradient, - "model_hessian": model_hessian, - "lower_bounds": lower_bounds, - "upper_bounds": upper_bounds, - "x_candidate": x_candidate, - "maxiter_gradient_descent": 5, - "gtol_abs": 1e-08, - "gtol_rel": 1e-08, - "gtol_scaled": 0, - } - res_fast = pgd_fast(**kwargs_fast) - res_orig = pgd_orig(**kwargs) - for i in range(5): - aae(np.array(res_fast[i]), np.array(res_orig[i])) - bounds_info_orig = res_orig[5] - indices = np.arange(5) - for i, bounds in enumerate(["lower", "upper", "fixed", "inactive"]): - aae( - np.array(getattr(bounds_info_orig, bounds)), - indices[res_fast[5 + i]], - ) - assert res_orig[6] == res_fast[10] - - -@pytest.mark.slow() -def test_apply_bounds_to_conjugate_gradient_step(): - step_inactive = np.ones(7) - x_candidate = np.zeros(10) - lower_bounds = -np.ones(10) - upper_bounds = np.array([1] * 7 + [-0.01] * 3) - indices = np.arange(len(x_candidate)) - inactive_bounds = np.array([True] * 7 + [False] * 3) - active_lower_bounds = np.array([False] * 10) - active_upper_bounds = np.array([False] * 7 + [True] * 3) - active_fixed_bounds = np.array([False] * 10) - bounds_info = ActiveBounds( - lower=indices[active_lower_bounds], - upper=indices[active_upper_bounds], - fixed=indices[active_fixed_bounds], - inactive=indices[inactive_bounds], - ) - res_fast = bounds_cg_fast( - step_inactive, - x_candidate, - lower_bounds, - upper_bounds, - inactive_bounds, - active_lower_bounds, - active_upper_bounds, - active_fixed_bounds, - ) - res_orig = bounds_cg_orig( - step_inactive, x_candidate, lower_bounds, upper_bounds, bounds_info - ) - aae(res_orig, res_fast) - pass - - -@pytest.mark.slow() -def test_compute_conjugate_gradient_setp(): - x_candidate = np.array([0] * 8 + [1.5] * 2) - gradient_inactive = np.arange(6).astype(float) - hessian_inactive = np.arange(36).reshape(6, 6).astype(float) - lower_bounds = np.array([-1] * 6 + [0.5] * 2 + [-1] * 2) - upper_bounds = np.ones(10) - indices = np.arange(len(x_candidate)) - inactive = np.array([True] * 6 + [False] * 4) - active_lower = np.array([False] * 5 + [True, True] + [False] * 3) - active_upper = np.array([False] * 8 + [True] * 2) - active_fixed = np.array([False] * 10) - bounds_info = ActiveBounds( - inactive=indices[inactive], - lower=indices[active_lower], - upper=indices[active_upper], - fixed=indices[active_fixed], - ) - tr_radius = 10.0 - cg_method = "trsbox" - gtol_abs = 1e-8 - gtol_rel = 1e-8 - default_radius 
= 100.00 - min_radius = 1e-10 - max_radius = 1e10 - - res_fast = cg_step_fast( - x_candidate, - gradient_inactive, - hessian_inactive, - lower_bounds, - upper_bounds, - inactive, - active_lower, - active_upper, - active_fixed, - tr_radius, - cg_method, - gtol_abs, - gtol_rel, - default_radius, - min_radius, - max_radius, - ) - res_orig = cg_step_orig( - x_candidate=x_candidate, - gradient_inactive=gradient_inactive, - hessian_inactive=hessian_inactive, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - active_bounds_info=bounds_info, - trustregion_radius=tr_radius, - conjugate_gradient_method=cg_method, - gtol_abs_conjugate_gradient=gtol_abs, - gtol_rel_conjugate_gradient=gtol_rel, - options_update_radius={ - "default_radius": default_radius, - "min_radius": min_radius, - "max_radius": max_radius, - }, - ) - aae(res_orig[0], res_fast[0]) - aae(res_orig[1], res_fast[1]) - aaae(res_orig[2], res_fast[2]) - - -@pytest.mark.slow() -def test_compute_predicet_reduction_from_conjugate_gradient_step(): - cg_step = np.arange(10).astype(float) / 10 - cg_step_inactive = np.array([1, 2, 3]).astype(float) - grad = np.arange(10).astype(float) - grad_inactive = np.arange(3).astype(float) - hessian_inactive = np.arange(9).reshape(3, 3).astype(float) - indices = np.arange(10) - inactive_bounds = np.array([False] + [True] * 3 + [False] * 6) - res_fast = reduction_cg_step_fast( - cg_step, - cg_step_inactive, - grad, - grad_inactive, - hessian_inactive, - inactive_bounds, - ) - bounds_info = ActiveBounds( - inactive=indices[inactive_bounds], active=indices[~inactive_bounds] - ) - res_orig = reduction_cg_step_orig( - cg_step, cg_step_inactive, grad, grad_inactive, hessian_inactive, bounds_info - ) - aae(res_orig, res_fast) - - -@pytest.mark.slow() -def test_update_trustregion_radius_conjugate_gradient(): - f_candidate = -1234.56 - predicted_reduction = 200 - actual_reduction = 150 - x_norm_cg = 3.16 - tr_radius = 5 - options_update_radius = { - "eta1": 1.0e-4, - "eta2": 0.25, - "eta3": 0.50, - "eta4": 0.90, - "alpha1": 0.25, - "alpha2": 0.50, - "alpha3": 1.00, - "alpha4": 2.00, - "alpha5": 4.00, - "min_radius": 1e-10, - "max_radius": 1e10, - } - res_fast = update_radius_cg_fast( - f_candidate=f_candidate, - predicted_reduction=predicted_reduction, - actual_reduction=actual_reduction, - x_norm_cg=x_norm_cg, - trustregion_radius=tr_radius, - **options_update_radius, - ) - res_orig = update_radius_cg_orig( - f_candidate=f_candidate, - predicted_reduction=predicted_reduction, - actual_reduction=actual_reduction, - x_norm_cg=x_norm_cg, - trustregion_radius=tr_radius, - options=options_update_radius, - ) - assert res_orig[0] == res_fast[0] - assert res_orig[1] == res_fast[1] - - -@pytest.mark.slow() -def test_perform_gradient_descent_step(): - x_candidate = np.zeros(10) - f_candidate_initial = 1234.56 - gradient_projected = np.arange(10).astype(float) - hessian_inactive = np.arange(64).reshape(8, 8).astype(float) - model_gradient = gradient_projected / 2 - model_hessian = np.arange(100).reshape(10, 10).astype(float) - lower_bounds = -np.ones(10) - upper_bounds = np.array([1] * 8 + [-0.01] * 2) - indices = np.arange(10) - inactive_bounds = np.array([True] * 8 + [False] * 2) - - maxiter = 3 - options_update_radius = { - "mu1": 0.35, - "mu2": 0.50, - "gamma1": 0.0625, - "gamma2": 0.5, - "gamma3": 2.0, - "gamma4": 5.0, - "theta": 0.25, - "default_radius": 100, - } - model = ScalarModel( - linear_terms=model_gradient, square_terms=model_hessian, intercept=0 - ) - bounds_info = 
ActiveBounds(inactive=indices[inactive_bounds]) - res_fast = gradient_descent_fast( - x_candidate=x_candidate, - f_candidate_initial=f_candidate_initial, - gradient_projected=gradient_projected, - hessian_inactive=hessian_inactive, - model_gradient=model_gradient, - model_hessian=model_hessian, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - inactive_bounds=inactive_bounds, - maxiter_steepest_descent=maxiter, - **options_update_radius, - ) - res_orig = gradient_descent_orig( - x_candidate=x_candidate, - f_candidate_initial=f_candidate_initial, - gradient_projected=gradient_projected, - hessian_inactive=hessian_inactive, - model=model, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - active_bounds_info=bounds_info, - maxiter_steepest_descent=maxiter, - options_update_radius=options_update_radius, - ) - aae(res_orig[0], res_fast[0]) - for i in range(1, len(res_orig)): - assert res_orig[i] == res_fast[i] - - -@pytest.mark.slow() -def test_update_trustregion_radius_and_gradient_descent(): - options_update_radius = { - "mu1": 0.35, - "mu2": 0.50, - "gamma1": 0.0625, - "gamma2": 0.5, - "gamma3": 2.0, - "gamma4": 5.0, - "theta": 0.25, - "min_radius": 1e-10, - "max_radius": 1e10, - "default_radius": 100, - } - - trustregion_radius = 100.00 - radius_lower_bound = 90.00 - predicted_reduction = 0.9 - actual_reduction = 1.1 - gradient_norm = 10.0 - res_orig = _update_trustregion_radius_and_gradient_descent( - trustregion_radius, - radius_lower_bound, - predicted_reduction, - actual_reduction, - gradient_norm, - options_update_radius, - ) - options_update_radius.pop("min_radius") - options_update_radius.pop("max_radius") - options_update_radius.pop("default_radius") - res_fast = _update_trr_and_gd_fast( - trustregion_radius, - radius_lower_bound, - predicted_reduction, - actual_reduction, - gradient_norm, - **options_update_radius, - ) - assert res_orig[0] == res_fast[0] - assert res_fast[1] == res_orig[1] - - -@pytest.mark.slow() -def test_minimize_bntr(): - model_data = pd.read_pickle(TEST_FIXTURES_DIR / "scalar_model.pkl") - model = ScalarModel(**model_data) - lower_bounds = -np.ones(len(model.linear_terms)) - upper_bounds = np.ones(len(model.linear_terms)) - options = { - "maxiter": 20, - "maxiter_gradient_descent": 5, - "conjugate_gradient_method": "cg", - "gtol_abs": 1e-08, - "gtol_rel": 1e-08, - "gtol_scaled": 0.0, - "gtol_abs_conjugate_gradient": 1e-08, - "gtol_rel_conjugate_gradient": 1e-06, - } - x0 = np.zeros_like(lower_bounds) - res_orig = bntr(model, lower_bounds, upper_bounds, x_candidate=x0, **options) - res_fast = _bntr_fast_jitted( - model_gradient=model.linear_terms, - model_hessian=model.square_terms, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - x_candidate=x0, - **options, - ) - # using aaae to get tests run on windows machines. 
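# NOTE (editorial sketch): `aaae` is numpy.testing.assert_array_almost_equal,
# aliased in the imports of this test module. By default it requires agreement
# to 6 decimal places (abs(actual - desired) < 1.5e-6), which absorbs the tiny
# floating-point differences the jitted solver produces across platforms. A
# minimal, self-contained illustration with hypothetical values:
import numpy as np
from numpy.testing import assert_array_almost_equal

assert_array_almost_equal(np.array([1.0 + 4e-7]), np.array([1.0]))  # passes
# assert_array_almost_equal(np.array([1.0 + 4e-6]), np.array([1.0]))  # would raise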
- aaae(res_orig["x"], res_fast[0]) - aaae(res_orig["criterion"], res_fast[1]) - assert res_orig["success"] == res_fast[3] - - -@pytest.mark.slow() -def test_minimize_bntr_break_loop_early(): - model_data = pd.read_pickle(TEST_FIXTURES_DIR / "scalar_model.pkl") - model = ScalarModel(**model_data) - lower_bounds = -np.ones(len(model.linear_terms)) - upper_bounds = np.ones(len(model.linear_terms)) - options = { - "maxiter": 20, - "maxiter_gradient_descent": 5, - "conjugate_gradient_method": "cg", - "gtol_abs": 10, - "gtol_rel": 10, - "gtol_scaled": 10, - "gtol_abs_conjugate_gradient": 10, - "gtol_rel_conjugate_gradient": 10, - } - res_fast = _bntr_fast_jitted( - model_gradient=model.linear_terms, - model_hessian=model.square_terms, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - x_candidate=np.zeros_like(lower_bounds), - **options, - ) - # using aaae to get tests run on windows machines. - aaae(np.zeros(len(model.linear_terms)), res_fast[0]) - aaae(0, res_fast[1]) - assert res_fast[3] - assert res_fast[2] == 0 diff --git a/tests/optimization/subsolvers/test_gqtpar_fast.py b/tests/optimization/subsolvers/test_gqtpar_fast.py deleted file mode 100644 index 2c8fdb66e..000000000 --- a/tests/optimization/subsolvers/test_gqtpar_fast.py +++ /dev/null @@ -1,98 +0,0 @@ -import numpy as np -from estimagic.optimization.subsolvers.gqtpar import ( - DampingFactors, - HessianInfo, -) -from estimagic.optimization.subsolvers.gqtpar import ( - _compute_smallest_step_len_for_candidate_vector as compute_smallest_step_orig, -) -from estimagic.optimization.subsolvers.gqtpar import ( - _find_new_candidate_and_update_parameters as find_new_and_update_candidate_orig, -) -from estimagic.optimization.subsolvers.gqtpar import ( - _get_initial_guess_for_lambdas as init_lambdas_orig, -) -from estimagic.optimization.subsolvers.gqtpar_fast import ( - _compute_smallest_step_len_for_candidate_vector as compute_smallest_step_fast, -) -from estimagic.optimization.subsolvers.gqtpar_fast import ( - _find_new_candidate_and_update_parameters as find_new_and_update_candidate_fast, -) -from estimagic.optimization.subsolvers.gqtpar_fast import ( - _get_initial_guess_for_lambdas as init_lambdas_fast, -) -from estimagic.optimization.tranquilo.models import ScalarModel -from numpy.testing import assert_array_almost_equal as aaae - - -def test_get_initial_guess_for_lambda(): - rng = np.random.default_rng(12345) - model_gradient = rng.random(10) - model_hessian = rng.random((10, 10)) - model_hessian = model_hessian @ model_hessian.T - model = ScalarModel( - linear_terms=model_gradient, square_terms=model_hessian, intercept=None - ) - res = init_lambdas_fast(model_gradient, model_hessian) - expected = init_lambdas_orig(model) - assert res[0] == expected.candidate - assert res[1] == expected.lower_bound - aaae(res[2], expected.upper_bound) - - -def test_find_new_candidate_and_update_parameters(): - rng = np.random.default_rng(12345) - model_gradient = rng.random(10) - model_hessian = rng.random((10, 10)) - model_hessian = model_hessian @ model_hessian.T - model = ScalarModel( - linear_terms=model_gradient, square_terms=model_hessian, intercept=None - ) - hessian_upper_triangular = np.triu(model_hessian) - candidate = 0.8 - hessian_plus_lambda = model_hessian + candidate * np.eye(10) - lower_bound = 0.3 - upper_bound = 1.3 - criteria = {"k_easy": 0.1, "k_hard": 0.2} - converged = False - already_factorized = False - lambdas = DampingFactors( - candidate=candidate, lower_bound=lower_bound, upper_bound=upper_bound - ) - hessian_info 
= HessianInfo( - hessian_plus_lambda=hessian_plus_lambda, - upper_triangular=hessian_upper_triangular, - already_factorized=already_factorized, - ) - res = find_new_and_update_candidate_fast( - model_gradient, - model_hessian, - hessian_upper_triangular, - hessian_plus_lambda, - already_factorized, - candidate, - lower_bound, - upper_bound, - criteria, - converged, - ) - expected = find_new_and_update_candidate_orig( - model, hessian_info, lambdas, criteria, converged - ) - aaae(res[0], expected[0]) - aaae(res[1], expected[1].hessian_plus_lambda) - aaae(res[2], expected[1].already_factorized) - aaae(res[3], expected[2].candidate) - aaae(res[4], expected[2].lower_bound) - aaae(res[5], expected[2].upper_bound) - assert res[6] == expected[3] - - -def test_compute_smallest_step_len_for_candidate_vector(): - rng = np.random.default_rng(12345) - x_candidate = rng.random(10) - rng = np.random.default_rng(45667) - z_min = rng.random(10) - res = compute_smallest_step_fast(x_candidate, z_min) - expected = compute_smallest_step_orig(x_candidate, z_min) - aaae(res, expected) diff --git a/tests/optimization/subsolvers/test_gqtpar_lambdas.py b/tests/optimization/subsolvers/test_gqtpar_lambdas.py deleted file mode 100644 index 606af5a98..000000000 --- a/tests/optimization/subsolvers/test_gqtpar_lambdas.py +++ /dev/null @@ -1,20 +0,0 @@ -import estimagic as em -from estimagic.benchmarking.get_benchmark_problems import get_benchmark_problems - - -def test_gqtpar_lambdas(): - algo_options = { - "disable_convergence": True, - "stopping_max_iterations": 30, - "sample_filter": "keep_all", - "sampler": "random_hull", - "subsolver_options": {"k_hard": 0.001, "k_easy": 0.001}, - } - problem_info = get_benchmark_problems("more_wild")["freudenstein_roth_good_start"] - - em.minimize( - criterion=problem_info["inputs"]["criterion"], - params=problem_info["inputs"]["params"], - algo_options=algo_options, - algorithm="tranquilo", - ) diff --git a/tests/optimization/subsolvers/test_minimize_trust_region.py b/tests/optimization/subsolvers/test_minimize_trust_region.py deleted file mode 100644 index b00941fa8..000000000 --- a/tests/optimization/subsolvers/test_minimize_trust_region.py +++ /dev/null @@ -1,484 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.subsolvers._conjugate_gradient import ( - _get_distance_to_trustregion_boundary as gdtb, -) -from estimagic.optimization.subsolvers._conjugate_gradient import ( - _update_vectors_for_next_iteration as uvnr, -) -from estimagic.optimization.subsolvers._conjugate_gradient import ( - minimize_trust_cg, -) -from estimagic.optimization.subsolvers._conjugate_gradient_fast import ( - _get_distance_to_trustregion_boundary as gdtb_fast, -) -from estimagic.optimization.subsolvers._conjugate_gradient_fast import ( - _update_vectors_for_next_iteration as uvnr_fast, -) -from estimagic.optimization.subsolvers._conjugate_gradient_fast import ( - minimize_trust_cg_fast, -) -from estimagic.optimization.subsolvers._steihaug_toint import ( - minimize_trust_stcg, -) -from estimagic.optimization.subsolvers._steihaug_toint_fast import ( - minimize_trust_stcg_fast, -) -from estimagic.optimization.subsolvers._trsbox import ( - _apply_bounds_to_candidate_vector, - minimize_trust_trsbox, -) -from estimagic.optimization.subsolvers._trsbox import ( - _calc_greatest_criterion_reduction as greatest_reduction_orig, -) -from estimagic.optimization.subsolvers._trsbox import ( - _calc_new_reduction as new_reduction_orig, -) -from estimagic.optimization.subsolvers._trsbox import ( - 
_calc_upper_bound_on_tangent as upper_bound_tangent_orig, -) -from estimagic.optimization.subsolvers._trsbox import ( - _compute_new_search_direction_and_norm as new_dir_and_norm_orig, -) -from estimagic.optimization.subsolvers._trsbox import ( - _take_constrained_step_up_to_boundary as step_constrained_orig, -) -from estimagic.optimization.subsolvers._trsbox import ( - _take_unconstrained_step_up_to_boundary as step_unconstrained_orig, -) -from estimagic.optimization.subsolvers._trsbox import ( - _update_candidate_vectors_and_reduction as update_candidate_orig, -) -from estimagic.optimization.subsolvers._trsbox import ( - _update_candidate_vectors_and_reduction_alt_step as update_candidate_alt_orig, -) -from estimagic.optimization.subsolvers._trsbox import ( - _update_tangent as update_tanget_orig, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _calc_greatest_criterion_reduction as greatest_reduction_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _calc_new_reduction as new_reduction_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _calc_upper_bound_on_tangent as upper_bound_tangent_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _compute_new_search_direction_and_norm as new_dir_and_norm_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _perform_alternative_trustregion_step as perform_step_alt_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _perform_alternative_trustregion_step as perform_step_alt_orig, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _take_constrained_step_up_to_boundary as step_constrained_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _take_unconstrained_step_up_to_boundary as step_unconstrained_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _update_candidate_vectors_and_reduction as update_candidate_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _update_candidate_vectors_and_reduction_alt_step as update_candidate_alt_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - _update_tangent as update_tanget_fast, -) -from estimagic.optimization.subsolvers._trsbox_fast import ( - minimize_trust_trsbox_fast, -) -from numpy.testing import assert_array_almost_equal as aaae -from numpy.testing import assert_array_equal as aae - - -def test_minimize_trust_cg(): - grad = np.arange(5).astype(float) - hessian = np.arange(25).reshape(5, 5).astype(float) - radius = 2 - gtol_abs = 1e-8 - gtol_rel = 1e-6 - aae( - minimize_trust_cg(grad, hessian, radius), - minimize_trust_cg_fast(grad, hessian, radius, gtol_abs, gtol_rel), - ) - - -def test_get_distance_to_trustregion_boundary(): - x = np.arange(5).astype(float) - direction = np.arange(5).astype(float) - radius = 2 - assert gdtb(x, direction, radius) == gdtb_fast(x, direction, radius) - - -def test_update_vectors(): - x = np.arange(5).astype(float) - residual = np.ones(5) * 0.5 - direction = np.ones(5) - hessian = np.arange(25).reshape(5, 5) - alpha = 0.5 - res_orig = uvnr(x, residual, direction, hessian, alpha) - res_fast = uvnr_fast(x, residual, direction, hessian, alpha) - for i in range(len(res_orig)): - aae(res_orig[i], res_fast[i]) - - -def test_take_unconstrained_step_towards_boundary(): - raw_distance = np.array([0.5]) - gradient_sumsq = 5.0 - gradient_projected_sumsq = 2.5 - g_x = 0.3 - g_hess_g = -0.3 - for i in range(2): - assert ( - step_unconstrained_orig( - raw_distance, 
gradient_sumsq, gradient_projected_sumsq, g_x, g_hess_g - )[i] - == step_unconstrained_fast( - raw_distance, gradient_sumsq, gradient_projected_sumsq, g_x, g_hess_g - )[i] - ) - - -def test_take_constrained_step_towards_boundary(): - x_candidate = np.zeros(5) - gradient_projected = np.ones(5) - step_len = 2.5 - lower_bounds = np.array([-1.0] * 3 + [0.01] * 2) - upper_bounds = np.ones(5) - for i in range(2): - assert ( - step_constrained_orig( - x_candidate, gradient_projected, step_len, lower_bounds, upper_bounds - )[i] - == step_constrained_fast( - x_candidate, gradient_projected, step_len, lower_bounds, upper_bounds - )[i] - ) - - -def test_update_candidate_vector_and_reduction_alt_step(): - x = np.zeros(5) - search_direction = 0.5 * np.ones(5) - x_bounded = np.array([0] * 2 + [1] * 3) - g = np.ones(5) - cosine = 0.5 - sine = 0.5 - hessian_s = np.ones(5) - hes_red = np.ones(5) - res_orig = update_candidate_alt_orig( - x, search_direction, x_bounded, g, cosine, sine, hessian_s, hes_red - ) - - res_fast = update_candidate_alt_fast( - x, search_direction, x_bounded, g, cosine, sine, hessian_s, hes_red - ) - for i in range(len(res_orig)): - aae(res_orig[i], res_fast[i]) - - -def test_update_candidate_vector_and_reduction(): - x_candidate = np.zeros(5) - x_bounded = np.array([0] * 3 + [-0.01] * 2) - gradient_candidate = np.ones(5) - gradient_projected = 0.5 * np.ones(5) - step_len = 0.05 - total_reduction = 0 - curve_min = -0.5 - index_bound_active = 3 - gradient_projected_sumsq = 25 - gradient_sumsq = 25 - g_hess_g = 100 - hess_g = np.arange(5).astype(float) - res_fast = update_candidate_fast( - x_candidate, - x_bounded, - gradient_candidate, - gradient_projected, - step_len, - total_reduction, - curve_min, - index_bound_active, - gradient_projected_sumsq, - gradient_sumsq, - g_hess_g, - hess_g, - ) - res_orig = update_candidate_orig( - x_candidate, - x_bounded, - gradient_candidate, - gradient_projected, - step_len, - total_reduction, - curve_min, - index_bound_active, - gradient_projected_sumsq, - gradient_sumsq, - g_hess_g, - hess_g, - ) - for i in range(len(res_orig)): - aae(res_orig[i], res_fast[i]) - - -def test_update_candidate_vector_and_reduction_without_active_bounds(): - x_candidate = np.zeros(5) - x_bounded = np.zeros(5) - gradient_candidate = np.ones(5) - gradient_projected = 0.5 * np.ones(5) - step_len = 0.05 - total_reduction = 0 - curve_min = -0.5 - gradient_projected_sumsq = 25 - gradient_sumsq = 25 - g_hess_g = 100 - hess_g = np.arange(5).astype(float) - res_fast = update_candidate_fast( - x_candidate, - x_bounded, - gradient_candidate, - gradient_projected, - step_len, - total_reduction, - curve_min, - np.array([]), - gradient_projected_sumsq, - gradient_sumsq, - g_hess_g, - hess_g, - ) - res_orig = update_candidate_orig( - x_candidate, - x_bounded, - gradient_candidate, - gradient_projected, - step_len, - total_reduction, - curve_min, - None, - gradient_projected_sumsq, - gradient_sumsq, - g_hess_g, - hess_g, - ) - for i in range(len(res_orig)): - aae(res_orig[i], res_fast[i]) - - -@pytest.mark.slow() -def test_perform_alternative_tr_step(): - x_candidate = np.zeros(5) - x_bounded = np.array([0.1] * 2 + [0] * 3) - gradient_candidate = np.ones(5).astype(float) - model_hessian = np.arange(25).reshape(5, 5).astype(float) - lower_bounds = np.array([0.1] * 2 + [-1] * 3) - upper_bounds = np.ones(5) - n_fixed_variables = 1 - total_reduction = 1.5 - res_orig = perform_step_alt_orig( - x_candidate, - x_bounded, - gradient_candidate, - model_hessian, - lower_bounds, - upper_bounds, 
- n_fixed_variables, - total_reduction, - ) - res_fast = perform_step_alt_fast( - x_candidate, - x_bounded, - gradient_candidate, - model_hessian, - lower_bounds, - upper_bounds, - n_fixed_variables, - total_reduction, - ) - aae(res_orig, res_fast) - - -@pytest.mark.slow() -def test_perform_alternative_tr_step_without_active_bounds(): - x_candidate = np.zeros(5) - x_bounded = np.zeros(5) - gradient_candidate = np.ones(5).astype(float) - model_hessian = np.arange(25).reshape(5, 5).astype(float) - lower_bounds = -10 * np.ones(5) - upper_bounds = 10 * np.ones(5) - n_fixed_variables = 1 - total_reduction = 1.5 - res_orig = perform_step_alt_orig( - x_candidate, - x_bounded, - gradient_candidate, - model_hessian, - lower_bounds, - upper_bounds, - n_fixed_variables, - total_reduction, - ) - res_fast = perform_step_alt_fast( - x_candidate, - x_bounded, - gradient_candidate, - model_hessian, - lower_bounds, - upper_bounds, - n_fixed_variables, - total_reduction, - ) - aae(res_orig, res_fast) - - -def test_calc_upper_bound_on_tangent(): - x_candidate = np.zeros(5) - search_direction = 0.5 * np.ones(5) - x_bounded = np.array([0] * 4 + [0.01]) - lower_bounds = np.array([-1] * 4 + [0.01]) - upper_bounds = np.ones(5) - n_fixed_variables = 2 - res_orig = upper_bound_tangent_orig( - x_candidate, - search_direction, - x_bounded, - lower_bounds, - upper_bounds, - n_fixed_variables, - ) - res_fast = upper_bound_tangent_fast( - x_candidate, - search_direction, - x_bounded, - lower_bounds, - upper_bounds, - n_fixed_variables, - ) - for i in range(len(res_orig)): - aae(res_orig[i], res_fast[i]) - - -def test_calc_upper_bound_on_tangent_without_active_bounds(): - x_candidate = np.zeros(5) - search_direction = 0.5 * np.ones(5) - x_bounded = np.zeros(5) - lower_bounds = -np.ones(5) - upper_bounds = np.ones(5) - n_fixed_variables = 2 - res_orig = upper_bound_tangent_orig( - x_candidate, - search_direction, - x_bounded, - lower_bounds, - upper_bounds, - n_fixed_variables, - ) - res_fast = upper_bound_tangent_fast( - x_candidate, - search_direction, - x_bounded, - lower_bounds, - upper_bounds, - n_fixed_variables, - ) - for i in range(len(res_orig)): - if res_orig[i] is not None: - aae(res_orig[i], res_fast[i]) - else: - assert res_fast[i].size == 0 - - -@pytest.mark.slow() -def test_minimize_trs_box_quadratic(): - model_gradient = np.arange(10).astype(float) - model_hessian = np.arange(100).reshape(10, 10).astype(float) - trustregion_radius = 10.0 - lower_bounds = -np.ones(10) - upper_bounds = np.ones(10) - res_fast = minimize_trust_trsbox_fast( - model_gradient, model_hessian, trustregion_radius, lower_bounds, upper_bounds - ) - res_orig = minimize_trust_trsbox( - model_gradient, - model_hessian, - trustregion_radius, - lower_bounds=lower_bounds, - upper_bounds=upper_bounds, - ) - aae(res_fast, res_orig) - - -@pytest.mark.slow() -def test_minimize_stcg_fast(): - model_gradient = np.arange(10).astype(float) - model_hessian = np.arange(100).reshape(10, 10).astype(float) - trustregion_radius = 10.0 - res_orig = minimize_trust_stcg(model_gradient, model_hessian, trustregion_radius) - res_fast = minimize_trust_stcg_fast( - model_gradient, model_hessian, trustregion_radius - ) - aaae(res_orig, res_fast) - - -def test_minimize_cg(): - model_gradient = np.arange(10).astype(float) - model_hessian = np.arange(100).reshape(10, 10).astype(float) - trustregion_radius = 10.0 - gtol_abs = 1e-8 - gtol_rel = 1e-6 - res_orig = minimize_trust_cg(model_gradient, model_hessian, trustregion_radius) - res_fast = minimize_trust_cg_fast( - 
model_gradient, model_hessian, trustregion_radius, gtol_abs, gtol_rel - ) - aaae(res_orig, res_fast) - - -def test_apply_bounds_to_candidate_vector(): - x_bounded = np.array([-1, 1, 0, 0, 0]) - x_candidate = np.zeros(5) - lower_bounds = np.array([-1, -1, 0.01, -1, -1]) - upper_bounds = np.array([1, 1, 1, -0.01, 1]) - res = _apply_bounds_to_candidate_vector( - x_candidate, x_bounded, lower_bounds, upper_bounds - ) - expected = np.array([-1, 1, 0.01, -0.01, 0]) - aae(res, expected) - - -def test_calc_greatest_criterion_reduction(): - res = greatest_reduction_fast(0.8, 1.1, 1.1, 1.1, 1.1, 1.1) - expected = greatest_reduction_orig(0.8, 1.1, 1.1, 1.1, 1.1, 1.1) - assert res == expected - - -def test_calc_new_reduction(): - res = new_reduction_fast(0.8, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1) - expected = new_reduction_orig(0.8, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1) - assert res == expected - - -def test_update_tangent(): - res = update_tanget_fast(0, 0.8, 2, 2, 1, 3) - expected = update_tanget_orig(0, 0.8, 2, 2, 1, 3) - assert res == expected - - -def test_compute_new_search_direction_and_norm(): - x_candidate = np.zeros(5) - x_bounded = np.zeros(5) - gradient_candidate = np.ones(5) - x_reduced = 0.5 - x_grad = 1 - raw_reduction = 0.5 - res = new_dir_and_norm_fast( - x_candidate, x_bounded, x_reduced, gradient_candidate, x_grad, raw_reduction - ) - expected = new_dir_and_norm_orig( - x_candidate, x_bounded, x_reduced, gradient_candidate, x_grad, raw_reduction - ) - aaae(expected[0], res[0]) - aaae(expected[1], res[1]) diff --git a/tests/optimization/test_history_collection.py b/tests/optimization/test_history_collection.py index 0354dfe61..57042825e 100644 --- a/tests/optimization/test_history_collection.py +++ b/tests/optimization/test_history_collection.py @@ -3,7 +3,7 @@ import numpy as np import pytest from estimagic.logging.read_log import OptimizeLogReader -from estimagic.optimization import AVAILABLE_ALGORITHMS +from estimagic.algorithms import AVAILABLE_ALGORITHMS from estimagic.optimization.optimize import minimize from numpy.testing import assert_array_almost_equal as aaae from numpy.testing import assert_array_equal as aae diff --git a/tests/optimization/test_many_algorithms.py b/tests/optimization/test_many_algorithms.py index ff738982b..8ab1373c6 100644 --- a/tests/optimization/test_many_algorithms.py +++ b/tests/optimization/test_many_algorithms.py @@ -9,7 +9,7 @@ import numpy as np import pytest -from estimagic.optimization import AVAILABLE_ALGORITHMS, GLOBAL_ALGORITHMS +from estimagic.algorithms import AVAILABLE_ALGORITHMS, GLOBAL_ALGORITHMS from estimagic.optimization.optimize import minimize from numpy.testing import assert_array_almost_equal as aaae diff --git a/tests/optimization/test_quadratic_subsolvers.py b/tests/optimization/test_quadratic_subsolvers.py index f541d4a82..c2d9f1fe2 100644 --- a/tests/optimization/test_quadratic_subsolvers.py +++ b/tests/optimization/test_quadratic_subsolvers.py @@ -12,9 +12,6 @@ from estimagic.optimization.subsolvers.bntr import ( bntr, ) -from estimagic.optimization.subsolvers.bntr_fast import ( - bntr_fast, -) from estimagic.optimization.subsolvers.gqtpar import ( gqtpar, ) @@ -473,15 +470,7 @@ def test_bounded_newton_trustregion( x_candidate=np.zeros_like(x_expected), **options ) - result_fast = bntr_fast( - main_model, - lower_bounds, - upper_bounds, - x_candidate=np.zeros_like(x_expected), - **options - ) aaae(result["x"], x_expected, decimal=5) - aaae(result_fast["x"], x_expected, decimal=5) # 
====================================================================================== diff --git a/tests/optimization/test_with_nonlinear_constraints.py b/tests/optimization/test_with_nonlinear_constraints.py index 9aabfc233..284816864 100644 --- a/tests/optimization/test_with_nonlinear_constraints.py +++ b/tests/optimization/test_with_nonlinear_constraints.py @@ -5,7 +5,7 @@ import pytest from estimagic import maximize, minimize from estimagic.config import IS_CYIPOPT_INSTALLED -from estimagic.optimization import AVAILABLE_ALGORITHMS +from estimagic.algorithms import AVAILABLE_ALGORITHMS from numpy.testing import assert_array_almost_equal as aaae NLC_ALGORITHMS = [ diff --git a/tests/optimization/tranquilo/test_acceptance_decision.py b/tests/optimization/tranquilo/test_acceptance_decision.py deleted file mode 100644 index 57457be2f..000000000 --- a/tests/optimization/tranquilo/test_acceptance_decision.py +++ /dev/null @@ -1,140 +0,0 @@ -from collections import namedtuple - -import numpy as np -import pytest -from estimagic.optimization.tranquilo.acceptance_decision import ( - _accept_simple, - _get_acceptance_result, - calculate_rho, -) -from estimagic.optimization.tranquilo.history import History -from estimagic.optimization.tranquilo.region import Region -from estimagic.optimization.tranquilo.solve_subproblem import SubproblemResult -from numpy.testing import assert_array_equal - -# ====================================================================================== -# Fixtures -# ====================================================================================== - - -@pytest.fixture() -def subproblem_solution(): - res = SubproblemResult( - x=1 + np.arange(2.0), - expected_improvement=1.0, - n_iterations=1, - success=True, - x_unit=None, - shape=None, - ) - return res - - -# ====================================================================================== -# Test accept_xxx -# ====================================================================================== - - -trustregion = Region(center=np.zeros(2), radius=2.0) -State = namedtuple("State", "x trustregion fval index") -states = [ # we will parametrize over `states` - State(np.arange(2.0), trustregion, 0.25, 0), # better than candidate - State(np.arange(2.0), trustregion, 1, 0), # worse than candidate -] - - -@pytest.mark.parametrize("state", states) -def test_accept_simple( - state, - subproblem_solution, -): - history = History(functype="scalar") - - idxs = history.add_xs(np.arange(10).reshape(5, 2)) - - history.add_evals(idxs.repeat(2), np.arange(10)) - - def wrapped_criterion(eval_info): - indices = np.array(list(eval_info)).repeat(np.array(list(eval_info.values()))) - history.add_evals(indices, -indices) - - res_got = _accept_simple( - subproblem_solution=subproblem_solution, - state=state, - history=history, - wrapped_criterion=wrapped_criterion, - min_improvement=0.0, - n_evals=2, - ) - - assert res_got.accepted - assert res_got.index == 5 - assert res_got.candidate_index == 5 - assert_array_equal(res_got.x, subproblem_solution.x) - assert_array_equal(res_got.candidate_x, 1.0 + np.arange(2)) - - -# ====================================================================================== -# Test _get_acceptance_result -# ====================================================================================== - - -def test_get_acceptance_result(): - candidate_x = 1 + np.arange(2) - candidate_fval = 0 - candidate_index = 0 - rho = 1 - tr = Region(center=np.zeros(2), radius=2.0) - old_state = namedtuple("State", "x fval 
index trustregion")(np.arange(2), 1, 1, tr) - - ar_when_accepted = _get_acceptance_result( - candidate_x=candidate_x, - candidate_fval=candidate_fval, - candidate_index=candidate_index, - rho=rho, - old_state=old_state, - is_accepted=True, - ) - - assert_array_equal(ar_when_accepted.x, candidate_x) - assert ar_when_accepted.fval == candidate_fval - assert ar_when_accepted.index == candidate_index - assert ar_when_accepted.accepted is True - assert ar_when_accepted.step_length == np.sqrt(2) - assert ar_when_accepted.relative_step_length == np.sqrt(2) / 2 - - ar_when_not_accepted = _get_acceptance_result( - candidate_x=candidate_x, - candidate_fval=candidate_fval, - candidate_index=candidate_index, - rho=rho, - old_state=old_state, - is_accepted=False, - ) - - assert_array_equal(ar_when_not_accepted.x, old_state.x) - assert ar_when_not_accepted.fval == old_state.fval - assert ar_when_not_accepted.index == old_state.index - assert ar_when_not_accepted.accepted is False - assert ar_when_not_accepted.step_length == 0 - assert ar_when_not_accepted.relative_step_length == 0 - - -# ====================================================================================== -# Test calculate_rho -# ====================================================================================== - - -CASES = [ - (0, 0, -np.inf), - (-1, 0, -np.inf), - (1, 0, np.inf), - (0, 1, 0), - (1, 2, 1 / 2), -] - - -@pytest.mark.parametrize("actual_improvement, expected_improvement, expected", CASES) -def test_calculate_rho(actual_improvement, expected_improvement, expected): - rho = calculate_rho(actual_improvement, expected_improvement) - assert rho == expected diff --git a/tests/optimization/tranquilo/test_acceptance_sample_size.py b/tests/optimization/tranquilo/test_acceptance_sample_size.py deleted file mode 100644 index 85f279c3e..000000000 --- a/tests/optimization/tranquilo/test_acceptance_sample_size.py +++ /dev/null @@ -1,86 +0,0 @@ -import pytest -from estimagic.optimization.tranquilo.acceptance_sample_size import ( - _compute_factor, - _get_optimal_sample_sizes, -) -from scipy.stats import norm - -TEST_CASES = [ - (0.5, 0.5, 0.5, 0), - (1.0, norm.cdf(0.5), norm.sf(0.5), 1), - (2.0, norm.cdf(0.5), norm.sf(0.5), 1 / 4), -] - - -@pytest.mark.parametrize( - "minimal_effect_size, power_level, significance_level, expected_factor", TEST_CASES -) -def test_factor(minimal_effect_size, power_level, significance_level, expected_factor): - assert ( - abs( - expected_factor - - _compute_factor(minimal_effect_size, power_level, significance_level) - ) - < 1e-6 - ) - - -@pytest.mark.parametrize("minimal_effect_size", [0.5, 1.0, 2.0]) -@pytest.mark.parametrize("power_level", [0.25, 0.5, 0.75]) -@pytest.mark.parametrize("significance_level", [0.01, 0.05, 0.1, 0.2]) -def test_bounds(minimal_effect_size, power_level, significance_level): - res = [ - _get_optimal_sample_sizes( - sd_1=1, - sd_2=1, - existing_n1=_n1, - minimal_effect_size=minimal_effect_size, - power_level=power_level, - significance_level=significance_level, - ) - for _n1 in (0, 10) - ] - # test that if both sample sizes are chosen optimally the overall number is smaller - assert sum(res[0]) <= sum(res[1]) + 10 - # test that if there are existing samples in the first group, the second group - # can be smaller than if there are no existing samples in the first group - assert res[0][1] >= res[1][1] - - -def test_standard_deviation_influence(): - n1, n2 = _get_optimal_sample_sizes( - sd_1=1, - sd_2=3, - existing_n1=0, - minimal_effect_size=0.5, - power_level=0.5, - 
significance_level=0.2, - ) - assert n1 < n2 - - -def test_inequality(): - # Test that the inequality condition is satisfied - n1, n2 = _get_optimal_sample_sizes( - sd_1=1, - sd_2=2, - existing_n1=0, - minimal_effect_size=0.5, - power_level=0.5, - significance_level=0.2, - ) - factor = _compute_factor(0.5, 0.5, 0.2) - lhs = (1 / n1 + 2 / n2) ** (-1) - assert lhs >= factor - - -def test_first_group_is_not_sampled(): - n1, _ = _get_optimal_sample_sizes( - sd_1=1, - sd_2=1, - existing_n1=10, - minimal_effect_size=0.5, - power_level=0.5, - significance_level=0.2, - ) - assert n1 == 0 diff --git a/tests/optimization/tranquilo/test_adjust_radius.py b/tests/optimization/tranquilo/test_adjust_radius.py deleted file mode 100644 index 472ac3aca..000000000 --- a/tests/optimization/tranquilo/test_adjust_radius.py +++ /dev/null @@ -1,104 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.tranquilo.adjust_radius import adjust_radius -from estimagic.optimization.tranquilo.options import RadiusOptions - - -@pytest.fixture() -def options(): - return RadiusOptions(initial_radius=0.1) - - -def test_increase(options): - calculated = adjust_radius( - radius=1, - rho=1.5, - step_length=np.linalg.norm(np.ones(2)), - options=options, - ) - - expected = 2 - - assert calculated == expected - - -def test_increase_blocked_by_small_step(options): - calculated = adjust_radius( - radius=1, - rho=1.5, - step_length=np.linalg.norm(np.array([0.1, 0.1])), - options=options, - ) - - expected = 1 - - assert calculated == expected - - -def test_decrease(options): - calculated = adjust_radius( - radius=1, - rho=0.05, - step_length=np.linalg.norm(np.ones(2)), - options=options, - ) - - expected = 0.5 - - assert calculated == expected - - -def test_max_radius_is_not_violated(options): - calculated = adjust_radius( - radius=750_000, - rho=1.5, - step_length=np.linalg.norm(np.array([750_000])), - options=options, - ) - - expected = 1e6 - - assert calculated == expected - - -def test_min_radius_is_not_violated(options): - calculated = adjust_radius( - radius=1e-09, - rho=0.05, - step_length=np.linalg.norm(np.ones(2)), - options=options, - ) - - expected = 1e-06 - - assert calculated == expected - - -def test_constant_radius(): - options = RadiusOptions(rho_increase=1.6, initial_radius=0.1) - - calculated = adjust_radius( - radius=1, - rho=1.5, - step_length=np.linalg.norm(np.ones(2)), - options=options, - ) - - expected = 1 - - assert calculated == expected - - -def test_max_radius_to_step_ratio_is_not_violated(): - options = RadiusOptions(max_radius_to_step_ratio=2, initial_radius=0.1) - - calculated = adjust_radius( - radius=1, - rho=1.5, - step_length=np.linalg.norm(np.array([0.75])), - options=options, - ) - - expected = 1.5 - - assert calculated == expected diff --git a/tests/optimization/tranquilo/test_aggregate_models.py b/tests/optimization/tranquilo/test_aggregate_models.py deleted file mode 100644 index 1e2f1ed1a..000000000 --- a/tests/optimization/tranquilo/test_aggregate_models.py +++ /dev/null @@ -1,79 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.tranquilo.aggregate_models import ( - aggregator_identity, - aggregator_information_equality_linear, - aggregator_least_squares_linear, - aggregator_sum, -) -from estimagic.optimization.tranquilo.models import ScalarModel, VectorModel -from numpy.testing import assert_array_equal - - -@pytest.mark.parametrize("square_terms", [np.arange(9).reshape(1, 3, 3), None]) -def test_aggregator_identity(square_terms): - vector_model = 
VectorModel( - intercepts=np.array([2.0]), - linear_terms=np.arange(3).reshape(1, 3), - square_terms=square_terms, - ) - - if square_terms is None: - expected_square_terms = np.zeros((3, 3)) - else: - expected_square_terms = np.arange(9).reshape(3, 3) - - got = ScalarModel(*aggregator_identity(vector_model)) - - assert_array_equal(got.intercept, 2.0) - assert_array_equal(got.linear_terms, np.arange(3)) - assert_array_equal(got.square_terms, expected_square_terms) - - -def test_aggregator_sum(): - vector_model = VectorModel( - intercepts=np.array([1.0, 2.0]), - linear_terms=np.arange(6).reshape(2, 3), - square_terms=np.arange(18).reshape(2, 3, 3), - ) - - got = ScalarModel(*aggregator_sum(vector_model)) - - assert_array_equal(got.intercept, 3.0) - assert_array_equal(got.linear_terms, np.array([3, 5, 7])) - assert_array_equal( - got.square_terms, np.array([[9, 11, 13], [15, 17, 19], [21, 23, 25]]) - ) - - -def test_aggregator_least_squares_linear(): - vector_model = VectorModel( - intercepts=np.array([0, 2]), - linear_terms=np.arange(6).reshape(2, 3), - square_terms=np.arange(18).reshape(2, 3, 3), # should not be used by aggregator - ) - - got = ScalarModel(*aggregator_least_squares_linear(vector_model)) - - assert_array_equal(got.intercept, 4.0) - assert_array_equal(got.linear_terms, np.array([12, 16, 20])) - assert_array_equal( - got.square_terms, np.array([[18, 24, 30], [24, 34, 44], [30, 44, 58]]) - ) - - -def test_aggregator_information_equality_linear(): - vector_model = VectorModel( - intercepts=np.array([1.0, 2.0]), - linear_terms=np.arange(6).reshape(2, 3), - square_terms=np.arange(18).reshape(2, 3, 3), # should not be used by aggregator - ) - - got = ScalarModel(*aggregator_information_equality_linear(vector_model)) - - assert_array_equal(got.intercept, 3.0) - assert_array_equal(got.linear_terms, np.array([3, 5, 7])) - assert_array_equal( - got.square_terms, - np.array([[-4.5, -6.0, -7.5], [-6.0, -8.5, -11.0], [-7.5, -11.0, -14.5]]), - ) diff --git a/tests/optimization/tranquilo/test_bounds.py b/tests/optimization/tranquilo/test_bounds.py deleted file mode 100644 index fec2dbbf6..000000000 --- a/tests/optimization/tranquilo/test_bounds.py +++ /dev/null @@ -1,38 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.tranquilo.bounds import Bounds, _any_finite - -CASES = [ - (np.array([1, 2]), np.array([5, 6]), True), - (np.array([1, 2]), None, True), - (None, np.array([5, 6]), True), - (None, None, False), - (np.array([np.inf, np.inf]), np.array([np.inf, np.inf]), False), - (np.array([-np.inf, -np.inf]), np.array([np.inf, np.inf]), False), - (np.array([1, 2]), np.array([np.inf, np.inf]), True), -] - - -@pytest.mark.parametrize("lb, ub, exp", CASES) -def test_any_finite_true(lb, ub, exp): - out = _any_finite(lb, ub) - assert out is exp - - -def test_bounds_none(): - bounds = Bounds(lower=None, upper=None) - assert bounds.has_any is False - - -def test_bounds_inifinite(): - lb = np.array([np.inf, np.inf]) - ub = np.array([np.inf, np.inf]) - bounds = Bounds(lower=lb, upper=ub) - assert bounds.has_any is False - - -def test_bounds_finite(): - lb = np.array([1, 2]) - ub = np.array([5, 6]) - bounds = Bounds(lower=lb, upper=ub) - assert bounds.has_any is True diff --git a/tests/optimization/tranquilo/test_clustering.py b/tests/optimization/tranquilo/test_clustering.py deleted file mode 100644 index 8cf2d588b..000000000 --- a/tests/optimization/tranquilo/test_clustering.py +++ /dev/null @@ -1,34 +0,0 @@ -import numpy as np -from estimagic.optimization.tranquilo.clustering 
import cluster -from numpy.testing import assert_array_equal as aae - - -def test_cluster_lollipop(): - rng = np.random.default_rng(123456) - center = np.array([0.25, 0.25]) - radius = 0.05 - - x = np.array( - [ - center, - *(center + rng.uniform(low=-radius, high=radius, size=(6, 2))).tolist(), - [0.5, 0.5], - [0.75, 0.75], - ], - ) - - clusters, centers = cluster(x, epsilon=0.1) - assert len(centers) == 3 - aae(np.unique(clusters), np.arange(3)) - - -def test_cluster_grid(): - base_grid = np.linspace(-1, 1, 11) - a, b = np.meshgrid(base_grid, base_grid) - x = np.column_stack([a.flatten(), b.flatten()]) - - clusters, centers = cluster(x, epsilon=0.1) - - assert len(centers) == len(x) - aae(np.sort(clusters), np.arange(len(x))) - aae(np.sort(centers), np.arange(len(x))) diff --git a/tests/optimization/tranquilo/test_estimate_variance.py b/tests/optimization/tranquilo/test_estimate_variance.py deleted file mode 100644 index 8e7cd1793..000000000 --- a/tests/optimization/tranquilo/test_estimate_variance.py +++ /dev/null @@ -1,44 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.tranquilo.estimate_variance import ( - _estimate_variance_classic, -) -from estimagic.optimization.tranquilo.history import History -from estimagic.optimization.tranquilo.tranquilo import Region -from numpy.testing import assert_array_almost_equal as aaae - - -@pytest.mark.parametrize("model_type", ["scalar", "vector"]) -def test_estimate_variance_classic(model_type): - xs = np.array( - [ - [0.0, 0.0], # center with multiple evaluations - [10, -10], # far away with multiple evaluations - [0.1, 0.1], # close to center with too few evaluations - ] - ) - - history = History(functype="scalar") - idxs = history.add_xs(xs) - - repetitions = np.array([5, 5, 2]) - - # squaring makes sure variance is not the same across all subsamples - evals = np.arange(12) ** 2 - - history.add_evals(idxs.repeat(repetitions), evals) - - got = _estimate_variance_classic( - trustregion=Region(center=np.array([0.0, 0.0]), radius=1.0), - history=history, - model_type=model_type, - max_distance_factor=1.0, - min_n_evals=4, - ) - - if model_type == "scalar": - expected = np.var(evals[:5], ddof=1) - else: - expected = np.var(evals[:5], ddof=1).reshape(1, 1) - - aaae(got, expected) diff --git a/tests/optimization/tranquilo/test_filter_points.py b/tests/optimization/tranquilo/test_filter_points.py deleted file mode 100644 index 97ded5a11..000000000 --- a/tests/optimization/tranquilo/test_filter_points.py +++ /dev/null @@ -1,48 +0,0 @@ -from estimagic.optimization.tranquilo.filter_points import get_sample_filter -from estimagic.optimization.tranquilo.tranquilo import State -from estimagic.optimization.tranquilo.region import Region -from numpy.testing import assert_array_equal as aae -import pytest -import numpy as np - - -@pytest.fixture() -def state(): - out = State( - trustregion=Region(center=np.ones(2), radius=0.3), - model_indices=None, - model=None, - vector_model=None, - candidate_index=5, - candidate_x=np.array([1.1, 1.2]), - index=2, - x=np.ones(2), - fval=15, - rho=None, - accepted=True, - old_indices_used=None, - old_indices_discarded=None, - new_indices=None, - step_length=0.1, - relative_step_length=0.1 / 0.3, - ) - return out - - -def test_discard_all(state): - filter = get_sample_filter("discard_all") - xs = np.arange(10).reshape(5, 2) - indices = np.arange(5) - got_xs, got_idxs = filter(xs=xs, indices=indices, state=state) - expected_xs = np.ones((1, 2)) - aae(got_xs, expected_xs) - aae(got_idxs, np.array([2])) - - 
-def test_keep_all(): - filter = get_sample_filter("keep_all") - xs = np.arange(10).reshape(5, 2) - indices = np.arange(5) - got_xs, got_idxs = filter(xs=xs, indices=indices, state=None) - aae(got_xs, xs) - aae(got_idxs, indices) diff --git a/tests/optimization/tranquilo/test_fit_models.py b/tests/optimization/tranquilo/test_fit_models.py deleted file mode 100644 index de5b9f065..000000000 --- a/tests/optimization/tranquilo/test_fit_models.py +++ /dev/null @@ -1,145 +0,0 @@ -import numpy as np -import pytest -from estimagic import first_derivative, second_derivative -from estimagic.optimization.tranquilo.fit_models import _quadratic_features, get_fitter -from estimagic.optimization.tranquilo.region import Region -from numpy.testing import assert_array_almost_equal, assert_array_equal - - -def aaae(x, y, decimal=None, case=None): - tolerance = { - "hessian": 2, - "gradient": 3, - } - decimal = decimal or tolerance.get(case, None) - assert_array_almost_equal(x, y, decimal=decimal) - - -# ====================================================================================== -# Fixtures -# ====================================================================================== - - -@pytest.fixture() -def quadratic_case(): - """Test scenario with true quadratic function. - - We return true function, and function evaluations and data on random points. - - """ - n_params = 4 - n_samples = 15 - - # theoretical terms - linear_terms = 1 + np.arange(n_params) - square_terms = np.arange(n_params**2).reshape(n_params, n_params) - square_terms = square_terms + square_terms.T - - def func(x): - y = -10 + linear_terms @ x + 0.5 * x.T @ square_terms @ x - return y - - x0 = np.ones(n_params) - - # random data - rng = np.random.default_rng(56789) - x = np.array([x0 + rng.uniform(-0.01 * x0, 0.01 * x0) for _ in range(n_samples)]) - y = np.array([func(_x) for _x in list(x)]).reshape(-1, 1) - - out = { - "func": func, - "x0": x0, - "x": x, - "y": y, - "linear_terms_expected": linear_terms, - "square_terms_expected": square_terms, - } - return out - - -# ====================================================================================== -# Tests -# ====================================================================================== - - -@pytest.mark.parametrize("fitter", ["ols", "ridge", "powell", "tranquilo"]) -def test_fit_against_truth_quadratic(fitter, quadratic_case): - options = {"l2_penalty_square": 0} - fit_pounders = get_fitter( - fitter, - options, - model_type="quadratic", - residualize=False, - infinity_handling="relative", - ) - got = fit_pounders( - quadratic_case["x"], - quadratic_case["y"], - region=Region(center=np.zeros(4), radius=1.0), - old_model=None, - ) - decimal = 3 if fitter != "ridge" else 1 - aaae( - got.linear_terms.flatten(), - quadratic_case["linear_terms_expected"], - decimal=decimal, - ) - aaae( - got.square_terms.reshape((4, 4)), - quadratic_case["square_terms_expected"], - decimal=decimal, - ) - - -@pytest.mark.parametrize("model", ["ols", "ridge", "tranquilo"]) -def test_fit_ols_against_gradient(model, quadratic_case): - options = {"l2_penalty_square": 0} - fit_ols = get_fitter( - model, - options, - model_type="quadratic", - residualize=False, - infinity_handling="relative", - ) - got = fit_ols( - quadratic_case["x"], - quadratic_case["y"], - region=Region(center=np.zeros(4), radius=1.0), - old_model=None, - ) - - a = got.linear_terms.flatten() - hess = got.square_terms.reshape((4, 4)) - grad = a + hess @ quadratic_case["x0"] - - gradient = 
first_derivative(quadratic_case["func"], quadratic_case["x0"]) - aaae(gradient["derivative"], grad, case="gradient") - - -@pytest.mark.parametrize("model", ("ols", "ridge", "tranquilo", "powell")) -def test_fit_ols_against_hessian(model, quadratic_case): - options = {"l2_penalty_square": 0} - fit_ols = get_fitter( - model, - options, - model_type="quadratic", - residualize=False, - infinity_handling="relative", - ) - got = fit_ols( - quadratic_case["x"], - quadratic_case["y"], - region=Region(center=np.zeros(4), radius=1.0), - old_model=None, - ) - hessian = second_derivative(quadratic_case["func"], quadratic_case["x0"]) - hess = got.square_terms.reshape((4, 4)) - aaae(hessian["derivative"], hess, case="hessian") - - -def test_quadratic_features(): - x = np.array([[0, 1, 2], [3, 4, 5]]) - - expected = np.array([[0, 1, 2, 0, 0, 0, 1, 2, 4], [3, 4, 5, 9, 12, 15, 16, 20, 25]]) - got = _quadratic_features(x) - assert_array_equal(got, expected) diff --git a/tests/optimization/tranquilo/test_get_component.py b/tests/optimization/tranquilo/test_get_component.py deleted file mode 100644 index 08ebf7263..000000000 --- a/tests/optimization/tranquilo/test_get_component.py +++ /dev/null @@ -1,170 +0,0 @@ -import pytest -from collections import namedtuple -from estimagic.optimization.tranquilo.get_component import ( - _add_redundant_argument_handling, - _fail_if_mandatory_argument_is_missing, - _get_function_and_name, - _get_valid_options, - get_component, -) - - -@pytest.fixture() -def func_dict(): - out = { - "f": lambda x: x, - "g": lambda x, y: x + y, - } - return out - - -@pytest.fixture -def default_options(): - options = namedtuple("default_options", "x y") - return options(x=1, y=1) - - -def test_get_component(func_dict, default_options): - got = get_component( - name_or_func="g", - component_name="component", - func_dict=func_dict, - default_options=default_options, - user_options={"y": 2}, - redundant_option_handling="ignore", - redundant_argument_handling="ignore", - mandatory_signature=["x"], - ) - - assert got() == 3 - assert got(bla=15) == 3 - - -def test_get_function_and_name_valid_string(func_dict): - _func, _name = _get_function_and_name( - name_or_func="f", - component_name="component", - func_dict=func_dict, - ) - assert _func == func_dict["f"] - assert _name == "f" - - -def test_get_function_and_name_invalid_string(): - with pytest.raises(ValueError, match="If component is a string, it must be one of"): - _get_function_and_name( - name_or_func="h", - component_name="component", - func_dict={"f": lambda x: x, "g": lambda x, y: x + y}, - ) - - -def test_get_function_and_name_valid_function(): - def _f(x): - return x - - _func, _name = _get_function_and_name( - name_or_func=_f, - component_name="component", - func_dict=None, - ) - assert _func == _f - assert _name == "_f" - - -def test_get_function_and_string_wrong_type(): - with pytest.raises(TypeError, match="name_or_func must be a string or a callable."): - _get_function_and_name( - name_or_func=1, - component_name="component", - func_dict=None, - ) - - -def test_get_valid_options_ignore(default_options): - got = _get_valid_options( - default_options=default_options, - user_options={"x": 3, "y": 4}, - signature=["x", "y"], - name="bla", - component_name="component", - redundant_option_handling="ignore", - ) - expected = {"x": 3, "y": 4} - - assert got == expected - - -def test_get_valid_options_raise_update_option_bundle(default_options): - # provokes error in update_option_bundle - with pytest.raises(ValueError, match="The 
following user options are not valid"): - _get_valid_options( - default_options=default_options, - user_options={"x": 3, "z": 4}, - signature=["x", "y"], - name="bla", - component_name="component", - redundant_option_handling="raise", - ) - - -def test_get_valid_options_raise(default_options): - with pytest.raises(ValueError, match="The following options are not supported"): - _get_valid_options( - default_options=default_options, - user_options={"y": 3}, - signature=["x"], - name="bla", - component_name="component", - redundant_option_handling="raise", - ) - - -def test_get_valid_options_warn(default_options): - with pytest.warns(UserWarning, match="The following options are not supported"): - _get_valid_options( - default_options=default_options, - user_options={"y": 3}, - signature=["x"], - name="bla", - component_name="component", - redundant_option_handling="warn", - ) - - -def test_fail_if_mandatory_argument_is_missing(): - with pytest.raises( - ValueError, match="The following mandatory arguments are missing" - ): - _fail_if_mandatory_argument_is_missing( - mandatory_arguments=["a", "c"], - signature=["a", "b"], - name="bla", - component_name="component", - ) - - -def test_add_redundant_argument_handling_ignore(): - def f(a, b): - return a + b - - _f = _add_redundant_argument_handling( - func=f, - signature=["a", "b"], - warn=False, - ) - - assert _f(1, b=2, c=3) == 3 - - -def test_add_redundant_argument_handling_warn(): - def f(a, b): - return a + b - - _f = _add_redundant_argument_handling( - func=f, - signature=["a", "b"], - warn=True, - ) - with pytest.warns(UserWarning, match="The following arguments are not supported"): - _f(1, b=2, c=3) diff --git a/tests/optimization/tranquilo/test_handle_infinity.py b/tests/optimization/tranquilo/test_handle_infinity.py deleted file mode 100644 index 6030b9a0f..000000000 --- a/tests/optimization/tranquilo/test_handle_infinity.py +++ /dev/null @@ -1,15 +0,0 @@ -import numpy as np -from estimagic.optimization.tranquilo.handle_infinity import get_infinity_handler -from numpy.testing import assert_array_almost_equal as aaae - - -def test_clip_relative(): - func = get_infinity_handler("relative") - - fvecs = np.array([[1, np.inf, 3, 1], [-np.inf, 0, 1, 2], [-1, 5, 6, 3]]) - - got = func(fvecs) - - expected = np.array([[1, 16, 3, 1], [-6, 0, 1, 2], [-1, 5, 6, 3]]) - - aaae(got, expected) diff --git a/tests/optimization/tranquilo/test_history.py b/tests/optimization/tranquilo/test_history.py deleted file mode 100644 index bf2410df5..000000000 --- a/tests/optimization/tranquilo/test_history.py +++ /dev/null @@ -1,230 +0,0 @@ -"""Test the history class for least-squares optimizers.""" -import numpy as np -import pytest -from estimagic.optimization.tranquilo.history import History -from estimagic.optimization.tranquilo.region import Region -from numpy.testing import assert_array_almost_equal as aaae - - -XS = [ - np.arange(3), - np.arange(3).tolist(), - np.arange(3).reshape(1, 3), - np.arange(3).reshape(1, 3).tolist(), -] - - -@pytest.mark.parametrize("xs", XS) -def test_add_xs_not_initialized(xs): - history = History(functype="least_squares") - - new_indices = history.add_xs(xs) - - if len(xs) == 1: - aaae(new_indices, np.array([0])) - else: - assert new_indices == 0 - - assert isinstance(history.xs, np.ndarray) - aaae(history.xs[0], np.arange(3)) - - assert history.index_mapper == {0: []} - assert history.n_xs == 1 - assert history.n_fun == 0 - - -@pytest.mark.parametrize("xs", XS) -def test_add_xs_initialized_with_space(xs): - history = 
History(functype="least_squares") - - history.add_xs(np.ones((20, 3))) - new_indices = history.add_xs(xs) - - if len(xs) == 1: - aaae(new_indices, np.array([20])) - else: - assert new_indices == 20 - - assert isinstance(history.xs, np.ndarray) - aaae(history.xs[:21], np.vstack([np.ones((20, 3)), np.arange(3)])) - - assert history.index_mapper == {i: [] for i in range(21)} - assert history.n_xs == 21 - assert history.n_fun == 0 - - -@pytest.mark.parametrize("xs", XS) -def test_add_xs_initialized_extension_needed(xs): - history = History(functype="least_squares") - - history.add_xs(np.ones(3)) - initial_size = len(history.xs) - history.add_xs(np.ones((initial_size - 1, 3))) - history.add_xs(xs) - - assert len(history.xs) > initial_size - - aaae(history.xs[initial_size], np.arange(3)) - - assert history.n_xs == initial_size + 1 - assert history.n_fun == 0 - - -EVALS = [ - (0, np.arange(5)), - ([0], [np.arange(5)]), - (np.array([0]), np.arange(5).reshape(1, 5)), -] - - -@pytest.mark.parametrize("x_indices, evals", EVALS) -def test_add_evals_not_initialized(x_indices, evals): - history = History(functype="least_squares") - history.add_xs(np.arange(3)) - - history.add_evals(x_indices, evals) - - assert history.get_n_fun() == 1 - assert history.get_n_xs() == 1 - - aaae(history.fvecs[0], np.arange(5)) - aaae(history.fvals[0], 30.0) - - assert history.index_mapper == {0: [0]} - - -@pytest.mark.parametrize("evals", [tup[1] for tup in EVALS]) -def test_add_evals_initialized_with_space(evals): - history = History(functype="least_squares") - history.add_xs(np.arange(6).reshape(2, 3)) - history.add_evals([0] * 20, np.ones((20, 5))) - - history.add_evals(1, evals) - - assert history.get_n_fun() == 21 - assert history.get_n_xs() == 2 - - aaae(history.fvecs[:21], np.vstack([np.ones((20, 5)), np.arange(5)])) - aaae(history.fvals[20], 30.0) - - assert history.index_mapper == {0: list(range(20)), 1: [20]} - - -def test_get_indices_in_trustregion(): - history = History(functype="least_squares") - xs = [[1, 1], [1.1, 1.2], [1.5, 1], [0.9, 0.9]] - fvecs = np.zeros((4, 3)) - indices = history.add_xs(xs) - history.add_evals(indices, fvecs) - - trustregion = Region( - center=np.ones(2), - radius=0.3, - ) - - indices = history.get_x_indices_in_region(trustregion) - - aaae(indices, np.array([0, 1, 3])) - - -@pytest.fixture() -def history(): - history = History(functype="least_squares") - xs = np.arange(15).reshape(5, 3) - fvecs = np.arange(25).reshape(5, 5) - indices = history.add_xs(xs) - history.add_evals(indices, fvecs) - return history - - -def test_get_xs_no_indices(history): - xs = history.get_xs() - aaae(xs, np.arange(15).reshape(5, 3)) - - -def test_get_xs_with_indices(history): - xs = history.get_xs([0, 2, 4]) - aaae(xs, np.arange(15).reshape(5, 3)[[0, 2, 4]]) - - -def test_get_xs_scalar_index(history): - xs = history.get_xs(0) - aaae(xs, np.arange(3)) - - -def test_add_eval_for_invalid_x(history): - with pytest.raises(ValueError): - history.add_evals(5, np.arange(5)) - - -def test_get_fvecs_scalar_index(history): - fvecs = history.get_fvecs(0) - aaae(fvecs, np.arange(5).reshape(1, 5)) - - -def test_get_fvecs_with_indices(history): - fvecs = history.get_fvecs([0]) - assert isinstance(fvecs, dict) - assert len(fvecs) == 1 - assert 0 in fvecs - aaae(fvecs[0], np.arange(5).reshape(1, 5)) - - -def test_get_fvals_scalar_index(history): - fvals = history.get_fvals(0) - aaae(fvals, 30.0) - - -def test_get_fvals_with_indices(history): - fvals = history.get_fvals([0]) - assert isinstance(fvals, dict) - assert 
len(fvals) == 1 - assert 0 in fvals - aaae(fvals[0], 30.0) - - -@pytest.mark.parametrize("average", [True, False]) -def test_get_model_data_trivial_averaging(history, average): - got_xs, got_fvecs = history.get_model_data( - x_indices=[0, 1], - average=average, - ) - - aaae(got_xs, np.arange(6).reshape(2, 3)) - aaae(got_fvecs, np.arange(10).reshape(2, 5)) - - -def test_get_model_data_no_averaging(history): - got_xs, got_fvecs = history.get_model_data(x_indices=[0, 1]) - aaae(got_xs, np.arange(6).reshape(2, 3)) - aaae(got_fvecs, np.arange(10).reshape(2, 5)) - - -@pytest.fixture() -def noisy_history(): - history = History(functype="least_squares") - history.add_xs(np.arange(6).reshape(2, 3)) - fvecs = np.arange(25).reshape(5, 5) - history.add_evals([0, 0, 1, 1, 1], fvecs) - return history - - -@pytest.mark.parametrize("average", [True, False]) -def test_get_model_data_with_repeated_evaluations(noisy_history, average): - got_xs, got_fvecs = noisy_history.get_model_data( - x_indices=[0, 1], - average=average, - ) - - if average: - aaae(got_xs, np.arange(6).reshape(2, 3)) - expected_fvecs = np.array( - [ - np.arange(10).reshape(2, 5).mean(axis=0), - np.arange(10, 25).reshape(3, 5).mean(axis=0), - ] - ) - aaae(got_fvecs, expected_fvecs) - else: - aaae(got_xs, np.arange(6).reshape(2, 3).repeat([2, 3], axis=0)) - aaae(got_fvecs, np.arange(25).reshape(5, 5)) diff --git a/tests/optimization/tranquilo/test_models.py b/tests/optimization/tranquilo/test_models.py deleted file mode 100644 index cffffa93e..000000000 --- a/tests/optimization/tranquilo/test_models.py +++ /dev/null @@ -1,190 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.tranquilo.region import Region -from estimagic.optimization.tranquilo.models import ( - ScalarModel, - VectorModel, - _predict_scalar, - _predict_vector, - add_models, - is_second_order_model, - move_model, - n_free_params, - n_interactions, - n_second_order_terms, -) -from numpy.testing import assert_array_almost_equal as aaae -from numpy.testing import assert_array_equal - - -def test_predict_scalar(): - model = ScalarModel( - intercept=1.0, - linear_terms=np.arange(2), - square_terms=(np.arange(4) + 1).reshape(2, 2), - ) - x = np.array([[0, 0], [0, 1], [1, 0], [1, 2]]) - exp = np.array([1, 4, 1.5, 16.5]) - got = _predict_scalar(model, x) - assert_array_equal(exp, got) - - -def test_predict_vector(): - model = VectorModel( - intercepts=1 + np.arange(3), - linear_terms=np.arange(6).reshape(3, 2), - square_terms=(np.arange(3 * 2 * 2) + 1).reshape(3, 2, 2), - ) - x = np.array([[0, 0], [0, 1], [1, 0], [1, 2]], dtype=float) - exp = np.array( - [ - [1, 4, 1.5, 16.5], - [2, 9, 6.5, 41.5], - [3, 14, 11.5, 66.5], - ] - ).T - got = _predict_vector(model, x) - assert_array_equal(exp, got) - - -def test_n_free_params_name_quadratic(): - assert n_free_params(dim=2, model_type="quadratic") == 1 + 2 + 3 - assert n_free_params(dim=3, model_type="quadratic") == 1 + 3 + 6 - assert n_free_params(dim=9, model_type="quadratic") == 1 + 9 + 45 - - -def test_n_free_params_name_invalid(): - with pytest.raises(ValueError): - assert n_free_params(dim=3, model_type="invalid") - - -@pytest.mark.parametrize("dim", [2, 3, 9]) -def test_n_free_params_info_linear(dim): - assert n_free_params(dim, model_type="linear") == 1 + dim - - -@pytest.mark.parametrize("dim", [2, 3, 9]) -def test_n_free_params_info_quadratic(dim): - assert n_free_params(dim, model_type="quadratic") == 1 + dim + n_second_order_terms( - dim - ) - - -def test_n_free_params_invalid(): - model = 
ScalarModel(intercept=1.0, linear_terms=np.ones(1), square_terms=np.ones(1)) - with pytest.raises(ValueError): - n_free_params(dim=1, model_type=model) - - -def test_n_second_order_terms(): - assert n_second_order_terms(3) == 6 - - -def test_n_interactions(): - assert n_interactions(3) == 3 - - -@pytest.mark.parametrize("model_type", ("linear", "quadratic")) -def test_is_second_order_model_type(model_type): - assert is_second_order_model(model_type) == (model_type == "quadratic") - - -def test_is_second_order_model_model(): - model = ScalarModel(intercept=1.0, linear_terms=np.ones(1)) - assert is_second_order_model(model) is False - - model = ScalarModel(intercept=1.0, linear_terms=np.ones(1), square_terms=np.ones(1)) - assert is_second_order_model(model) is True - - -def test_is_second_order_model_invalid(): - model = np.linalg.lstsq - with pytest.raises(TypeError): - is_second_order_model(model) - - -@pytest.fixture() -def scalar_model(): - out = ScalarModel( - intercept=0.5, - linear_terms=np.array([-0.3, 0.3]), - square_terms=np.array([[0.8, 0.2], [0.2, 0.7]]), - shift=np.array([0.2, 0.3]), - scale=0.6, - ) - return out - - -@pytest.fixture() -def vector_model(): - out = VectorModel( - intercepts=np.array([0.5, 0.4, 0.3]), - linear_terms=np.array([[-0.3, 0.3], [-0.2, 0.1], [-0.2, 0.1]]), - square_terms=np.array( - [ - [[0.8, 0.2], [0.2, 0.7]], - [[0.6, 0.2], [0.2, 0.5]], - [[0.8, 0.2], [0.2, 0.7]], - ] - ), - shift=np.array([0.2, 0.3]), - scale=0.6, - ) - return out - - -def test_move_scalar_model(scalar_model): - old_region = Region(center=scalar_model.shift, radius=scalar_model.scale) - new_region = Region(center=np.array([-0.1, 0.1]), radius=0.45) - - old_model = scalar_model - x_unscaled = np.array([[0.5, 0.5]]) - x_old = old_region.map_to_unit(x_unscaled) - x_new = new_region.map_to_unit(x_unscaled) - - new_model = move_model(old_model, new_region) - - old_prediction = old_model.predict(x_old) - new_prediction = new_model.predict(x_new) - - assert new_model.scale == new_region.radius - aaae(new_model.shift, new_region.center) - - assert np.allclose(old_prediction, new_prediction) - - -def test_move_vector_model(vector_model): - old_region = Region(center=vector_model.shift, radius=vector_model.scale) - new_region = Region(center=np.array([-0.1, 0.1]), radius=0.45) - - old_model = vector_model - - x_unscaled = np.array([[0.5, 0.5]]) - x_old = old_region.map_to_unit(x_unscaled) - x_new = new_region.map_to_unit(x_unscaled) - - new_model = move_model(old_model, new_region) - - old_prediction = old_model.predict(x_old) - new_prediction = new_model.predict(x_new) - - assert new_model.scale == new_region.radius - aaae(new_model.shift, new_region.center) - - assert np.allclose(old_prediction, new_prediction) - - -def test_add_scalar_models(scalar_model): - got = add_models(scalar_model, scalar_model) - - assert got.intercept == scalar_model.intercept * 2 - aaae(got.linear_terms, scalar_model.linear_terms * 2) - aaae(got.square_terms, scalar_model.square_terms * 2) - - -def test_add_vector_models(vector_model): - got = add_models(vector_model, vector_model) - - assert np.allclose(got.intercepts, vector_model.intercepts * 2) - aaae(got.linear_terms, vector_model.linear_terms * 2) - aaae(got.square_terms, vector_model.square_terms * 2) diff --git a/tests/optimization/tranquilo/test_options.py b/tests/optimization/tranquilo/test_options.py deleted file mode 100644 index e8a6297de..000000000 --- a/tests/optimization/tranquilo/test_options.py +++ /dev/null @@ -1,56 +0,0 @@ -import pytest -from 
collections import namedtuple -from estimagic.optimization.tranquilo.options import ( - get_default_aggregator, - update_option_bundle, -) - - -def test_get_default_aggregator_scalar_quadratic(): - assert get_default_aggregator("scalar", "quadratic") == "identity" - - -def test_get_default_aggregator_error(): - with pytest.raises( - NotImplementedError, - match="The requested combination of functype and model_type is not supported.", - ): - get_default_aggregator("scalar", "linear") - - -@pytest.fixture -def default_options(): - options = namedtuple("default_options", "number") - return options(number=1) - - -def test_update_option_bundle_fast_path(): - assert update_option_bundle("whatever", user_options=None) == "whatever" - - -def test_update_option_bundle_dict(default_options): - got = update_option_bundle(default_options, user_options={"number": 2}) - assert got.number == 2 - - -def test_update_option_bundle_namedtuple(default_options): - user_option = default_options._replace(number=2) - got = update_option_bundle(default_options, user_options=user_option) - assert got.number == 2 - - -def test_update_option_bundle_convert_type(default_options): - got = update_option_bundle(default_options, user_options={"number": "2"}) - assert got.number == 2 - - -def test_update_option_bundle_wrong_type(default_options): - with pytest.raises(ValueError, match="invalid literal for int"): - update_option_bundle(default_options, user_options={"number": "not_a_number"}) - - -def test_update_option_bundle_invalid_field(default_options): - with pytest.raises( - ValueError, match="The following user options are not valid: {'not_a_field'}" - ): - update_option_bundle(default_options, user_options={"not_a_field": 10}) diff --git a/tests/optimization/tranquilo/test_poisedness.py b/tests/optimization/tranquilo/test_poisedness.py deleted file mode 100644 index 2a846d0a2..000000000 --- a/tests/optimization/tranquilo/test_poisedness.py +++ /dev/null @@ -1,388 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.tranquilo.poisedness import ( - _get_minimize_options, - _lagrange_poly_matrix, - _reshape_coef_to_square_terms, - get_poisedness_constant, - improve_poisedness, -) -from numpy.testing import assert_array_almost_equal as aaae - - -def evaluate_scalar_model(x, intercept, linear_terms, square_terms): - return intercept + linear_terms.T @ x + 0.5 * x.T @ square_terms @ x - - -# ====================================================================================== -# Improve poisedness -# ====================================================================================== - -TEST_CASES = [ - ( - np.array( - [ - [-0.98, -0.96], - [-0.96, -0.98], - [0, 0], - [0.98, 0.96], - [0.96, 0.98], - [0.94, 0.94], - ] - ), - "sphere", - 5, - [ - 5324.240935366314, - 36.87996947175511, - 11.090857556966462, - 1.3893207179888898, - 1.0016763267639168, - ], - ), - ( - np.array( - [ - [-0.98, -0.96], - [-0.96, -0.98], - [0, 0], - [0.98, 0.96], - [0.96, 0.98], - [0.94, 0.94], - ] - ), - "cube", - 10, - [ - 10648.478006222356, - 49.998826793338836, - 13.145227394549012, - 1.0313287779903457, - 1.008398336326099, - 1.0306831620836225, - 1.0019247733166188, - 1.0044418474330754, - 1.0024393102571791, - 1.0017007017773365, - ], - ), - ( - np.array( - [ - [-0.98, -0], - [-0.96, -0.01], - [0, 0], - [-0.02, 0.98], - [0.03, -0.96], - [0.94, 0.06], - ] - ), - "sphere", - 5, - [ - 50.83088699521032, - 1.4010345122261196, - 1.109469103188152, - 1.0614725892080803, - 1.0368961283088556, - ], - ), - ( - np.array( - [ 
- [-0.98, 0.0], - [-0.56, -0.01], - [-0.3, -0.07], - [0.98, 0.02], - [0.46, 0.03], - [0.94, 0.06], - ] - ), - "sphere", - 5, - [ - 687.9333361325548, - 22.830295678507802, - 11.89595397927371, - 1.590858593504958, - 1.1143219029197806, - ], - ), -] - - -@pytest.mark.parametrize("sample, shape, maxiter, expected", TEST_CASES) -def test_improve_poisedness(sample, shape, maxiter, expected): - _, got_lambdas = improve_poisedness(sample=sample, shape=shape, maxiter=maxiter) - aaae(got_lambdas[-5:], expected[-5:], decimal=2) - - -# ====================================================================================== -# Lambda poisedness constant -# ====================================================================================== -TEST_CASES = [ - ( - np.array( - [ - [-0.98, -0.96], - [-0.96, -0.98], - [0, 0], - [0.98, 0.96], - [0.96, 0.98], - [0.94, 0.94], - ] - ), - 5324.241743151584, - ), - ( - np.array( - [ - [-0.98, -0.96], - [-0.96, -0.98], - [0.0, 0.0], - [0.98, 0.96], - [0.96, 0.98], - [-0.70710678, 0.70710678], - ] - ), - 36.87996947175511, - ), - ( - np.array( - [ - [-0.98, -0.96], - [-0.96, -0.98], - [0.0, 0.0], - [0.84885278, -0.52862932], - [0.96, 0.98], - [-0.70710678, 0.70710678], - ] - ), - 11.090857500607644, - ), - ( - np.array( - [ - [-0.98, -0.96], - [-0.02260674, 0.99974443], - [0.0, 0.0], - [0.84885278, -0.52862932], - [0.96, 0.98], - [-0.70710678, 0.70710678], - ] - ), - 1.3893205660280858, - ), - ( - np.array( - [ - [-0.98, -0.96], - [-0.02260674, 0.99974443], - [0.0, 0.0], - [0.84885278, -0.52862932], - [0.96, 0.98], - [-0.96706306, 0.2545369], - ] - ), - 1.0016763272061744, - ), -] - - -@pytest.mark.parametrize("sample, expected", TEST_CASES) -def test_poisedness_constant_scaled(sample, expected): - """Test cases are modified versions from :cite:`Conn2009` p. - - 99. - - """ - got, *_ = get_poisedness_constant(sample, shape="sphere") - assert np.allclose(got, expected) - - -TEST_CASES = [ - ( - np.array( - [ - [-0.98, -0.96], - [-0.96, -0.98], - [0, 0], - [0.98, 0.96], - [0.96, 0.98], - [0.94, 0.94], - ] - ), - 5324, - ), - ( - np.array( - [ - [-0.98, -0.96], - [-0.96, -0.98], - [0.0, 0.0], - [0.98, 0.96], - [0.96, 0.98], - [-0.707, 0.707], - ] - ), - 36.88, - ), - ( - np.array( - [ - [-0.967, 0.254], - [-0.96, -0.98], - [0, 0], - [0.98, 0.96], - [-0.199, 0.979], - [0.707, -0.707], - ] - ), - 1.001, - ), -] - - -@pytest.mark.parametrize("sample, expected", TEST_CASES) -def test_poisedness_constant_textbook_scaled(sample, expected): - """Test cases are taken from :cite:`Conn2009` p. - - 99. - - """ - got, *_ = get_poisedness_constant(sample, shape="sphere") - assert np.allclose(got, expected, rtol=1e-3) - - -TEST_CASES = [ - ( - np.array( - [ - [0.524, 0.0006], - [0.032, 0.323], - [0.187, 0.890], - [0.5, 0.5], - [0.982, 0.368], - [0.774, 0.918], - ] - ), - 1, - ) -] - - -@pytest.mark.parametrize("sample, expected", TEST_CASES) -def test_poisedness_constant_textbook_unscaled(sample, expected): - """This test case is taken from :cite:`Conn2009` p. - - 45. 
- - """ - n_params = sample.shape[1] - - radius = 0.5 - center = 0.5 * np.ones(n_params) - sample_scaled = (sample - center) / radius - - got, *_ = get_poisedness_constant(sample_scaled, shape="sphere") - assert np.allclose(got, expected, rtol=1e-3) - - -def test_invalid_shape_argument(): - with pytest.raises(ValueError): - assert _get_minimize_options(shape="ellipse", n_params=10) - - -# ====================================================================================== -# Lagrange polynomials -# ====================================================================================== - -TEST_CASES = [ - ( - np.array([[0, 0], [1, 0], [0, 1], [2, 0], [1, 1], [0, 2], [0.5, 0.5]]), - np.array( - [ - [ - 1, - -1.5, - -1.5, - 1, - 1, - 1, - ], - [ - 0, - 5 / 3, - -1 / 3, - -1.64705882e00, - -7.64705882e-01, - 3.52941176e-01, - ], - [ - 0, - -1 / 3, - 5 / 3, - 3.52941176e-01, - -7.64705882e-01, - -1.64705882e00, - ], - [ - 0, - -5 / 12, - 1 / 12, - 9.11764706e-01, - -5.88235294e-02, - -8.82352941e-02, - ], - [ - -0, - -1 / 6, - -1 / 6, - 1.76470588e-01, - 1.11764706e00, - 1.76470588e-01, - ], - [ - 0, - 1 / 12, - -5 / 12, - -8.82352941e-02, - -5.88235294e-02, - 9.11764706e-01, - ], - [ - 0, - 2 / 3, - 2 / 3, - -7.05882353e-01, - -4.70588235e-01, - -7.05882353e-01, - ], - ] - ), - np.array([1, 0.84, 0.84, 0.99, 0.96, 0.99, 0.37]), - ) -] - - -@pytest.mark.parametrize("sample, expected_lagrange_mat, expected_critval", TEST_CASES) -def test_lagrange_poly_matrix(sample, expected_lagrange_mat, expected_critval): - """This test case is taken from :cite:`Conn2009` p. - - 62. - - """ - sample = np.array([[0, 0], [1, 0], [0, 1], [2, 0], [1, 1], [0, 2], [0.5, 0.5]]) - n_params = sample.shape[1] - - lagrange_mat = _lagrange_poly_matrix(sample) - aaae(lagrange_mat, expected_lagrange_mat) - - for idx, lagrange_poly in enumerate(lagrange_mat): - intercept = lagrange_poly[0] - linear_terms = lagrange_poly[1 : n_params + 1] - _coef_square_terms = lagrange_poly[n_params + 1 :] - square_terms = _reshape_coef_to_square_terms(_coef_square_terms, n_params) - - got = evaluate_scalar_model(sample[idx], intercept, linear_terms, square_terms) - aaae(got, expected_critval[idx], decimal=2) diff --git a/tests/optimization/tranquilo/test_process_arguments.py b/tests/optimization/tranquilo/test_process_arguments.py deleted file mode 100644 index 8532e65ea..000000000 --- a/tests/optimization/tranquilo/test_process_arguments.py +++ /dev/null @@ -1,137 +0,0 @@ -"""Tests for the process_arguments function and subfunctions. - -When testing process_arguments we should only test the values of outputs that somehow -depend on the inputs, not the values with static defaults. 
- -""" -import pytest -import numpy as np -from estimagic.optimization.tranquilo.process_arguments import ( - process_arguments, - _process_batch_size, - _process_sample_size, - _process_model_type, - _process_search_radius_factor, - _process_acceptance_decider, - _process_model_fitter, - _process_residualize, - _process_n_evals_at_start, -) - - -def test_process_arguments_scalar_deterministic(): - res = process_arguments( - functype="scalar", - criterion=lambda x: x @ x, - x=np.array([-3, 1, 2]), - radius_options={"initial_radius": 1.0}, - ) - assert res["radius_options"].initial_radius == 1.0 - - -def test_process_batch_size(): - assert _process_batch_size(batch_size=2, n_cores=2) == 2 - assert _process_batch_size(batch_size=None, n_cores=3) == 3 - - -def test_process_batch_size_invalid(): - with pytest.raises(ValueError, match="batch_size must be"): - _process_batch_size(batch_size=1, n_cores=2) - - -def test_process_sample_size(): - x = np.arange(3) - assert _process_sample_size(sample_size=None, model_type="linear", x=x) == 4 - assert _process_sample_size(sample_size=None, model_type="quadratic", x=x) == 7 - assert _process_sample_size(10, None, None) == 10 - - -def test_process_sample_size_callable(): - x = np.arange(3) - sample_size = lambda x, model_type: len(x) ** 2 - assert _process_sample_size(sample_size=sample_size, model_type="linear", x=x) == 9 - - -def test_process_model_type(): - assert _process_model_type(model_type="linear", functype="scalar") == "linear" - assert _process_model_type(model_type=None, functype="scalar") == "quadratic" - assert _process_model_type(model_type=None, functype="least_squares") == "linear" - assert _process_model_type(model_type=None, functype="likelihood") == "linear" - - -def test_process_model_type_invalid(): - with pytest.raises(ValueError, match="model_type must be"): - _process_model_type(model_type="invalid", functype="scalar") - - -def test_process_search_radius_factor(): - assert _process_search_radius_factor(search_radius_factor=1.1, functype=None) == 1.1 - assert ( - _process_search_radius_factor(search_radius_factor=None, functype="scalar") - == 4.25 - ) - assert ( - _process_search_radius_factor( - search_radius_factor=None, functype="least_squares" - ) - == 5.0 - ) - - -def test_process_search_radius_factor_negative(): - with pytest.raises(ValueError, match="search_radius_factor must be"): - _process_search_radius_factor(-1, "scalar") - - -def test_process_acceptance_decider(): - assert _process_acceptance_decider(acceptance_decider=None, noisy=True) == "noisy" - assert ( - _process_acceptance_decider(acceptance_decider=None, noisy=False) == "classic" - ) - assert ( - _process_acceptance_decider(acceptance_decider="classic", noisy=None) - == "classic" - ) - - -def test_process_model_fitter(): - assert ( - _process_model_fitter( - model_fitter=None, model_type="quadratic", sample_size=3, x=np.arange(3) - ) - == "tranquilo" - ) - assert ( - _process_model_fitter( - model_fitter=None, model_type="linear", sample_size=4, x=np.arange(3) - ) - == "ols" - ) - assert ( - _process_model_fitter( - model_fitter="xyz", model_type=None, sample_size=None, x=None - ) - == "xyz" - ) - - -def test_process_residualize(): - assert _process_residualize(residualize=None, model_fitter="tranquilo") is True - assert _process_residualize(residualize=None, model_fitter="ols") is False - assert _process_residualize(residualize=False, model_fitter="custom") is False - - -def test_process_residualize_invalid(): - with pytest.raises(ValueError, 
match="residualize must be a boolean."): - _process_residualize(residualize="invalid", model_fitter=None) - - -def test_process_n_evals_at_start(): - assert _process_n_evals_at_start(n_evals=None, noisy=True) == 5 - assert _process_n_evals_at_start(n_evals=None, noisy=False) == 1 - assert _process_n_evals_at_start(n_evals=10, noisy=None) == 10 - - -def test_process_n_evals_at_start_negative(): - with pytest.raises(ValueError, match="n_initial_acceptance_evals must be"): - _process_n_evals_at_start(n_evals=-1, noisy=None) diff --git a/tests/optimization/tranquilo/test_region.py b/tests/optimization/tranquilo/test_region.py deleted file mode 100644 index c28c959dd..000000000 --- a/tests/optimization/tranquilo/test_region.py +++ /dev/null @@ -1,128 +0,0 @@ -import numpy as np -from estimagic.optimization.tranquilo.bounds import Bounds -from estimagic.optimization.tranquilo.region import ( - Region, - _any_bounds_binding, - _get_shape, - _get_cube_bounds, - _get_cube_center, - _get_effective_radius, - _get_effective_center, - _map_from_unit_cube, - _map_from_unit_sphere, - _map_to_unit_cube, - _map_to_unit_sphere, -) -from numpy.testing import assert_array_equal -import pytest - - -def test_map_to_unit_sphere(): - got = _map_to_unit_sphere(np.ones(2), center=2 * np.ones(1), radius=2) - assert_array_equal(got, -0.5 * np.ones(2)) - - -def test_map_to_unit_cube(): - bounds = Bounds(lower=np.zeros(2), upper=2 * np.ones(2)) - got = _map_to_unit_cube(np.ones(2), cube_bounds=bounds) - assert_array_equal(got, np.zeros(2)) - - -def test_map_from_unit_sphere(): - got = _map_from_unit_sphere(-0.5 * np.ones(2), center=2 * np.ones(1), radius=2) - assert_array_equal(got, np.ones(2)) - - -def test_map_from_unit_cube(): - bounds = Bounds(lower=np.zeros(2), upper=2 * np.ones(2)) - got = _map_from_unit_cube(np.zeros(2), cube_bounds=bounds) - assert_array_equal(got, np.ones(2)) - - -def test_any_bounds_binding_true(): - bounds = Bounds(lower=-np.ones(2), upper=np.ones(2)) - out = _any_bounds_binding(bounds, center=np.zeros(2), radius=2) - assert out - - -def test_any_bounds_binding_false(): - bounds = Bounds(lower=-np.ones(2), upper=np.ones(2)) - out = _any_bounds_binding(bounds, center=np.zeros(2), radius=0.5) - assert not out - - -def test_get_shape_sphere(): - out = _get_shape(center=np.zeros(2), radius=1, bounds=None) - assert out == "sphere" - - -def test_get_shape_cube(): - bounds = Bounds(lower=np.zeros(2), upper=np.ones(2)) - out = _get_shape(center=np.zeros(2), radius=1, bounds=bounds) - assert out == "cube" - - -def test_get_cube_bounds(): - bounds = Bounds(lower=-np.ones(2), upper=np.ones(2)) - out = _get_cube_bounds(center=np.zeros(2), radius=1, bounds=bounds, shape="sphere") - assert_array_equal(out.lower, bounds.lower) - assert_array_equal(out.upper, bounds.upper) - - -def test_get_cube_bounds_no_bounds(): - bounds = Bounds(lower=None, upper=None) - out = _get_cube_bounds(center=np.zeros(2), radius=1, bounds=bounds, shape="sphere") - assert_array_equal(out.lower, -np.ones(2)) - assert_array_equal(out.upper, np.ones(2)) - - -def test_get_cube_bounds_updated_upper_bounds(): - bounds = Bounds(lower=-2 * np.ones(2), upper=0.5 * np.ones(2)) - out = _get_cube_bounds(center=np.zeros(2), radius=1, bounds=bounds, shape="cube") - np.all(out.lower > -np.ones(2)) - np.all(out.lower < np.zeros(2)) - np.all(out.upper == 0.5 * np.ones(2)) - - -def test_get_cube_center(): - bounds = Bounds(lower=np.array([0, 0.5]), upper=np.array([1, 10])) - out = _get_cube_center(cube_bounds=bounds) - assert_array_equal(out, 
np.array([0.5, 5.25])) - - -def test_get_effective_radius(): - bounds = Bounds(lower=np.array([0, 0.5]), upper=np.array([1, 10])) - out = _get_effective_radius(shape="cube", radius=None, cube_bounds=bounds) - assert_array_equal(out, np.array([0.5, 4.75])) - - -def test_get_effective_center_sphere(): - out = _get_effective_center(shape="sphere", center=np.ones(2), cube_center=None) - assert_array_equal(out, np.ones(2)) - - -def test_get_effective_center_cube(): - out = _get_effective_center(shape="cube", center=None, cube_center=np.zeros(2)) - assert_array_equal(out, np.zeros(2)) - - -def test_region_non_binding_bounds(): - region = Region(center=np.zeros(2), radius=1) - assert region.shape == "sphere" - assert region.radius == 1 - assert region.bounds is None - with pytest.raises(AttributeError, match="The trustregion is a sphere"): - region.cube_bounds # noqa: B018 - with pytest.raises(AttributeError, match="The trustregion is a sphere"): - region.cube_center # noqa: B018 - - -def test_region_binding_bounds(): - bounds = Bounds(lower=-np.ones(2), upper=0.5 * np.ones(2)) - region = Region(center=np.zeros(2), radius=1, bounds=bounds) - assert region.shape == "cube" - assert region.radius == 1 - assert region.bounds is bounds - # shrinkage because cube radius is smaller than (spherical) radius - assert np.all(bounds.lower - region.cube_bounds.lower < 0) - assert_array_equal(region.cube_bounds.upper, bounds.upper) diff --git a/tests/optimization/tranquilo/test_rho_noise.py b/tests/optimization/tranquilo/test_rho_noise.py deleted file mode 100644 index dd62528b1..000000000 --- a/tests/optimization/tranquilo/test_rho_noise.py +++ /dev/null @@ -1,78 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.tranquilo.aggregate_models import get_aggregator -from estimagic.optimization.tranquilo.fit_models import get_fitter -from estimagic.optimization.tranquilo.region import Region -from estimagic.optimization.tranquilo.bounds import Bounds -from estimagic.optimization.tranquilo.rho_noise import simulate_rho_noise -from estimagic.optimization.tranquilo.solve_subproblem import get_subsolver -from numpy.testing import assert_array_almost_equal as aaae - - -@pytest.mark.parametrize("functype", ["scalar", "least_squares"]) -def test_convergence_to_one_if_noise_is_tiny(functype): - """Test simulate_rho_noise. - - For the test, the "true" model is a standard sphere function. 
- - """ - xs = ( - np.array( - [ - [0.0, 0.0], - [0.0, 1.0], - [1.0, 0.0], - [-1.0, 0.0], - [0.0, -1.0], - ] - ) - + 0.5 - ) - - if functype == "least_squares": - fvecs = xs.copy() - model_type = "linear" - model_aggregator = get_aggregator( - aggregator="least_squares_linear", - ) - n_residuals = 2 - else: - fvecs = (xs**2).sum(axis=1).reshape(-1, 1) - model_type = "quadratic" - model_aggregator = get_aggregator( - aggregator="identity", - ) - n_residuals = 1 - - noise_cov = np.eye(n_residuals) * 1e-12 - - trustregion = Region(center=np.ones(2) * 0.5, radius=1.0, bounds=Bounds(None, None)) - model_fitter = get_fitter( - fitter="ols", - model_type=model_type, - residualize=False, - infinity_handling="relative", - ) - - vector_model = model_fitter( - xs, fvecs, weights=None, region=trustregion, old_model=None - ) - - subsolver = get_subsolver(sphere_solver="gqtpar", cube_solver="bntr") - - rng = np.random.default_rng(123) - - got = simulate_rho_noise( - xs=xs, - vector_model=vector_model, - trustregion=trustregion, - noise_cov=noise_cov, - model_fitter=model_fitter, - model_aggregator=model_aggregator, - subsolver=subsolver, - rng=rng, - n_draws=100, - ignore_corelation=True, - ) - - aaae(got, np.ones_like(got), decimal=4) diff --git a/tests/optimization/tranquilo/test_sample_points.py b/tests/optimization/tranquilo/test_sample_points.py deleted file mode 100644 index 5f264a28f..000000000 --- a/tests/optimization/tranquilo/test_sample_points.py +++ /dev/null @@ -1,171 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.tranquilo.bounds import Bounds -from estimagic.optimization.tranquilo.region import Region -from estimagic.optimization.tranquilo.sample_points import ( - _draw_from_distribution, - _minimal_pairwise_distance_on_hull, - _project_onto_unit_hull, - get_sampler, -) -from numpy.testing import assert_array_almost_equal as aaae -from scipy.spatial.distance import pdist - -SAMPLERS = ["random_interior", "random_hull", "optimal_hull"] - - -@pytest.mark.parametrize("sampler", SAMPLERS) -def test_samplers(sampler): - _sampler = get_sampler(sampler) - trustregion = Region(center=np.array([0.0, 0]), radius=1.5, bounds=None) - sample = _sampler( - trustregion=trustregion, - n_points=5, - rng=np.random.default_rng(1234), - ) - assert len(sample) == 5 - assert np.all(-1.5 <= sample) - assert np.all(sample <= 1.5) - - -@pytest.mark.parametrize("sampler", SAMPLERS) -def test_bounds_are_satisfied(sampler): - bounds = Bounds(lower=np.array([-2.0, -2.0]), upper=np.array([0.25, 0.5])) - _sampler = get_sampler(sampler) - trustregion = Region(center=np.array([0.0, 0]), radius=1.5, bounds=bounds) - sample = _sampler( - trustregion=trustregion, - n_points=5, - rng=np.random.default_rng(1234), - ) - lower = np.full_like(sample, bounds.lower) - upper = np.full_like(sample, bounds.upper) - assert np.all(lower <= sample) - assert np.all(sample <= upper) - - -@pytest.mark.parametrize("sampler", SAMPLERS) -def test_enough_existing_points(sampler): - # test that if enough existing points exist an empty array is returned - sampler = get_sampler(sampler=sampler) - bounds = Bounds(lower=-np.ones(3), upper=np.ones(3)) - calculated = sampler( - trustregion=Region(center=np.zeros(3), radius=1, bounds=bounds), - n_points=0, - existing_xs=np.empty((5, 3)), - rng=np.random.default_rng(1234), - ) - - assert calculated.size == 0 - - -def test_optimization_ignores_existing_points(): - # test that existing points behave as constants in the optimal sampling - sampler = 
get_sampler(sampler="optimal_hull") - bounds = Bounds(lower=-np.ones(3), upper=np.ones(3)) - calculated = sampler( - trustregion=Region(center=np.zeros(3), radius=1, bounds=bounds), - n_points=3, - existing_xs=np.ones((2, 3)), # same point implies min distance of zero always - rng=np.random.default_rng(1234), - ) - - assert pdist(calculated).min() > 0 - - -def test_optimality(): - # test that optimal versions of hull samplers produce better criterion value - standard_sampler = get_sampler(sampler="random_hull") - optimal_sampler = get_sampler(sampler="optimal_hull") - bounds = Bounds(lower=-np.ones(3), upper=np.ones(3)) - distances = [] - for sampler in [standard_sampler, optimal_sampler]: - sample = sampler( - trustregion=Region(center=np.zeros(3), radius=1, bounds=bounds), - n_points=5, - rng=np.random.default_rng(1234), - ) - distances.append(pdist(sample).min()) - - assert distances[1] > distances[0] - - -@pytest.mark.parametrize("n_points_randomsearch", [1, 2, 5, 10]) -def test_randomsearch(n_points_randomsearch): - # test that initial randomsearch of hull samplers produce better fekete values - - bounds = Bounds(lower=-np.ones(3), upper=np.ones(3)) - - _sampler = get_sampler("optimal_hull") - - # optimal sampling without randomsearch - _, info = _sampler( - trustregion=Region(center=np.zeros(3), radius=1, bounds=bounds), - n_points=5, - rng=np.random.default_rng(0), - return_info=True, - ) - - # optimal sampling with randomsearch - _, info_randomsearch = _sampler( - trustregion=Region(center=np.zeros(3), radius=1, bounds=bounds), - n_points=5, - rng=np.random.default_rng(0), - n_points_randomsearch=n_points_randomsearch, - return_info=True, - ) - - for key in ["start_fekete", "opt_fekete"]: - statement = info_randomsearch[key] >= info[key] or np.isclose( - info_randomsearch[key], info[key], rtol=1e-3 - ) - assert statement - - -@pytest.mark.parametrize("trustregion_shape", ("sphere", "cube")) -def test_pairwise_distance_on_hull(trustregion_shape): - # equal points imply zero distance - value = _minimal_pairwise_distance_on_hull( - x=np.ones(4), - existing_xs=None, - hardness=1, - trustregion_shape=trustregion_shape, - n_params=2, - ) - assert value == 0 - - # non-equal points imply positive distance - value = _minimal_pairwise_distance_on_hull( - x=np.arange(4), - existing_xs=None, - hardness=1, - trustregion_shape=trustregion_shape, - n_params=2, - ) - assert value > 0 - - -@pytest.mark.parametrize("trustregion_shape", ("sphere", "cube")) -def test_project_onto_unit_hull(trustregion_shape): - rng = np.random.default_rng(1234) - old = rng.uniform(-1, 1, size=10).reshape(5, 2) - new = _project_onto_unit_hull(old, trustregion_shape=trustregion_shape) - - order = 2 if trustregion_shape == "sphere" else np.inf - - norm = np.linalg.norm(old, axis=1, ord=order) - with pytest.raises(AssertionError): - aaae(1, norm) - - norm = np.linalg.norm(new, axis=1, ord=order) - aaae(1, norm) - - -@pytest.mark.parametrize("distribution", ["normal", "uniform"]) -def test_draw_from_distribution(distribution): - rng = np.random.default_rng() - draw = _draw_from_distribution(distribution, rng=rng, size=(3, 2)) - if distribution == "uniform": - assert (-1 <= draw).all() - assert (draw <= 1).all() - assert draw.shape == (3, 2) diff --git a/tests/optimization/tranquilo/test_solve_subproblem.py b/tests/optimization/tranquilo/test_solve_subproblem.py deleted file mode 100644 index 7fda79396..000000000 --- a/tests/optimization/tranquilo/test_solve_subproblem.py +++ /dev/null @@ -1,45 +0,0 @@ -import numpy as np 
-import pytest
-from estimagic.optimization.tranquilo.models import ScalarModel
-from estimagic.optimization.tranquilo.solve_subproblem import get_subsolver
-from estimagic.optimization.tranquilo.region import Region
-from estimagic.optimization.tranquilo.bounds import Bounds
-from numpy.testing import assert_array_almost_equal as aaae
-
-solvers = ["gqtpar", "gqtpar_fast"]
-
-
-@pytest.mark.slow()
-@pytest.mark.parametrize("solver_name", solvers, ids=solvers)
-def test_without_bounds(solver_name):
-    linear_terms = np.array([-0.0005429824695352, -0.1032556117176, -0.06816855282091])
-    quadratic_terms = np.array(
-        [
-            [2.05714077e-02, 7.58182390e-01, 9.00050279e-01],
-            [7.58182390e-01, 6.25867992e01, 4.20096648e01],
-            [9.00050279e-01, 4.20096648e01, 4.03810858e01],
-        ]
-    )
-
-    expected_x = np.array(
-        [
-            -0.9994584757179,
-            -0.007713730538474,
-            0.03198833730482,
-        ]
-    )
-
-    model = ScalarModel(
-        intercept=0, linear_terms=linear_terms, square_terms=quadratic_terms
-    )
-
-    trustregion = Region(center=np.zeros(3), radius=1, bounds=Bounds(None, None))
-
-    solve_subproblem = get_subsolver(sphere_solver=solver_name, cube_solver="bntr")
-
-    calculated = solve_subproblem(
-        model=model,
-        trustregion=trustregion,
-    )
-
-    aaae(calculated.x, expected_x)
diff --git a/tests/optimization/tranquilo/test_tranquilo.py b/tests/optimization/tranquilo/test_tranquilo.py
deleted file mode 100644
index e628a3d1d..000000000
--- a/tests/optimization/tranquilo/test_tranquilo.py
+++ /dev/null
@@ -1,234 +0,0 @@
-import itertools
-
-import numpy as np
-import pytest
-from estimagic.optimization.optimize import minimize
-from estimagic.optimization.tranquilo.tranquilo import (
-    tranquilo,
-    tranquilo_ls,
-)
-from numpy.testing import assert_array_almost_equal as aaae
-
-# ======================================================================================
-# Test tranquilo end-to-end
-# ======================================================================================
-
-
-def _product(sample_filter, model_fitter, model_type):
-    # is used to create products of test cases
-    return list(itertools.product(sample_filter, model_fitter, model_type))
-
-
-# ======================================================================================
-# Scalar Tranquilo
-# ======================================================================================
-
-TEST_CASES = {
-    "ols": {
-        "sample_filter": ["discard_all", "keep_all"],
-        "model_fitter": ["ols"],
-        "model_type": ["quadratic"],
-    },
-    "ols_keep_all": {
-        "sample_filter": ["keep_all"],
-        "model_fitter": ["ols"],
-        "model_type": ["quadratic"],
-    },
-    "pounders_discard_all": {
-        "sample_filter": ["discard_all"],
-        "model_fitter": ["powell"],
-        "model_type": ["quadratic"],
-    },
-    "pounders_keep_all": {
-        "sample_filter": ["keep_all"],
-        "model_fitter": ["powell"],
-        "model_type": ["quadratic"],
-    },
-}
-
-TEST_CASES = [_product(**kwargs) for kwargs in TEST_CASES.values()]
-TEST_CASES = itertools.chain.from_iterable(TEST_CASES)
-
-
-@pytest.mark.parametrize("sample_filter, model_fitter, model_type", TEST_CASES)
-def test_internal_tranquilo_scalar_sphere_defaults(
-    sample_filter,
-    model_fitter,
-    model_type,
-):
-    res = tranquilo(
-        criterion=lambda x: x @ x,
-        x=np.arange(4),
-        sample_filter=sample_filter,
-        model_fitter=model_fitter,
-        model_type=model_type,
-    )
-    aaae(res["solution_x"], np.zeros(4), decimal=4)
-
-
-# ======================================================================================
-# Imprecise options for scalar tranquilo
-# ======================================================================================
-
-TEST_CASES = {
-    "ls_keep": {
-        "sample_filter": ["keep_all"],
-        "model_fitter": ["ols"],
-        "model_type": ["quadratic"],
-    },
-    "pounders_discard_all": {
-        "sample_filter": ["discard_all"],
-        "model_fitter": ["powell"],
-        "model_type": ["quadratic"],
-    },
-}
-
-TEST_CASES = [_product(**kwargs) for kwargs in TEST_CASES.values()]
-TEST_CASES = itertools.chain.from_iterable(TEST_CASES)
-
-
-@pytest.mark.parametrize("sample_filter, model_fitter, model_type", TEST_CASES)
-def test_internal_tranquilo_scalar_sphere_imprecise_defaults(
-    sample_filter,
-    model_fitter,
-    model_type,
-):
-    res = tranquilo(
-        criterion=lambda x: x @ x,
-        x=np.arange(4),
-        sample_filter=sample_filter,
-        model_fitter=model_fitter,
-        model_type=model_type,
-    )
-    aaae(res["solution_x"], np.zeros(4), decimal=3)
-
-
-# ======================================================================================
-# External
-# ======================================================================================
-
-
-def test_external_tranquilo_scalar_sphere_defaults():
-    res = minimize(
-        criterion=lambda x: x @ x,
-        params=np.arange(4),
-        algorithm="tranquilo",
-    )
-
-    aaae(res.params, np.zeros(4), decimal=4)
-
-
-# ======================================================================================
-# Least-squares Tranquilo
-# ======================================================================================
-
-
-TEST_CASES = {
-    "ols": {
-        "sample_filter": ["keep_all", "discard_all"],
-        "model_fitter": ["ols"],
-        "model_type": ["linear"],
-    },
-    "tranquilo": {
-        "sample_filter": ["keep_all", "discard_all"],
-        "model_fitter": ["tranquilo"],
-        "model_type": ["linear"],
-    },
-}
-
-TEST_CASES = [_product(**kwargs) for kwargs in TEST_CASES.values()]
-TEST_CASES = itertools.chain.from_iterable(TEST_CASES)
-
-
-@pytest.mark.parametrize("sample_filter, model_fitter, model_type", TEST_CASES)
-def test_internal_tranquilo_ls_sphere_defaults(
-    sample_filter,
-    model_fitter,
-    model_type,
-):
-    res = tranquilo_ls(
-        criterion=lambda x: x,
-        x=np.arange(5),
-        sample_filter=sample_filter,
-        model_fitter=model_fitter,
-        model_type=model_type,
-    )
-    aaae(res["solution_x"], np.zeros(5), decimal=5)
-
-
-# ======================================================================================
-# External
-# ======================================================================================
-
-
-def test_external_tranquilo_ls_sphere_defaults():
-    res = minimize(
-        criterion=lambda x: x,
-        params=np.arange(5),
-        algorithm="tranquilo_ls",
-    )
-
-    aaae(res.params, np.zeros(5), decimal=5)
-
-
-# ======================================================================================
-# Noisy case
-# ======================================================================================
-
-
-@pytest.mark.parametrize("algo", ["tranquilo", "tranquilo_ls"])
-def test_tranquilo_with_noise_handling_and_deterministic_function(algo):
-    def _f(x):
-        return {"root_contributions": x, "value": x @ x}
-
-    res = minimize(
-        criterion=_f,
-        params=np.arange(5),
-        algorithm=algo,
-        algo_options={"noisy": True},
-    )
-
-    aaae(res.params, np.zeros(5), decimal=3)
-
-
-@pytest.mark.slow()
-def test_tranquilo_ls_with_noise_handling_and_noisy_function():
-    rng = np.random.default_rng(123)
-
-    def _f(x):
-        x_n = x + rng.normal(0, 0.05, size=x.shape)
-        return {"root_contributions": x_n, "value": x_n @ x_n}
-
-    res = minimize(
-        criterion=_f,
-        params=np.ones(3),
algorithm="tranquilo", - algo_options={"noisy": True, "n_evals_per_point": 10}, - ) - - aaae(res.params, np.zeros(3), decimal=1) - - -# ====================================================================================== -# Bounded case -# ====================================================================================== - - -def sum_of_squares(x): - contribs = x**2 - return {"value": contribs.sum(), "contributions": contribs, "root_contributions": x} - - -@pytest.mark.parametrize("algorithm", ["tranquilo", "tranquilo_ls"]) -def test_tranquilo_with_binding_bounds(algorithm): - res = minimize( - criterion=sum_of_squares, - params=np.array([3, 2, -3]), - lower_bounds=np.array([1, -np.inf, -np.inf]), - upper_bounds=np.array([np.inf, np.inf, -1]), - algorithm=algorithm, - collect_history=True, - skip_checks=True, - ) - assert res.success in [True, None] - aaae(res.params, np.array([1, 0, -1]), decimal=3) diff --git a/tests/optimization/tranquilo/test_volume.py b/tests/optimization/tranquilo/test_volume.py deleted file mode 100644 index e09c500fa..000000000 --- a/tests/optimization/tranquilo/test_volume.py +++ /dev/null @@ -1,104 +0,0 @@ -import numpy as np -import pytest -from estimagic.optimization.tranquilo.volume import ( - _cube_radius, - _cube_volume, - _sphere_radius, - _sphere_volume, - get_radius, - get_radius_after_volume_scaling, - get_radius_of_cube_with_volume_of_sphere, - get_radius_of_sphere_with_volume_of_cube, - get_volume, -) - -dims = dims = [1, 2, 3, 4, 12, 13, 15] -coeffs = [ - 2, - np.pi, - 4 * np.pi / 3, - np.pi**2 / 2, - np.pi**6 / 720, - 128 * np.pi**6 / 135135, - 256 * np.pi**7 / 2027025, -] - - -@pytest.mark.parametrize("dim", dims) -def test_get_radius_of_sphere_with_volume_of_cube(dim): - cube_radius = 1.5 - scaling_factor = 0.95 - vol = _cube_volume(cube_radius, dim) * scaling_factor - expected = _sphere_radius(vol, dim) - got = get_radius_of_sphere_with_volume_of_cube(cube_radius, dim, scaling_factor) - assert np.allclose(got, expected) - - -@pytest.mark.parametrize("dim", dims) -def test_get_radius_of_cube_with_volume_of_sphere(dim): - sphere_radius = 1.5 - scaling_factor = 0.95 - vol = _sphere_volume(sphere_radius, dim) * scaling_factor - expected = _cube_radius(vol, dim) - got = get_radius_of_cube_with_volume_of_sphere(sphere_radius, dim, scaling_factor) - assert np.allclose(got, expected) - - -def test_get_radius_of_sphere_with_volume_of_cube_no_scaling(): - v1 = get_radius_of_sphere_with_volume_of_cube(2.0, 2, None) - v2 = get_radius_of_sphere_with_volume_of_cube(2.0, 2, 1.0) - assert v1 == v2 - - -def test_get_radius_of_cube_with_volume_of_sphere_no_scaling(): - v1 = get_radius_of_cube_with_volume_of_sphere(2.0, 2, None) - v2 = get_radius_of_cube_with_volume_of_sphere(2.0, 2, 1.0) - assert v1 == v2 - - -@pytest.mark.parametrize("dim", dims) -def test_radius_after_volume_rescaling_scaling_factor_sphere(dim): - radius = 0.6 - scaling_factor = 0.9 - - naive = _sphere_radius(_sphere_volume(radius, dim) * scaling_factor, dim) - - got = get_radius_after_volume_scaling(radius, dim, scaling_factor) - - assert np.allclose(got, naive) - - -@pytest.mark.parametrize("dim", dims) -def test_radius_after_volume_rescaling_scaling_factor_cube(dim): - radius = 0.6 - scaling_factor = 0.9 - - naive = _cube_radius(_cube_volume(radius, dim) * scaling_factor, dim) - - got = get_radius_after_volume_scaling(radius, dim, scaling_factor) - - assert np.allclose(got, naive) - - -@pytest.mark.parametrize("dim, coeff", list(zip(dims, coeffs))) -def test_shpere_volume_and_radius(dim, 
-    radius = 0.5
-    expected_volume = coeff * radius**dim
-    got_volume = get_volume(radius, dim, "sphere")
-    assert np.allclose(got_volume, expected_volume)
-
-    got_radius = get_radius(got_volume, dim, "sphere")
-    assert np.allclose(got_radius, radius)
-
-
-@pytest.mark.parametrize("dim", dims)
-def test_cube_volume_and_radius(dim):
-    radius = 0.6
-
-    expected_volume = 1.2**dim
-
-    got_volume = get_volume(radius, dim, "cube")
-    assert np.allclose(got_volume, expected_volume)
-
-    got_radius = get_radius(got_volume, dim, "cube")
-    assert np.allclose(got_radius, radius)
diff --git a/tests/optimization/tranquilo/test_weighting.py b/tests/optimization/tranquilo/test_weighting.py
deleted file mode 100644
index 201bad583..000000000
--- a/tests/optimization/tranquilo/test_weighting.py
+++ /dev/null
@@ -1,7 +0,0 @@
-import numpy as np
-from estimagic.optimization.tranquilo.weighting import get_sample_weighter
-
-
-def test_no_weighting():
-    weight_points = get_sample_weighter(weighter="no_weights", bounds=None)
-    assert weight_points(np.ones((4, 3)), trustregion=None) is None
diff --git a/tests/optimization/tranquilo/test_wrap_criterion.py b/tests/optimization/tranquilo/test_wrap_criterion.py
deleted file mode 100644
index 3886a36cd..000000000
--- a/tests/optimization/tranquilo/test_wrap_criterion.py
+++ /dev/null
@@ -1,61 +0,0 @@
-import itertools
-
-import numpy as np
-import pytest
-from estimagic.optimization.tranquilo.history import History
-from estimagic.optimization.tranquilo.wrap_criterion import get_wrapped_criterion
-from numpy.testing import assert_array_almost_equal as aaae
-
-TEST_CASES = list(itertools.product(["scalar", "least_squares", "likelihood"], [1, 2]))
-
-
-@pytest.mark.parametrize("functype, n_evals", TEST_CASES)
-def test_wrapped_criterion(functype, n_evals):
-    # set up criterion (all should have same results)
-    func_dict = {
-        "least_squares": lambda x: x,
-        "likelihood": lambda x: x**2,
-        "scalar": lambda x: x @ x,
-    }
-
-    criterion = func_dict[functype]
-
-    # set up history
-    history = History(functype=functype)
-    for params in [np.zeros(3), np.ones(3)]:
-        idxs = history.add_xs(params)
-        history.add_evals(idxs, criterion(params))
-
-    assert history.get_n_fun() == 2
-
-    wrapped_criterion = get_wrapped_criterion(
-        criterion=criterion, batch_evaluator="joblib", n_cores=1, history=history
-    )
-
-    # set up params and expected results
-    if n_evals == 1:
-        params = np.arange(3)
-        history.add_xs(params)
-        expected_fvecs = criterion(params)
-        expected_fvals = params @ params
-        expected_indices = 2
-        eval_info = {2: 1}
-    else:
-        params = np.arange(3 * n_evals).reshape(n_evals, 3)
-        history.add_xs(params)
-        expected_fvecs = np.array([criterion(x) for x in params]).reshape(2, -1)
-        expected_fvals = np.array([x @ x for x in params])
-        expected_indices = np.arange(2, 2 + n_evals)
-        eval_info = {idx: 1 for idx in expected_indices}
-
-    # use wrapped_criterion
-    wrapped_criterion(eval_info)
-
-    assert history.get_n_fun() == 2 + n_evals
-    assert history.get_n_xs() == 2 + n_evals
-
-    got_fvecs = history.fvecs[expected_indices]
-    aaae(got_fvecs, expected_fvecs)
-
-    got_fvals = history.fvals[expected_indices]
-    aaae(got_fvals, expected_fvals)
diff --git a/tests/visualization/test_visualize_tranquilo.py b/tests/visualization/test_visualize_tranquilo.py
deleted file mode 100644
index 9d2d2dfd3..000000000
--- a/tests/visualization/test_visualize_tranquilo.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import pytest
-from estimagic import get_benchmark_problems, minimize
-from estimagic.visualization.visualize_tranquilo import visualize_tranquilo
-
-cases = []
-algo_options = {
-    "random_hull": {
-        "sampler": "random_hull",
-        "subsolver": "gqtpar_fast",
-        "sample_filter": "keep_all",
-        "stopping.max_iterations": 10,
-    },
-    "optimal_hull": {
-        "sampler": "optimal_hull",
-        "subsolver": "gqtpar_fast",
-        "sample_filter": "keep_all",
-        "stopping.max_iterations": 10,
-    },
-}
-for problem in ["rosenbrock_good_start", "watson_6_good_start"]:
-    inputs = get_benchmark_problems("more_wild")[problem]["inputs"]
-    criterion = inputs["criterion"]
-    start_params = inputs["params"]
-    for algo in ["tranquilo", "tranquilo_ls"]:
-        results = {}
-        for s, options in algo_options.items():
-            results[s] = minimize(
-                criterion=criterion,
-                params=start_params,
-                algorithm=algo,
-                algo_options=options,
-            )
-        cases.append(results)
-
-
-@pytest.mark.parametrize("results", cases)
-def test_visualize_tranquilo(results):
-    visualize_tranquilo(results, 5)
-    for res in results.values():
-        visualize_tranquilo(res, [1, 5])