From c1376dd5a547cdbc7d07b39297cacc1d11850190 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 4 Oct 2024 14:02:21 +0200 Subject: [PATCH 01/14] Add files --- docs/userguide/userguide.md | 1 + docs/userguide/utils.md | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 docs/userguide/utils.md diff --git a/docs/userguide/userguide.md b/docs/userguide/userguide.md index 07e275be5..be252f3d5 100644 --- a/docs/userguide/userguide.md +++ b/docs/userguide/userguide.md @@ -15,4 +15,5 @@ Simulation Surrogates Targets Transfer Learning +Utilities ``` \ No newline at end of file diff --git a/docs/userguide/utils.md b/docs/userguide/utils.md new file mode 100644 index 000000000..fc157b7b0 --- /dev/null +++ b/docs/userguide/utils.md @@ -0,0 +1,12 @@ +# Utilities + +BayBE comes with a set of useful functions that can make your life easier in certain +scenarios. + +## Search Space Memory Size Estimation + +## Reproducibility + +## Add Fake Target Measurements + + From 1cbf18b58d36f409603cd10cca0d962f4d360e5d Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 4 Oct 2024 14:02:34 +0200 Subject: [PATCH 02/14] Describe memory utility --- docs/userguide/utils.md | 72 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/docs/userguide/utils.md b/docs/userguide/utils.md index fc157b7b0..f841fd1f2 100644 --- a/docs/userguide/utils.md +++ b/docs/userguide/utils.md @@ -4,6 +4,78 @@ BayBE comes with a set of useful functions that can make your life easier in cer scenarios. ## Search Space Memory Size Estimation +In search spaces that have discrete parts, the memory needed to store the respective +data can become excessively large as the number of points grows with the amount of +possible combinations arising form all discrete parameter values. + +The [`estimate_product_space_size`](baybe.searchspace.SearchSpace.estimate_product_space_size) +utility allows estimating the memory needed to represent the discrete subspace. +It will return a [`MemorySize`](baybe.searchspace.discrete.MemorySize) object that +contains some relevant estimates. + +```python +import numpy as np + +from baybe.parameters import NumericalDiscreteParameter +from baybe.searchspace import SearchSpace + +# This will create 10 parameters with 20 values each +# The resulting space would have 20^10 entries, requiring around 745 TB of memory for +# both experimental and computational representation of the search space +parameters = [ + NumericalDiscreteParameter(name=f"p{k+1}", values=np.linspace(0, 100, 20)) + for k in range(10) +] + +# Estimate the required memory for such a space in Bytes +mem_estimate = SearchSpace.estimate_product_space_size(parameters) + +# Print quantities of interest +print("Experimental Representation") +print(f"Estimated size: {mem_estimate.exp_rep_human_readable}") +print(f"Estimated size in Bytes: {mem_estimate.exp_rep_bytes}") +print(f"Expected data frame shape: {mem_estimate.exp_rep_shape}") + +print("Computational Representation") +print(f"Estimated size: {mem_estimate.comp_rep_human_readable}") +print(f"Estimated size in Bytes: {mem_estimate.comp_rep_bytes}") +print(f"Expected data frame shape: {mem_estimate.comp_rep_shape}") +``` + +```{admonition} Estimate with Constraints +:class: warning +`estimate_product_space_size` currently does not include the influence of potential +constraints in your search space as it is generally very hard to incorporate the effect +of arbitrary constraints without actually buidling the entire space. Hence, you should +always **treat the number you get as upper bound** of required memory. This can still be +useful - for instance if your estimate already is several Exabytes, it is unlikely that +most computers would be able to handle the result even if there are constraints present. +``` + +```{admonition} Influence of Continuous Parameters +:class: info +Continuous parameters fo not influence the size of the discrete search space part. +Hence, they are ignored by the utility. +``` + +```{admonition} Memory During Optimization +:class: warning +`estimate_product_space_size` only estimates the memory required to handle the search +space. **It does not estimate the memory required during optimization**, which can be +of a similar magnitude, but generally depends on additional factors. +``` + +```{admonition} Effective Search Space Creation for Mixtures +:class: tip +If you run into issues creating large search spaces, as for instance for mixtures, you +can try to use the [`SubspaceDiscrete.from_simplex`](baybe.searchspace.discrete.SubspaceDiscrete.from_simplex) +constructor. Instead of creating the search space completely before filtering it down +according to the constraints, this constructor includes the main mixture constraint +already during the Cartesian product, requiring substantially less memory overall. In +addition, BayBE can also be installed with an optional `polars` dependency (`pip install +baybe[polars]`) that will utilize the more efficient machinery form polars for handling +of the search space and its constraints. +``` ## Reproducibility From 5fbf715f509f53ca208ebfea54d02c100d35703e Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 4 Oct 2024 14:47:18 +0200 Subject: [PATCH 03/14] Describe reproducibility and fake targets --- docs/userguide/utils.md | 53 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/docs/userguide/utils.md b/docs/userguide/utils.md index f841fd1f2..09b64d99a 100644 --- a/docs/userguide/utils.md +++ b/docs/userguide/utils.md @@ -3,13 +3,14 @@ BayBE comes with a set of useful functions that can make your life easier in certain scenarios. -## Search Space Memory Size Estimation +## Search Space Memory Estimation In search spaces that have discrete parts, the memory needed to store the respective data can become excessively large as the number of points grows with the amount of possible combinations arising form all discrete parameter values. -The [`estimate_product_space_size`](baybe.searchspace.SearchSpace.estimate_product_space_size) -utility allows estimating the memory needed to represent the discrete subspace. +The [`SearchSpace.estimate_product_space_size`](baybe.searchspace.core.SearchSpace.estimate_product_space_size) +and [`SubspaceDiscrete.estimate_product_space_size`](baybe.searchspace.discrete.SubspaceDiscrete.estimate_product_space_size) +utilities allows estimating the memory needed to represent the discrete subspace. It will return a [`MemorySize`](baybe.searchspace.discrete.MemorySize) object that contains some relevant estimates. @@ -78,7 +79,51 @@ of the search space and its constraints. ``` ## Reproducibility +In some scenarios, for instance when testing your code setup, it can be useful to fix +the random seeds for all relevant engines to generate reproducible results. BayBE offers +the [`set_random_seed`](baybe.utils.random.set_random_seed) utility for this purpose: -## Add Fake Target Measurements +```python +from baybe.utils.random import set_random_seed + +# Set the global random seed for all relevant engines +set_random_seed(1337) + +# Assuming we have a prepared campaign +campaign.recommend(5) +``` + +Setting the global random seed can be undesirable if there are other packages in your +setup. For this, BayBE offers [`temporary_seed`](baybe.utils.random.temporary_seed): + +```python +from baybe.utils.random import temporary_seed +# Set the random seed for all relevant engines temporarily within the context +with temporary_seed(1337): + campaign.recommend(5) +``` + +## Add Fake Target Measurements and Noise +When creating test scripts, it is often useful to try the recommendation loop for a few +iterations. However, this requires some arbitrary target measurements to be set. Instead +of coming up with a custom logic every time, you can use the +[`add_fake_results`](baybe.utils.dataframe.add_fake_results) utility to add fake target +measurements and the [`add_parameter_noise`](baybe.utils.dataframe.add_parameter_noise) +utility to add artificial parameter noise: + +```python +from baybe.utils.dataframe import add_fake_results, add_parameter_noise +# Get recommendations +recommendations = campaign.recommend(5) + +# Add fake target measurements and artificial parameter noise to the recommendations +# The utilities will modify the data frames inplace +measurements = recommendations.copy() +add_fake_results(measurements, campaign.targets) +add_parameter_noise(measurements, campaign.parameters) + +# Continue the loop by adding the fake results +campaign.add_measurements(measurements) +``` From 7230b642b4f95a4c7ebc03f5943aafd46866dc7b Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 4 Oct 2024 16:26:03 +0200 Subject: [PATCH 04/14] Fix list comprehension scope --- docs/userguide/async.md | 6 ++++-- docs/userguide/utils.md | 3 +-- tests/docs/test_docs.py | 6 +++++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/docs/userguide/async.md b/docs/userguide/async.md index 177d4d772..831c69a0b 100644 --- a/docs/userguide/async.md +++ b/docs/userguide/async.md @@ -65,6 +65,8 @@ Akin to `measurements` or `recommendations`, `pending_experiments` is a datafram In the following example, we get a set of recommendations, add results for half of them, and start the next recommendation, marking the other half pending: ```python +from baybe.utils.dataframe import add_fake_results + # Get a set of 10 recommendation rec = campaign.recommend(batch_size=10) @@ -72,8 +74,8 @@ rec = campaign.recommend(batch_size=10) rec_finished = rec.iloc[:5] rec_pending = rec.iloc[5:] -# Add target measurements to the finished part. Here we add a random number -rec_finished["Target_max"] = 1337 +# Add target measurements to the finished part. Here we add fake results +add_fake_results(rec_finished, campaign.targets) campaign.add_measurements(rec_finished) # Get the next set of recommendations, incorporating the still unfinished experiments. diff --git a/docs/userguide/utils.md b/docs/userguide/utils.md index 09b64d99a..4660af16f 100644 --- a/docs/userguide/utils.md +++ b/docs/userguide/utils.md @@ -124,6 +124,5 @@ measurements = recommendations.copy() add_fake_results(measurements, campaign.targets) add_parameter_noise(measurements, campaign.parameters) -# Continue the loop by adding the fake results -campaign.add_measurements(measurements) +# Now continue the loop, e.g by adding the measurements... ``` diff --git a/tests/docs/test_docs.py b/tests/docs/test_docs.py index e7286d118..2d5e45d46 100644 --- a/tests/docs/test_docs.py +++ b/tests/docs/test_docs.py @@ -29,7 +29,11 @@ def test_code_executability(file: Path, campaign): test will be available in the executed code too. """ userguide_code = "\n".join(extract_code_blocks(file, include_tilde=False)) - exec(userguide_code) + + namespace = {"__builtins__": __builtins__, "campaign": campaign} + + # Execute the code in the isolated namespace + exec(userguide_code, namespace, namespace) # TODO: Needs a refactoring (files codeblocks should be auto-detected) From e202fd27851e44ea000aaa7d73e995a726b7ffa8 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 4 Oct 2024 16:33:58 +0200 Subject: [PATCH 05/14] Update CHANGELOG.md --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d34e52ee2..c64e338a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,10 +8,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - `n_restarts` and `n_raw_samples` keywords to configure continuous optimization behavior for `BotorchRecommender` +- User guide for utilities ### Fixed - Leftover attrs-decorated classes are garbage collected before the subclass tree is - traversed, avoiding sporadic serialization problems + traversed, avoiding sporadic serialization problems ## [0.11.1] - 2024-10-01 ### Added From 0af23e79b0139fcfacdb70eefedeefe4fc3fb9f4 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Thu, 10 Oct 2024 20:35:22 +0200 Subject: [PATCH 06/14] Generalize section on alternative creation --- docs/userguide/utils.md | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/docs/userguide/utils.md b/docs/userguide/utils.md index 4660af16f..9fdfdeb8e 100644 --- a/docs/userguide/utils.md +++ b/docs/userguide/utils.md @@ -66,16 +66,21 @@ space. **It does not estimate the memory required during optimization**, which c of a similar magnitude, but generally depends on additional factors. ``` -```{admonition} Effective Search Space Creation for Mixtures +```{admonition} Efficient Search Space Creation :class: tip -If you run into issues creating large search spaces, as for instance for mixtures, you -can try to use the [`SubspaceDiscrete.from_simplex`](baybe.searchspace.discrete.SubspaceDiscrete.from_simplex) -constructor. Instead of creating the search space completely before filtering it down -according to the constraints, this constructor includes the main mixture constraint -already during the Cartesian product, requiring substantially less memory overall. In -addition, BayBE can also be installed with an optional `polars` dependency (`pip install -baybe[polars]`) that will utilize the more efficient machinery form polars for handling -of the search space and its constraints. +If you run into issues creating large search spaces, as for instance in mixture +use cases, you should consider resorting to more specialized ways of creation by invoking alternative +search space constructors like +{meth}`~baybe.searchspace.discrete.SubspaceDiscrete.from_dataframe` +or +{meth}`~baybe.searchspace.discrete.SubspaceDiscrete.from_simplex`. +Instead of creating a product space first and then filtering it down +according to constraints, they offer a more direct and thus efficient path to the +desired result, typically requiring substantially less memory. +For example, {meth}`~baybe.searchspace.discrete.SubspaceDiscrete.from_simplex` +includes the mixture constraint already *during* the product creation. +In addition, BayBE can also be installed with its optional `polars` dependency +(`pip install baybe[polars]`) that activates efficient machinery for constraint handling. ``` ## Reproducibility From cf7c60f3f253705302b77858a3d036d78a491856 Mon Sep 17 00:00:00 2001 From: Martin Fitzner <17951239+Scienfitz@users.noreply.github.com> Date: Thu, 10 Oct 2024 23:04:18 +0200 Subject: [PATCH 07/14] Update text Co-authored-by: AdrianSosic --- docs/userguide/utils.md | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/docs/userguide/utils.md b/docs/userguide/utils.md index 9fdfdeb8e..32d87bd25 100644 --- a/docs/userguide/utils.md +++ b/docs/userguide/utils.md @@ -10,9 +10,9 @@ possible combinations arising form all discrete parameter values. The [`SearchSpace.estimate_product_space_size`](baybe.searchspace.core.SearchSpace.estimate_product_space_size) and [`SubspaceDiscrete.estimate_product_space_size`](baybe.searchspace.discrete.SubspaceDiscrete.estimate_product_space_size) -utilities allows estimating the memory needed to represent the discrete subspace. -It will return a [`MemorySize`](baybe.searchspace.discrete.MemorySize) object that -contains some relevant estimates. +utilities allow estimating the memory needed to represent the discrete subspace. +They return a [`MemorySize`](baybe.searchspace.discrete.MemorySize) object that +contains some relevant estimates: ```python import numpy as np @@ -20,15 +20,15 @@ import numpy as np from baybe.parameters import NumericalDiscreteParameter from baybe.searchspace import SearchSpace -# This will create 10 parameters with 20 values each +# This creates 10 parameters with 20 values each. # The resulting space would have 20^10 entries, requiring around 745 TB of memory for -# both experimental and computational representation of the search space +# both experimental and computational representation of the search space. parameters = [ NumericalDiscreteParameter(name=f"p{k+1}", values=np.linspace(0, 100, 20)) for k in range(10) ] -# Estimate the required memory for such a space in Bytes +# Estimate the required memory for such a space mem_estimate = SearchSpace.estimate_product_space_size(parameters) # Print quantities of interest @@ -43,22 +43,16 @@ print(f"Estimated size in Bytes: {mem_estimate.comp_rep_bytes}") print(f"Expected data frame shape: {mem_estimate.comp_rep_shape}") ``` -```{admonition} Estimate with Constraints +```{admonition} Estimation with Constraints :class: warning `estimate_product_space_size` currently does not include the influence of potential constraints in your search space as it is generally very hard to incorporate the effect -of arbitrary constraints without actually buidling the entire space. Hence, you should +of arbitrary constraints without actually building the entire space. Hence, you should always **treat the number you get as upper bound** of required memory. This can still be -useful - for instance if your estimate already is several Exabytes, it is unlikely that +useful – for instance if your estimate already is several Exabytes, it is unlikely that most computers would be able to handle the result even if there are constraints present. ``` -```{admonition} Influence of Continuous Parameters -:class: info -Continuous parameters fo not influence the size of the discrete search space part. -Hence, they are ignored by the utility. -``` - ```{admonition} Memory During Optimization :class: warning `estimate_product_space_size` only estimates the memory required to handle the search @@ -66,6 +60,12 @@ space. **It does not estimate the memory required during optimization**, which c of a similar magnitude, but generally depends on additional factors. ``` +```{admonition} Influence of Continuous Parameters +:class: info +Continuous parameters do not influence the size of the discrete search space part. +Hence, they are ignored by the utility. +``` + ```{admonition} Efficient Search Space Creation :class: tip If you run into issues creating large search spaces, as for instance in mixture @@ -109,7 +109,7 @@ with temporary_seed(1337): campaign.recommend(5) ``` -## Add Fake Target Measurements and Noise +## Adding Fake Target Measurements and Parameter Noise When creating test scripts, it is often useful to try the recommendation loop for a few iterations. However, this requires some arbitrary target measurements to be set. Instead of coming up with a custom logic every time, you can use the @@ -123,11 +123,11 @@ from baybe.utils.dataframe import add_fake_results, add_parameter_noise # Get recommendations recommendations = campaign.recommend(5) -# Add fake target measurements and artificial parameter noise to the recommendations -# The utilities will modify the data frames inplace +# Add fake target measurements and artificial parameter noise to the recommendations. +# The utilities modify the dataframes inplace. measurements = recommendations.copy() add_fake_results(measurements, campaign.targets) add_parameter_noise(measurements, campaign.parameters) -# Now continue the loop, e.g by adding the measurements... +# Now continue the loop, e.g. by adding the measurements... ``` From 2dff91b206e9b6bddd3bfe069475d7a48dc6ff9c Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 10 Oct 2024 23:20:53 +0200 Subject: [PATCH 08/14] Fix CHANGELOG.md --- CHANGELOG.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c64e338a4..064ba6ad8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,8 +25,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Unsafe name-based matching of columns in `get_comp_rep_parameter_indices` -- Leftover attrs-decorated classes are garbage collected before the subclass tree is - traversed, avoiding sporadic serialization problems ### Deprecations - `ContinuousLinearEqualityConstraint` and `ContinuousLinearInequalityConstraint` From db204f471073622d166ee4a1be8c9c5896561851 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 10 Oct 2024 23:25:17 +0200 Subject: [PATCH 09/14] Add explanation --- tests/docs/test_docs.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/docs/test_docs.py b/tests/docs/test_docs.py index 2d5e45d46..74d86796d 100644 --- a/tests/docs/test_docs.py +++ b/tests/docs/test_docs.py @@ -30,9 +30,13 @@ def test_code_executability(file: Path, campaign): """ userguide_code = "\n".join(extract_code_blocks(file, include_tilde=False)) + # Create a fixed namespace, which is provided to exec as both global and local + # name space. This ensures that all snippets are executed in their own fresh + # environment unaffected by other snippets. The space for globals and locals must + # be the same, as otherwise exec uses separate scopes for specific patterns within + # the snippet (e.g. list comprehensions) causing unknown name errors despite + # correct import. namespace = {"__builtins__": __builtins__, "campaign": campaign} - - # Execute the code in the isolated namespace exec(userguide_code, namespace, namespace) From debe1846ad548c07fa0a2cfc266fef383d5e27c7 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 10 Oct 2024 23:34:54 +0200 Subject: [PATCH 10/14] Add links --- docs/userguide/utils.md | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/docs/userguide/utils.md b/docs/userguide/utils.md index 32d87bd25..c558efce8 100644 --- a/docs/userguide/utils.md +++ b/docs/userguide/utils.md @@ -45,19 +45,21 @@ print(f"Expected data frame shape: {mem_estimate.comp_rep_shape}") ```{admonition} Estimation with Constraints :class: warning -`estimate_product_space_size` currently does not include the influence of potential -constraints in your search space as it is generally very hard to incorporate the effect -of arbitrary constraints without actually building the entire space. Hence, you should -always **treat the number you get as upper bound** of required memory. This can still be -useful – for instance if your estimate already is several Exabytes, it is unlikely that -most computers would be able to handle the result even if there are constraints present. +{meth}`~baybe.searchspace.core.SearchSpace.estimate_product_space_size` +currently does not include the influence of potential constraints in your search space +as it is generally very hard to incorporate the effect of arbitrary constraints without +actually building the entire space. Hence, you should always **treat the number you get +as upper bound** of required memory. This can still be useful – for instance if your +estimate already is several Exabytes, it is unlikely that most computers would be able +to handle the result even if there are constraints present. ``` ```{admonition} Memory During Optimization :class: warning -`estimate_product_space_size` only estimates the memory required to handle the search -space. **It does not estimate the memory required during optimization**, which can be -of a similar magnitude, but generally depends on additional factors. +{meth}`~baybe.searchspace.core.SearchSpace.estimate_product_space_size` +only estimates the memory required to handle the search space. **It does not estimate +the memory required during optimization**, which can be of a similar magnitude, but +generally depends on additional factors. ``` ```{admonition} Influence of Continuous Parameters @@ -69,8 +71,8 @@ Hence, they are ignored by the utility. ```{admonition} Efficient Search Space Creation :class: tip If you run into issues creating large search spaces, as for instance in mixture -use cases, you should consider resorting to more specialized ways of creation by invoking alternative -search space constructors like +use cases, you should consider resorting to more specialized ways of creation by +invoking alternative search space constructors like {meth}`~baybe.searchspace.discrete.SubspaceDiscrete.from_dataframe` or {meth}`~baybe.searchspace.discrete.SubspaceDiscrete.from_simplex`. From b3caa93d0b5d9a2ab9689d2ed2a37e00d070b4d0 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 10 Oct 2024 23:53:19 +0200 Subject: [PATCH 11/14] Make utilities return modified dataframes --- baybe/utils/dataframe.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/baybe/utils/dataframe.py b/baybe/utils/dataframe.py index 80d3c1c4a..87e4c52dd 100644 --- a/baybe/utils/dataframe.py +++ b/baybe/utils/dataframe.py @@ -72,13 +72,13 @@ def add_fake_results( good_reference_values: dict[str, list] | None = None, good_intervals: dict[str, tuple[float, float]] | None = None, bad_intervals: dict[str, tuple[float, float]] | None = None, -) -> None: +) -> pd.DataFrame: """Add fake results to a dataframe which was the result of a BayBE recommendation. It is possible to specify "good" values, which will be given a better target value. With this, the algorithm can be driven towards certain optimal values - whilst still being random. Useful for testing. Note that this does not return a - new dataframe and that the dataframe is changed in-place. + whilst still being random. Useful for testing. Note that the dataframe is changed + in-place and also returned. Args: data: A dataframe containing parameter configurations in experimental @@ -99,6 +99,9 @@ def add_fake_results( the parameters lie outside the conditions specified through ``good_reference_values``. + Returns: + The modified dataframe. + Raises: ValueError: If good values for a parameter were specified, but this parameter is not part of the dataframe. @@ -216,19 +219,21 @@ def add_fake_results( final_mask.sum(), ) + return data + def add_parameter_noise( data: pd.DataFrame, parameters: Iterable[Parameter], noise_type: Literal["absolute", "relative_percent"] = "absolute", noise_level: float = 1.0, -) -> None: +) -> pd.DataFrame: """Apply uniform noise to the parameter values of a recommendation frame. The noise can be additive or multiplicative. This can be used to simulate experimental noise or imperfect user input containing numerical parameter values that differ from the recommendations. Note that the - dataframe is modified in-place, and that no new dataframe is returned. + dataframe is changed in-place and also returned. Args: data: Output of the ``recommend`` function of a ``Campaign`` object, see @@ -239,6 +244,9 @@ def add_parameter_noise( for noise type ``absolute`` and as percentage for noise type ``relative_percent``. + Returns: + The modified dataframe. + Raises: ValueError: If ``noise_type`` is neither ``absolute`` nor ``relative_percent``. @@ -265,6 +273,8 @@ def add_parameter_noise( param.bounds.lower, param.bounds.upper ) + return data + def df_drop_single_value_columns( df: pd.DataFrame, lst_exclude: list = None From b97c2ed30f9d29e1ee71794d80f9e5a1760d1cc6 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 10 Oct 2024 23:56:43 +0200 Subject: [PATCH 12/14] Rename utility --- baybe/simulation/lookup.py | 4 ++-- baybe/utils/dataframe.py | 2 +- docs/userguide/async.md | 4 ++-- docs/userguide/utils.md | 6 +++--- examples/Basics/campaign.py | 6 +++--- examples/Basics/recommenders.py | 4 ++-- examples/Constraints_Discrete/custom_constraints.py | 4 ++-- .../Constraints_Discrete/dependency_constraints.py | 4 ++-- .../Constraints_Discrete/exclusion_constraints.py | 4 ++-- examples/Constraints_Discrete/mixture_constraints.py | 4 ++-- examples/Constraints_Discrete/prodsum_constraints.py | 4 ++-- examples/Custom_Surrogates/custom_pretrained.py | 4 ++-- examples/Custom_Surrogates/surrogate_params.py | 4 ++-- examples/Multi_Target/desirability.py | 4 ++-- tests/conftest.py | 4 ++-- tests/simulate_telemetry.py | 12 ++++++------ tests/test_input_output.py | 6 +++--- tests/test_pending_experiments.py | 6 +++--- tests/test_surrogate.py | 4 ++-- 19 files changed, 45 insertions(+), 45 deletions(-) diff --git a/baybe/simulation/lookup.py b/baybe/simulation/lookup.py index e10368aab..f96c68e62 100644 --- a/baybe/simulation/lookup.py +++ b/baybe/simulation/lookup.py @@ -11,7 +11,7 @@ from baybe.simulation._imputation import _impute_lookup from baybe.targets.base import Target -from baybe.utils.dataframe import add_fake_results +from baybe.utils.dataframe import add_fake_measurements _logger = logging.getLogger(__name__) @@ -68,7 +68,7 @@ def look_up_targets( 2 3 15.0 """ if lookup is None: - add_fake_results(queries, targets) + add_fake_measurements(queries, targets) elif isinstance(lookup, Callable): _look_up_targets_from_callable(queries, targets, lookup) elif isinstance(lookup, pd.DataFrame): diff --git a/baybe/utils/dataframe.py b/baybe/utils/dataframe.py index 87e4c52dd..ec11a30fd 100644 --- a/baybe/utils/dataframe.py +++ b/baybe/utils/dataframe.py @@ -66,7 +66,7 @@ def to_tensor(*x: np.ndarray | pd.DataFrame) -> Tensor | tuple[Tensor, ...]: return out -def add_fake_results( +def add_fake_measurements( data: pd.DataFrame, targets: Collection[Target], good_reference_values: dict[str, list] | None = None, diff --git a/docs/userguide/async.md b/docs/userguide/async.md index 831c69a0b..337fd273b 100644 --- a/docs/userguide/async.md +++ b/docs/userguide/async.md @@ -65,7 +65,7 @@ Akin to `measurements` or `recommendations`, `pending_experiments` is a datafram In the following example, we get a set of recommendations, add results for half of them, and start the next recommendation, marking the other half pending: ```python -from baybe.utils.dataframe import add_fake_results +from baybe.utils.dataframe import add_fake_measurements # Get a set of 10 recommendation rec = campaign.recommend(batch_size=10) @@ -75,7 +75,7 @@ rec_finished = rec.iloc[:5] rec_pending = rec.iloc[5:] # Add target measurements to the finished part. Here we add fake results -add_fake_results(rec_finished, campaign.targets) +add_fake_measurements(rec_finished, campaign.targets) campaign.add_measurements(rec_finished) # Get the next set of recommendations, incorporating the still unfinished experiments. diff --git a/docs/userguide/utils.md b/docs/userguide/utils.md index c558efce8..3d421b0a3 100644 --- a/docs/userguide/utils.md +++ b/docs/userguide/utils.md @@ -115,12 +115,12 @@ with temporary_seed(1337): When creating test scripts, it is often useful to try the recommendation loop for a few iterations. However, this requires some arbitrary target measurements to be set. Instead of coming up with a custom logic every time, you can use the -[`add_fake_results`](baybe.utils.dataframe.add_fake_results) utility to add fake target +[`add_fake_measurements`](baybe.utils.dataframe.add_fake_measurements) utility to add fake target measurements and the [`add_parameter_noise`](baybe.utils.dataframe.add_parameter_noise) utility to add artificial parameter noise: ```python -from baybe.utils.dataframe import add_fake_results, add_parameter_noise +from baybe.utils.dataframe import add_fake_measurements, add_parameter_noise # Get recommendations recommendations = campaign.recommend(5) @@ -128,7 +128,7 @@ recommendations = campaign.recommend(5) # Add fake target measurements and artificial parameter noise to the recommendations. # The utilities modify the dataframes inplace. measurements = recommendations.copy() -add_fake_results(measurements, campaign.targets) +add_fake_measurements(measurements, campaign.targets) add_parameter_noise(measurements, campaign.parameters) # Now continue the loop, e.g. by adding the measurements... diff --git a/examples/Basics/campaign.py b/examples/Basics/campaign.py index 50e2b912c..a74f1fc8a 100644 --- a/examples/Basics/campaign.py +++ b/examples/Basics/campaign.py @@ -11,7 +11,7 @@ from baybe.parameters import NumericalDiscreteParameter, SubstanceParameter from baybe.searchspace import SearchSpace from baybe.targets import NumericalTarget -from baybe.utils.dataframe import add_fake_results +from baybe.utils.dataframe import add_fake_measurements ### Setup @@ -82,10 +82,10 @@ # Adding target values is done by creating a new column in the `recommendation` # dataframe named after the target. -# In this example, we use the `add_fake_results()` utility to create fake results. +# In this example, we use the `add_fake_measurements()` utility to create fake results. # We then update the campaign by adding the measurements. -add_fake_results(recommendation, campaign.targets) +add_fake_measurements(recommendation, campaign.targets) print("\n\nRecommended experiments with fake measured values: ") print(recommendation) diff --git a/examples/Basics/recommenders.py b/examples/Basics/recommenders.py index 4759041df..80ac03f41 100644 --- a/examples/Basics/recommenders.py +++ b/examples/Basics/recommenders.py @@ -29,7 +29,7 @@ from baybe.surrogates.base import Surrogate from baybe.targets import NumericalTarget from baybe.utils.basic import get_subclasses -from baybe.utils.dataframe import add_fake_results +from baybe.utils.dataframe import add_fake_measurements ### Available recommenders suitable for initial recommendation @@ -179,7 +179,7 @@ print("\n\nRecommended experiments: ") print(recommendation) -add_fake_results(recommendation, campaign.targets) +add_fake_measurements(recommendation, campaign.targets) print("\n\nRecommended experiments with fake measured values: ") print(recommendation) diff --git a/examples/Constraints_Discrete/custom_constraints.py b/examples/Constraints_Discrete/custom_constraints.py index 866688336..4b1ce6f7e 100644 --- a/examples/Constraints_Discrete/custom_constraints.py +++ b/examples/Constraints_Discrete/custom_constraints.py @@ -23,7 +23,7 @@ ) from baybe.searchspace import SearchSpace from baybe.targets import NumericalTarget -from baybe.utils.dataframe import add_fake_results +from baybe.utils.dataframe import add_fake_measurements ### Experiment setup @@ -156,5 +156,5 @@ def custom_function(df: pd.DataFrame) -> pd.Series: ) rec = campaign.recommend(batch_size=5) - add_fake_results(rec, campaign.targets) + add_fake_measurements(rec, campaign.targets) campaign.add_measurements(rec) diff --git a/examples/Constraints_Discrete/dependency_constraints.py b/examples/Constraints_Discrete/dependency_constraints.py index 737bedc33..b61e9e090 100644 --- a/examples/Constraints_Discrete/dependency_constraints.py +++ b/examples/Constraints_Discrete/dependency_constraints.py @@ -23,7 +23,7 @@ ) from baybe.searchspace import SearchSpace from baybe.targets import NumericalTarget -from baybe.utils.dataframe import add_fake_results +from baybe.utils.dataframe import add_fake_measurements ### Experiment setup @@ -113,5 +113,5 @@ ) rec = campaign.recommend(batch_size=5) - add_fake_results(rec, campaign.targets) + add_fake_measurements(rec, campaign.targets) campaign.add_measurements(rec) diff --git a/examples/Constraints_Discrete/exclusion_constraints.py b/examples/Constraints_Discrete/exclusion_constraints.py index eb81ef2fc..c776f660e 100644 --- a/examples/Constraints_Discrete/exclusion_constraints.py +++ b/examples/Constraints_Discrete/exclusion_constraints.py @@ -24,7 +24,7 @@ ) from baybe.searchspace import SearchSpace from baybe.targets import NumericalTarget -from baybe.utils.dataframe import add_fake_results +from baybe.utils.dataframe import add_fake_measurements ### Experiment setup @@ -144,5 +144,5 @@ ) rec = campaign.recommend(batch_size=5) - add_fake_results(rec, campaign.targets) + add_fake_measurements(rec, campaign.targets) campaign.add_measurements(rec) diff --git a/examples/Constraints_Discrete/mixture_constraints.py b/examples/Constraints_Discrete/mixture_constraints.py index 7848043cc..3a07922a1 100644 --- a/examples/Constraints_Discrete/mixture_constraints.py +++ b/examples/Constraints_Discrete/mixture_constraints.py @@ -27,7 +27,7 @@ from baybe.parameters import NumericalDiscreteParameter, SubstanceParameter from baybe.searchspace import SearchSpace from baybe.targets import NumericalTarget -from baybe.utils.dataframe import add_fake_results +from baybe.utils.dataframe import add_fake_measurements ### Experiment setup @@ -175,5 +175,5 @@ ) rec = campaign.recommend(batch_size=5) - add_fake_results(rec, campaign.targets) + add_fake_measurements(rec, campaign.targets) campaign.add_measurements(rec) diff --git a/examples/Constraints_Discrete/prodsum_constraints.py b/examples/Constraints_Discrete/prodsum_constraints.py index 2d547e61b..e1e85d72f 100644 --- a/examples/Constraints_Discrete/prodsum_constraints.py +++ b/examples/Constraints_Discrete/prodsum_constraints.py @@ -25,7 +25,7 @@ ) from baybe.searchspace import SearchSpace from baybe.targets import NumericalTarget -from baybe.utils.dataframe import add_fake_results +from baybe.utils.dataframe import add_fake_measurements ### Experiment setup @@ -141,5 +141,5 @@ ) rec = campaign.recommend(batch_size=5) - add_fake_results(rec, campaign.targets) + add_fake_measurements(rec, campaign.targets) campaign.add_measurements(rec) diff --git a/examples/Custom_Surrogates/custom_pretrained.py b/examples/Custom_Surrogates/custom_pretrained.py index 5bdb89657..0e7ef2fcc 100644 --- a/examples/Custom_Surrogates/custom_pretrained.py +++ b/examples/Custom_Surrogates/custom_pretrained.py @@ -26,7 +26,7 @@ from baybe.searchspace import SearchSpace from baybe.surrogates import CustomONNXSurrogate from baybe.targets import NumericalTarget -from baybe.utils.dataframe import add_fake_results, to_tensor +from baybe.utils.dataframe import add_fake_measurements, to_tensor ### Experiment Setup @@ -117,7 +117,7 @@ # Add some fake results -add_fake_results(recommendation, campaign.targets) +add_fake_measurements(recommendation, campaign.targets) campaign.add_measurements(recommendation) ### Model Outputs diff --git a/examples/Custom_Surrogates/surrogate_params.py b/examples/Custom_Surrogates/surrogate_params.py index 88fe380ad..406371887 100644 --- a/examples/Custom_Surrogates/surrogate_params.py +++ b/examples/Custom_Surrogates/surrogate_params.py @@ -26,7 +26,7 @@ from baybe.searchspace import SearchSpace from baybe.surrogates import NGBoostSurrogate from baybe.targets import NumericalTarget -from baybe.utils.dataframe import add_fake_results +from baybe.utils.dataframe import add_fake_measurements ### Experiment Setup @@ -103,7 +103,7 @@ print(recommendation) # Add some fake results -add_fake_results(recommendation, campaign.targets) +add_fake_measurements(recommendation, campaign.targets) campaign.add_measurements(recommendation) ### Model Outputs diff --git a/examples/Multi_Target/desirability.py b/examples/Multi_Target/desirability.py index d5b90c0d8..28eca2a40 100644 --- a/examples/Multi_Target/desirability.py +++ b/examples/Multi_Target/desirability.py @@ -15,7 +15,7 @@ from baybe.parameters import CategoricalParameter, NumericalDiscreteParameter from baybe.searchspace import SearchSpace from baybe.targets import NumericalTarget -from baybe.utils.dataframe import add_fake_results +from baybe.utils.dataframe import add_fake_measurements ### Experiment setup and creating the searchspace @@ -107,7 +107,7 @@ for kIter in range(N_ITERATIONS): rec = campaign.recommend(batch_size=3) - add_fake_results(rec, campaign.targets) + add_fake_measurements(rec, campaign.targets) campaign.add_measurements(rec) desirability = campaign.objective.transform(campaign.measurements) diff --git a/tests/conftest.py b/tests/conftest.py index 75f15ada6..c82630cd4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -77,7 +77,7 @@ ) from baybe.utils.basic import hilberts_factory from baybe.utils.boolean import strtobool -from baybe.utils.dataframe import add_fake_results, add_parameter_noise +from baybe.utils.dataframe import add_fake_measurements, add_parameter_noise from baybe.utils.random import temporary_seed # Hypothesis settings @@ -913,7 +913,7 @@ def run_iterations( rec = campaign.recommend(batch_size=batch_size) # dont use parameter noise for these tests - add_fake_results(rec, campaign.targets) + add_fake_measurements(rec, campaign.targets) if add_noise and (k % 2): add_parameter_noise(rec, campaign.parameters, noise_level=0.02) diff --git a/tests/simulate_telemetry.py b/tests/simulate_telemetry.py index ce26db3a8..495f043d9 100644 --- a/tests/simulate_telemetry.py +++ b/tests/simulate_telemetry.py @@ -21,7 +21,7 @@ VARNAME_TELEMETRY_USERNAME, get_user_details, ) -from baybe.utils.dataframe import add_fake_results +from baybe.utils.dataframe import add_fake_measurements dict_solvent = { "DMAc": r"CC(N(C)C)=O", @@ -85,7 +85,7 @@ campaign = Campaign(**config) for k in range(randint(4, 6)): dat = campaign.recommend(randint(2, 3)) - add_fake_results(dat, campaign.targets) + add_fake_measurements(dat, campaign.targets) campaign.add_measurements(dat) # Fake User1 - 5 iterations @@ -94,7 +94,7 @@ campaign = Campaign(**config) for k in range(randint(2, 3)): dat = campaign.recommend(randint(3, 4)) - add_fake_results(dat, campaign.targets) + add_fake_measurements(dat, campaign.targets) campaign.add_measurements(dat) # Fake User1a - Adds recommenations before calling recommend @@ -104,7 +104,7 @@ campaign.add_measurements(dat) for k in range(randint(2, 3)): dat = campaign.recommend(randint(3, 4)) - add_fake_results(dat, campaign.targets) + add_fake_measurements(dat, campaign.targets) campaign.add_measurements(dat) # Fake User2 - 2 iterations @@ -113,7 +113,7 @@ campaign = Campaign(**config) for k in range(2): dat = campaign.recommend(4) - add_fake_results(dat, campaign.targets) + add_fake_measurements(dat, campaign.targets) campaign.add_measurements(dat) # Fake User3 - no telemetry @@ -123,7 +123,7 @@ campaign = Campaign(**config) for k in range(randint(5, 7)): dat = campaign.recommend(randint(2, 3)) - add_fake_results(dat, campaign.targets) + add_fake_measurements(dat, campaign.targets) campaign.add_measurements(dat) # Cleanup diff --git a/tests/test_input_output.py b/tests/test_input_output.py index c8a69192c..bda924b7c 100644 --- a/tests/test_input_output.py +++ b/tests/test_input_output.py @@ -8,7 +8,7 @@ from baybe.recommenders import BotorchRecommender from baybe.searchspace import SearchSpace from baybe.targets import NumericalTarget -from baybe.utils.dataframe import add_fake_results +from baybe.utils.dataframe import add_fake_measurements # List of tests that are expected to fail (still missing implementation etc) param_xfails = [] @@ -26,7 +26,7 @@ def test_bad_parameter_input_value(campaign, good_reference_values, bad_val, req pytest.xfail() rec = campaign.recommend(batch_size=3) - add_fake_results( + add_fake_measurements( rec, campaign.targets, good_reference_values=good_reference_values, @@ -49,7 +49,7 @@ def test_bad_target_input_value(campaign, good_reference_values, bad_val, reques pytest.xfail() rec = campaign.recommend(batch_size=3) - add_fake_results( + add_fake_measurements( rec, campaign.targets, good_reference_values=good_reference_values, diff --git a/tests/test_pending_experiments.py b/tests/test_pending_experiments.py index dc20cbfa5..961dd8a39 100644 --- a/tests/test_pending_experiments.py +++ b/tests/test_pending_experiments.py @@ -18,7 +18,7 @@ TwoPhaseMetaRecommender, ) from baybe.utils.basic import get_subclasses -from baybe.utils.dataframe import add_fake_results, add_parameter_noise +from baybe.utils.dataframe import add_fake_measurements, add_parameter_noise from baybe.utils.random import temporary_seed _discrete_params = ["Categorical_1", "Switch_1", "Num_disc_1"] @@ -117,7 +117,7 @@ def test_pending_points(campaign, batch_size): # Perform a fake first iteration rec = campaign.recommend(batch_size) - add_fake_results(rec, campaign.targets) + add_fake_measurements(rec, campaign.targets) campaign.add_measurements(rec) # Get recommendations and set them as pending experiments while getting another set @@ -161,7 +161,7 @@ def test_invalid_acqf(searchspace, recommender, objective, batch_size, acqf): # Get recommendation and add a fake results rec1 = recommender.recommend(batch_size, searchspace, objective) - add_fake_results(rec1, objective.targets) + add_fake_measurements(rec1, objective.targets) # Create fake pending experiments rec2 = rec1.copy() diff --git a/tests/test_surrogate.py b/tests/test_surrogate.py index 5f6869bc7..463725443 100644 --- a/tests/test_surrogate.py +++ b/tests/test_surrogate.py @@ -4,7 +4,7 @@ from baybe.recommenders.pure.nonpredictive.sampling import RandomRecommender from baybe.surrogates.gaussian_process.core import GaussianProcessSurrogate -from baybe.utils.dataframe import add_fake_results +from baybe.utils.dataframe import add_fake_measurements @patch.object(GaussianProcessSurrogate, "_fit") @@ -12,7 +12,7 @@ def test_caching(patched, searchspace, objective): """A second fit call with the same context does not trigger retraining.""" # Prepare the setting measurements = RandomRecommender().recommend(3, searchspace, objective) - add_fake_results(measurements, objective.targets) + add_fake_measurements(measurements, objective.targets) surrogate = GaussianProcessSurrogate() # First call From 46bb75a9c0018e7e6d4a56d14cd7b65cf4f2d74e Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 11 Oct 2024 00:06:31 +0200 Subject: [PATCH 13/14] Update CHANGELOG.md --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 064ba6ad8..cd3a5e3a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 behavior for `BotorchRecommender` - User guide for utilities +### Changed +- Utility `add_fake_results` renamed to `add_fake_measurements` +- Utilities `add_fake_measurements` and `add_parameter_noise` now also return the + dataframe they modified in-place + ### Fixed - Leftover attrs-decorated classes are garbage collected before the subclass tree is traversed, avoiding sporadic serialization problems From 4493a344825e4847604d4da2fd168d2892d0950c Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 11 Oct 2024 15:53:49 +0200 Subject: [PATCH 14/14] Update docstring --- baybe/utils/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baybe/utils/dataframe.py b/baybe/utils/dataframe.py index ec11a30fd..e365c5bc3 100644 --- a/baybe/utils/dataframe.py +++ b/baybe/utils/dataframe.py @@ -73,7 +73,7 @@ def add_fake_measurements( good_intervals: dict[str, tuple[float, float]] | None = None, bad_intervals: dict[str, tuple[float, float]] | None = None, ) -> pd.DataFrame: - """Add fake results to a dataframe which was the result of a BayBE recommendation. + """Add fake measurements to a dataframe which was the result of a recommendation. It is possible to specify "good" values, which will be given a better target value. With this, the algorithm can be driven towards certain optimal values