From eff5ed221cb4bd323a9e5afbf7e41182e9e112af Mon Sep 17 00:00:00 2001 From: Anselm Hahn Date: Wed, 24 Jan 2024 19:03:06 +0100 Subject: [PATCH] =?UTF-8?q?feat:=20=E2=9C=A8=20Introduce=20`min=5Frel`=20f?= =?UTF-8?q?or=20confidence=20integrals=20(#1142)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: :sparkles: Refactor confidence interval calculation and transform nested types * chore: :recycle: Update type hints and function signature Update data conversion method to return a MutableMapping[str, Any] Update function name in notebook.py * chore: :page_facing_up: Update vendor URL in Dockerfile * chore: :arrow_up: Update submodules in vendor directory * feat: :sparkles: Add conda environment setup scripts for fish and zsh shells * test: :white_check_mark: Add min_rel_change parameter to `args_1()` function * test: :white_check_mark: Add "min_rel_change": 10e-6 for testing * fix: :test_tube: Use `pytest` marker for testing `ConfidenceInterval` --- Dockerfile | 2 +- spectrafit/plugins/data_converter.py | 6 +- spectrafit/plugins/notebook.py | 4 +- spectrafit/report.py | 9 +-- spectrafit/test/scripts/test_input_1.json | 22 ++++-- spectrafit/test/test_tools.py | 87 ++++++++++++++++++++--- spectrafit/tools.py | 32 ++++++--- tools/conda_env.fish | 22 ++++++ tools/conda_env.zsh | 18 +++++ vendor/docker-stacks | 2 +- vendor/lmfit-py | 2 +- 11 files changed, 169 insertions(+), 37 deletions(-) create mode 100755 tools/conda_env.fish create mode 100644 tools/conda_env.zsh diff --git a/Dockerfile b/Dockerfile index 870522e43..9e88f5402 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,7 +11,7 @@ LABEL project="SpectraFit" LABEL description="📊📈🔬 SpectraFit is a command-line and Jupyter-notebook tool for quick data-fitting based on the regular expression of distribution functions." 
LABEL license = "BSD-3-Clause" LABEL url = "https://github.com/Anselmoo/spectrafit" -LABEL vendor = "https://github.com/jupyter/docker-stacks/blob/main/scipy-notebook/Dockerfile" +LABEL vendor = "https://github.com/jupyter/docker-stacks/tree/main/images/scipy-notebook" # Fix: https://github.com/hadolint/hadolint/wiki/DL4006 # Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 diff --git a/spectrafit/plugins/data_converter.py b/spectrafit/plugins/data_converter.py index 9f3d59008..ab3a36cde 100644 --- a/spectrafit/plugins/data_converter.py +++ b/spectrafit/plugins/data_converter.py @@ -8,6 +8,7 @@ from typing import Any from typing import Dict from typing import List +from typing import MutableMapping from typing import Optional import pandas as pd @@ -123,7 +124,7 @@ def get_args(self) -> Dict[str, Any]: return vars(parser.parse_args()) @staticmethod - def convert(infile: Path, file_format: str) -> pd.DataFrame: + def convert(infile: Path, file_format: str) -> MutableMapping[str, Any]: """Convert the input file to the target file format. Args: @@ -134,7 +135,8 @@ def convert(infile: Path, file_format: str) -> pd.DataFrame: ValueError: If the file format is not supported. Returns: - pd.DataFrame: The converted data as a pandas DataFrame. + MutableMapping[str, Any]: The converted data as a MutableMapping[str, Any], + which belongs to DataFrame. 
""" if file_format.upper() not in choices: raise ValueError(f"File format '{file_format}' is not supported.") diff --git a/spectrafit/plugins/notebook.py b/spectrafit/plugins/notebook.py index 3d992b770..3aaa50db5 100644 --- a/spectrafit/plugins/notebook.py +++ b/spectrafit/plugins/notebook.py @@ -44,7 +44,7 @@ from spectrafit.tools import PostProcessing from spectrafit.tools import PreProcessing from spectrafit.tools import exclude_none_dictionary -from spectrafit.tools import transform_numpy_dictionary +from spectrafit.tools import transform_nested_types from spectrafit.utilities.transformer import list2dict @@ -772,7 +772,7 @@ def __call__(self) -> Dict[str, Any]: output=self.make_output_contribution, ).model_dump(exclude_none=True) report = exclude_none_dictionary(report) - report = transform_numpy_dictionary(report) + report = transform_nested_types(report) return report diff --git a/spectrafit/report.py b/spectrafit/report.py index 2b70aeb17..c75fae83a 100644 --- a/spectrafit/report.py +++ b/spectrafit/report.py @@ -18,7 +18,6 @@ from lmfit import Minimizer from lmfit import Parameter from lmfit import Parameters -from lmfit import conf_interval from lmfit import report_ci from lmfit import report_fit from lmfit.minimizer import MinimizerException @@ -456,13 +455,9 @@ def print_confidence_interval(self) -> None: print("\nConfidence Interval:\n") if self.args["conf_interval"]: try: - report_ci( - conf_interval( - self.minimizer, self.result, **self.args["conf_interval"] - ) - ) + report_ci(self.args["confidence_interval"][0]) except (MinimizerException, ValueError, KeyError, TypeError) as exc: - print(f"Error: {exc} -> No confidence interval could be calculated!") + warn(f"Error: {exc} -> No confidence interval could be calculated!") self.args["confidence_interval"] = {} def print_linear_correlation(self) -> None: diff --git a/spectrafit/test/scripts/test_input_1.json b/spectrafit/test/scripts/test_input_1.json index 2d51b5362..e7b4f7bf4 100644 --- 
a/spectrafit/test/scripts/test_input_1.json +++ b/spectrafit/test/scripts/test_input_1.json @@ -1,6 +1,9 @@ { "settings": { - "column": [0, 1], + "column": [ + 0, + 1 + ], "decimal": ".", "energy_start": -1, "energy_stop": 8, @@ -27,16 +30,25 @@ ] }, "parameters": { - "minimizer": { "nan_policy": "propagate", "calc_covar": true }, - "optimizer": { "max_nfev": 1000, "method": "leastsq" }, - "report": { "min_correl": 0.0 }, + "minimizer": { + "nan_policy": "propagate", + "calc_covar": true + }, + "optimizer": { + "max_nfev": 1000, + "method": "leastsq" + }, + "report": { + "min_correl": 0.0 + }, "conf_interval": { "p_names": null, "sigmas": null, "trace": false, "maxiter": 200, "verbose": 1, - "prob_func": null + "prob_func": null, + "min_rel_change": 10e-6 } }, "peaks": { diff --git a/spectrafit/test/test_tools.py b/spectrafit/test/test_tools.py index e00a57f3c..a369a134f 100644 --- a/spectrafit/test/test_tools.py +++ b/spectrafit/test/test_tools.py @@ -11,6 +11,7 @@ import pytest from pandas._testing import assert_frame_equal +from spectrafit.models import DistributionModels from spectrafit.models import SolverModels from spectrafit.tools import PostProcessing from spectrafit.tools import PreProcessing @@ -19,7 +20,7 @@ from spectrafit.tools import exclude_none_dictionary from spectrafit.tools import pkl2any from spectrafit.tools import pure_fname -from spectrafit.tools import transform_numpy_dictionary +from spectrafit.tools import transform_nested_types from spectrafit.tools import unicode_check @@ -93,6 +94,7 @@ def args_1() -> Dict[str, Any]: "maxiter": 20, "verbose": 1, "prob_func": None, + "min_rel_change": 10e-6, }, "peaks": { "1": { @@ -134,6 +136,35 @@ def args_2() -> Dict[str, Any]: } +@pytest.fixture(name="args__min_rel_change") +def args_3() -> Dict[str, Any]: + """Args fixture.""" + return { + "autopeak": False, + "global_": 0, + "column": ["energy", "intensity"], + "minimizer": {"nan_policy": "propagate", "calc_covar": False}, + "optimizer": 
{"max_nfev": 100, "method": "leastsq"}, + "conf_interval": { + "p_names": None, + "sigmas": None, + "maxiter": 100, + "verbose": 0, + "prob_func": None, + "min_rel_change": 0.001, + }, + "peaks": { + "1": { + "gaussian": { + "center": {"vary": True, "value": 1}, + "fwhmg": {"vary": True, "value": 1}, + "amplitude": {"vary": True, "value": 1}, + } + }, + }, + } + + class TestPreProcessing: """Test Pre-Processing tool.""" @@ -364,6 +395,36 @@ def test_insight_report_empty_conv( pp.make_insight_report() assert pp.args["confidence_interval"] == {} + @pytest.mark.parametrize("trace_value", [True, False]) + def test_insight_report_new_min_rel_change( + self, + trace_value: bool, + args__min_rel_change: Dict[str, Any], + ) -> None: + """Testing insight report for no report of the confidence interval.""" + x = np.linspace(0, 2, 100, dtype=np.float64) + df = pd.DataFrame( + { + "energy": x, + "intensity": DistributionModels.gaussian(x, 1, 1, 1), + } + ) + + args__min_rel_change["conf_interval"]["trace"] = trace_value + minimizer, result = SolverModels(df=df, args=args__min_rel_change)() + pp = PostProcessing( + df=df, + args=args__min_rel_change, + minimizer=minimizer, + result=result, + ) + pp.make_insight_report() + assert pp.args["confidence_interval"] == {} + + pp.args["confidence_interval"]["trace"] = False + pp.make_insight_report() + assert pp.args["confidence_interval"] == {} + class TestPickle: """Test Pickle tool.""" @@ -443,19 +504,27 @@ def test_exclude_none_dictionary() -> None: } -def test_transform_numpy_dictionary() -> None: - """Testing transform_numpy_dictionary.""" - assert transform_numpy_dictionary( +def test_transform_nested_types() -> None: + """Testing transform_nested_types.""" + assert transform_nested_types( {"a": np.int32(1), "b": np.float64(2.0), "c": np.bool_(True)} ) == {"a": 1, "b": 2.0, "c": True} - assert transform_numpy_dictionary( - {"a": {"b": np.int32(1)}, "c": np.float64(2.0)} - ) == {"a": {"b": 1}, "c": 2.0} - assert 
transform_numpy_dictionary( + assert transform_nested_types({"a": {"b": np.int32(1)}, "c": np.float64(2.0)}) == { + "a": {"b": 1}, + "c": 2.0, + } + assert transform_nested_types( {"a": 1, "b": [np.int64(2)], "c": np.float64(3.0)} ) == { "a": 1, "b": [2], "c": 3.0, } - assert transform_numpy_dictionary({"a": np.array([1, 2, 3])}) == {"a": [1, 2, 3]} + assert transform_nested_types({"a": np.array([1, 2, 3])}) == {"a": [1, 2, 3]} + + assert transform_nested_types( + {"a": (np.int32(1), np.int64(4)), "b": np.float64(2.0)} + ) == { + "a": (1, 4), + "b": 2.0, + } diff --git a/spectrafit/tools.py b/spectrafit/tools.py index 8e3cca3e5..451b934e5 100644 --- a/spectrafit/tools.py +++ b/spectrafit/tools.py @@ -19,7 +19,7 @@ import yaml from lmfit import Minimizer -from lmfit import conf_interval +from lmfit.confidence import ConfidenceInterval from lmfit.minimizer import MinimizerException from spectrafit.api.tools_model import ColumnNamesAPI from spectrafit.models import calculated_model @@ -289,9 +289,21 @@ def make_insight_report(self) -> None: ) if self.args["conf_interval"]: try: - self.args["confidence_interval"] = conf_interval( + _min_rel_change = self.args["conf_interval"].pop("min_rel_change", None) + ci = ConfidenceInterval( self.minimizer, self.result, **self.args["conf_interval"] ) + if _min_rel_change is not None: + ci.min_rel_change = _min_rel_change + self.args["conf_interval"]["min_rel_change"] = _min_rel_change + + trace = self.args["conf_interval"].get("trace") + + if trace is True: + self.args["confidence_interval"] = (ci.calc_all_ci(), ci.trace_dict) + else: + self.args["confidence_interval"] = ci.calc_all_ci() + except (MinimizerException, ValueError, KeyError) as exc: print(f"Error: {exc} -> No confidence interval could be calculated!") self.args["confidence_interval"] = {} @@ -464,7 +476,7 @@ def __init__(self, df: pd.DataFrame, args: Dict[str, Any]) -> None: additional information beyond the command line arguments. 
""" self.df = df - self.args = transform_numpy_dictionary(args) + self.args = transform_nested_types(args) def __call__(self) -> None: """Call the SaveResult class.""" @@ -501,7 +513,7 @@ def save_as_json(self) -> None: with open( Path(f"{self.args['outfile']}_summary.json"), "w", encoding="utf-8" ) as f: - json.dump(self.args, f, indent=4) + json.dump(transform_nested_types(self.args), f, indent=4) else: raise FileNotFoundError("No output file provided!") @@ -691,8 +703,8 @@ def exclude_none_dictionary(value: Dict[str, Any]) -> Dict[str, Any]: return value -def transform_numpy_dictionary(value: Dict[str, Any]) -> Dict[str, Any]: - """Transform numpy values to python values. +def transform_nested_types(value: Dict[str, Any]) -> Dict[str, Any]: + """Transform nested types numpy values to python values. Args: value (Dict[str, Any]): Dictionary to be processed to @@ -702,11 +714,13 @@ def transform_numpy_dictionary(value: Dict[str, Any]) -> Dict[str, Any]: Dict[str, Any]: Dictionary with python values. 
""" if isinstance(value, list): - return [transform_numpy_dictionary(v) for v in value] + return [transform_nested_types(v) for v in value] + elif isinstance(value, tuple): + return tuple(transform_nested_types(v) for v in value) elif isinstance(value, dict): - return {k: transform_numpy_dictionary(v) for k, v in value.items()} + return {k: transform_nested_types(v) for k, v in value.items()} elif isinstance(value, np.ndarray): - return transform_numpy_dictionary(value.tolist()) + return transform_nested_types(value.tolist()) elif isinstance(value, np.int32): return int(value) elif isinstance(value, np.int64): diff --git a/tools/conda_env.fish b/tools/conda_env.fish new file mode 100755 index 000000000..0838944d8 --- /dev/null +++ b/tools/conda_env.fish @@ -0,0 +1,22 @@ +#!/usr/bin/env fish + +set ENV_NAME $argv[1] +set PACKAGE_NAME $argv[2] +set PYTHON_VERSION $argv[3] + +if test -z "$PYTHON_VERSION" + set PYTHON_VERSION 3.11 +end + +conda create -n $ENV_NAME python=$PYTHON_VERSION --no-default-packages -y +conda activate $ENV_NAME +conda config --add channels conda-forge +conda config --set channel_priority strict +conda install mamba -y + +# Install spectrafit-all package +mamba install spectrafit-all -y +# Install an additional package if provided +if test -n "$PACKAGE_NAME" + mamba install $PACKAGE_NAME -y +end diff --git a/tools/conda_env.zsh b/tools/conda_env.zsh new file mode 100644 index 000000000..e46bd4d44 --- /dev/null +++ b/tools/conda_env.zsh @@ -0,0 +1,18 @@ +#!/usr/bin/env zsh + +ENV_NAME=$1 +PACKAGE_NAME=$2 +PYTHON_VERSION=${3:-3.11} + +conda create -n $ENV_NAME python=$PYTHON_VERSION --no-default-packages +conda activate $ENV_NAME +conda config --add channels conda-forge +conda config --set channel_priority strict +conda install mamba -y + +# Install spectrafit-all package +mamba install spectrafit-all -y +# Install an additional package if provided +if [[ -n $PACKAGE_NAME ]]; then + mamba install $PACKAGE_NAME -y +fi diff --git
a/vendor/docker-stacks b/vendor/docker-stacks index bfe5f2091..7a5990be1 160000 --- a/vendor/docker-stacks +++ b/vendor/docker-stacks @@ -1 +1 @@ -Subproject commit bfe5f20914d0b049c10f65392ac7c9371f3406be +Subproject commit 7a5990be1f90766de2d59194bdec73327b0885c0 diff --git a/vendor/lmfit-py b/vendor/lmfit-py index 8b2038a16..493107b85 160000 --- a/vendor/lmfit-py +++ b/vendor/lmfit-py @@ -1 +1 @@ -Subproject commit 8b2038a16cf3ce87c25c35d224093ce3e8dc2af3 +Subproject commit 493107b85dac867d19a3a74e9d26946aab386a7c