From eff5ed221cb4bd323a9e5afbf7e41182e9e112af Mon Sep 17 00:00:00 2001
From: Anselm Hahn <Anselm.Hahn@gmail.com>
Date: Wed, 24 Jan 2024 19:03:06 +0100
Subject: [PATCH] =?UTF-8?q?feat:=20=E2=9C=A8=20Introduce=20`min=5Frel`=20f?=
 =?UTF-8?q?or=20confidence=20intervals=20(#1142)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: :sparkles: Refactor confidence interval calculation and transform nested types

* chore: :recycle: Update type hints and function signature

Update data conversion method to return a MutableMapping[str, Any]

Update function name in notebook.py

* chore: :page_facing_up: Update vendor URL in Dockerfile

* chore: :arrow_up: Update submodules in vendor directory

* feat: :sparkles: Add conda environment setup scripts for fish and zsh shells

* test: :white_check_mark: Add min_rel_change parameter to `args_1()` function

* test: :white_check_mark: Add "min_rel_change": 10e-6 for testing

* fix: :test_tube: Use `pytest` marker for testing `ConfidenceInterval`
---
 Dockerfile                                |  2 +-
 spectrafit/plugins/data_converter.py      |  6 +-
 spectrafit/plugins/notebook.py            |  4 +-
 spectrafit/report.py                      |  9 +--
 spectrafit/test/scripts/test_input_1.json | 22 ++++--
 spectrafit/test/test_tools.py             | 87 ++++++++++++++++++++---
 spectrafit/tools.py                       | 32 ++++++---
 tools/conda_env.fish                      | 22 ++++++
 tools/conda_env.zsh                       | 18 +++++
 vendor/docker-stacks                      |  2 +-
 vendor/lmfit-py                           |  2 +-
 11 files changed, 169 insertions(+), 37 deletions(-)
 create mode 100755 tools/conda_env.fish
 create mode 100644 tools/conda_env.zsh

diff --git a/Dockerfile b/Dockerfile
index 870522e43..9e88f5402 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -11,7 +11,7 @@ LABEL project="SpectraFit"
 LABEL description="📊📈🔬 SpectraFit is a command-line and Jupyter-notebook tool for quick data-fitting based on the regular expression of distribution functions."
 LABEL license = "BSD-3-Clause"
 LABEL url = "https://github.com/Anselmoo/spectrafit"
-LABEL vendor = "https://github.com/jupyter/docker-stacks/blob/main/scipy-notebook/Dockerfile"
+LABEL vendor = "https://github.com/jupyter/docker-stacks/tree/main/images/scipy-notebook"
 
 # Fix: https://github.com/hadolint/hadolint/wiki/DL4006
 # Fix: https://github.com/koalaman/shellcheck/wiki/SC3014
diff --git a/spectrafit/plugins/data_converter.py b/spectrafit/plugins/data_converter.py
index 9f3d59008..ab3a36cde 100644
--- a/spectrafit/plugins/data_converter.py
+++ b/spectrafit/plugins/data_converter.py
@@ -8,6 +8,7 @@
 from typing import Any
 from typing import Dict
 from typing import List
+from typing import MutableMapping
 from typing import Optional
 
 import pandas as pd
@@ -123,7 +124,7 @@ def get_args(self) -> Dict[str, Any]:
         return vars(parser.parse_args())
 
     @staticmethod
-    def convert(infile: Path, file_format: str) -> pd.DataFrame:
+    def convert(infile: Path, file_format: str) -> MutableMapping[str, Any]:
         """Convert the input file to the target file format.
 
         Args:
@@ -134,7 +135,8 @@ def convert(infile: Path, file_format: str) -> pd.DataFrame:
             ValueError: If the file format is not supported.
 
         Returns:
-            pd.DataFrame: The converted data as a pandas DataFrame.
+            MutableMapping[str, Any]: The converted data as a MutableMapping[str, Any],
+                which belongs to DataFrame.
         """
         if file_format.upper() not in choices:
             raise ValueError(f"File format '{file_format}' is not supported.")
diff --git a/spectrafit/plugins/notebook.py b/spectrafit/plugins/notebook.py
index 3d992b770..3aaa50db5 100644
--- a/spectrafit/plugins/notebook.py
+++ b/spectrafit/plugins/notebook.py
@@ -44,7 +44,7 @@
 from spectrafit.tools import PostProcessing
 from spectrafit.tools import PreProcessing
 from spectrafit.tools import exclude_none_dictionary
-from spectrafit.tools import transform_numpy_dictionary
+from spectrafit.tools import transform_nested_types
 from spectrafit.utilities.transformer import list2dict
 
 
@@ -772,7 +772,7 @@ def __call__(self) -> Dict[str, Any]:
             output=self.make_output_contribution,
         ).model_dump(exclude_none=True)
         report = exclude_none_dictionary(report)
-        report = transform_numpy_dictionary(report)
+        report = transform_nested_types(report)
         return report
 
 
diff --git a/spectrafit/report.py b/spectrafit/report.py
index 2b70aeb17..c75fae83a 100644
--- a/spectrafit/report.py
+++ b/spectrafit/report.py
@@ -18,7 +18,6 @@
 from lmfit import Minimizer
 from lmfit import Parameter
 from lmfit import Parameters
-from lmfit import conf_interval
 from lmfit import report_ci
 from lmfit import report_fit
 from lmfit.minimizer import MinimizerException
@@ -456,13 +455,9 @@ def print_confidence_interval(self) -> None:
         print("\nConfidence Interval:\n")
         if self.args["conf_interval"]:
             try:
-                report_ci(
-                    conf_interval(
-                        self.minimizer, self.result, **self.args["conf_interval"]
-                    )
-                )
+                report_ci(self.args["confidence_interval"][0])
             except (MinimizerException, ValueError, KeyError, TypeError) as exc:
-                print(f"Error: {exc} -> No confidence interval could be calculated!")
+                warn(f"Error: {exc} -> No confidence interval could be calculated!")
                 self.args["confidence_interval"] = {}
 
     def print_linear_correlation(self) -> None:
diff --git a/spectrafit/test/scripts/test_input_1.json b/spectrafit/test/scripts/test_input_1.json
index 2d51b5362..e7b4f7bf4 100644
--- a/spectrafit/test/scripts/test_input_1.json
+++ b/spectrafit/test/scripts/test_input_1.json
@@ -1,6 +1,9 @@
 {
   "settings": {
-    "column": [0, 1],
+    "column": [
+      0,
+      1
+    ],
     "decimal": ".",
     "energy_start": -1,
     "energy_stop": 8,
@@ -27,16 +30,25 @@
       ]
     },
     "parameters": {
-      "minimizer": { "nan_policy": "propagate", "calc_covar": true },
-      "optimizer": { "max_nfev": 1000, "method": "leastsq" },
-      "report": { "min_correl": 0.0 },
+      "minimizer": {
+        "nan_policy": "propagate",
+        "calc_covar": true
+      },
+      "optimizer": {
+        "max_nfev": 1000,
+        "method": "leastsq"
+      },
+      "report": {
+        "min_correl": 0.0
+      },
       "conf_interval": {
         "p_names": null,
         "sigmas": null,
         "trace": false,
         "maxiter": 200,
         "verbose": 1,
-        "prob_func": null
+        "prob_func": null,
+        "min_rel_change": 10e-6
       }
     },
     "peaks": {
diff --git a/spectrafit/test/test_tools.py b/spectrafit/test/test_tools.py
index e00a57f3c..a369a134f 100644
--- a/spectrafit/test/test_tools.py
+++ b/spectrafit/test/test_tools.py
@@ -11,6 +11,7 @@
 import pytest
 
 from pandas._testing import assert_frame_equal
+from spectrafit.models import DistributionModels
 from spectrafit.models import SolverModels
 from spectrafit.tools import PostProcessing
 from spectrafit.tools import PreProcessing
@@ -19,7 +20,7 @@
 from spectrafit.tools import exclude_none_dictionary
 from spectrafit.tools import pkl2any
 from spectrafit.tools import pure_fname
-from spectrafit.tools import transform_numpy_dictionary
+from spectrafit.tools import transform_nested_types
 from spectrafit.tools import unicode_check
 
 
@@ -93,6 +94,7 @@ def args_1() -> Dict[str, Any]:
             "maxiter": 20,
             "verbose": 1,
             "prob_func": None,
+            "min_rel_change": 10e-6,
         },
         "peaks": {
             "1": {
@@ -134,6 +136,35 @@ def args_2() -> Dict[str, Any]:
     }
 
 
+@pytest.fixture(name="args__min_rel_change")
+def args_3() -> Dict[str, Any]:
+    """Args fixture that sets min_rel_change in conf_interval."""
+    return {
+        "autopeak": False,
+        "global_": 0,
+        "column": ["energy", "intensity"],
+        "minimizer": {"nan_policy": "propagate", "calc_covar": False},
+        "optimizer": {"max_nfev": 100, "method": "leastsq"},
+        "conf_interval": {
+            "p_names": None,
+            "sigmas": None,
+            "maxiter": 100,
+            "verbose": 0,
+            "prob_func": None,
+            "min_rel_change": 0.001,
+        },
+        "peaks": {
+            "1": {
+                "gaussian": {
+                    "center": {"vary": True, "value": 1},
+                    "fwhmg": {"vary": True, "value": 1},
+                    "amplitude": {"vary": True, "value": 1},
+                }
+            },
+        },
+    }
+
+
 class TestPreProcessing:
     """Test Pre-Processing tool."""
 
@@ -364,6 +395,36 @@ def test_insight_report_empty_conv(
         pp.make_insight_report()
         assert pp.args["confidence_interval"] == {}
 
+    @pytest.mark.parametrize("trace_value", [True, False])
+    def test_insight_report_new_min_rel_change(
+        self,
+        trace_value: bool,
+        args__min_rel_change: Dict[str, Any],
+    ) -> None:
+        """Test insight report with min_rel_change for both trace settings."""
+        x = np.linspace(0, 2, 100, dtype=np.float64)
+        df = pd.DataFrame(
+            {
+                "energy": x,
+                "intensity": DistributionModels.gaussian(x, 1, 1, 1),
+            }
+        )
+
+        args__min_rel_change["conf_interval"]["trace"] = trace_value
+        minimizer, result = SolverModels(df=df, args=args__min_rel_change)()
+        pp = PostProcessing(
+            df=df,
+            args=args__min_rel_change,
+            minimizer=minimizer,
+            result=result,
+        )
+        pp.make_insight_report()
+        assert pp.args["confidence_interval"] == {}
+
+        pp.args["conf_interval"]["trace"] = False  # settings key, not result
+        pp.make_insight_report()
+        assert pp.args["confidence_interval"] == {}
+
 
 class TestPickle:
     """Test Pickle tool."""
@@ -443,19 +504,27 @@ def test_exclude_none_dictionary() -> None:
     }
 
 
-def test_transform_numpy_dictionary() -> None:
-    """Testing transform_numpy_dictionary."""
-    assert transform_numpy_dictionary(
+def test_transform_nested_types() -> None:
+    """Test transform_nested_types on dicts, lists, tuples and arrays."""
+    assert transform_nested_types(
         {"a": np.int32(1), "b": np.float64(2.0), "c": np.bool_(True)}
     ) == {"a": 1, "b": 2.0, "c": True}
-    assert transform_numpy_dictionary(
-        {"a": {"b": np.int32(1)}, "c": np.float64(2.0)}
-    ) == {"a": {"b": 1}, "c": 2.0}
-    assert transform_numpy_dictionary(
+    assert transform_nested_types({"a": {"b": np.int32(1)}, "c": np.float64(2.0)}) == {
+        "a": {"b": 1},
+        "c": 2.0,
+    }
+    assert transform_nested_types(
         {"a": 1, "b": [np.int64(2)], "c": np.float64(3.0)}
     ) == {
         "a": 1,
         "b": [2],
         "c": 3.0,
     }
-    assert transform_numpy_dictionary({"a": np.array([1, 2, 3])}) == {"a": [1, 2, 3]}
+    assert transform_nested_types({"a": np.array([1, 2, 3])}) == {"a": [1, 2, 3]}
+
+    assert transform_nested_types(
+        {"a": (np.int32(1), np.int64(4)), "b": np.float64(2.0)}
+    ) == {
+        "a": (1, 4),
+        "b": 2.0,
+    }
diff --git a/spectrafit/tools.py b/spectrafit/tools.py
index 8e3cca3e5..451b934e5 100644
--- a/spectrafit/tools.py
+++ b/spectrafit/tools.py
@@ -19,7 +19,7 @@
 import yaml
 
 from lmfit import Minimizer
-from lmfit import conf_interval
+from lmfit.confidence import ConfidenceInterval
 from lmfit.minimizer import MinimizerException
 from spectrafit.api.tools_model import ColumnNamesAPI
 from spectrafit.models import calculated_model
@@ -289,9 +289,21 @@ def make_insight_report(self) -> None:
         )
         if self.args["conf_interval"]:
             try:
-                self.args["confidence_interval"] = conf_interval(
+                _min_rel_change = self.args["conf_interval"].pop("min_rel_change", None)
+                ci = ConfidenceInterval(
                     self.minimizer, self.result, **self.args["conf_interval"]
                 )
+                if _min_rel_change is not None:
+                    ci.min_rel_change = _min_rel_change
+                    self.args["conf_interval"]["min_rel_change"] = _min_rel_change
+
+                trace = self.args["conf_interval"].get("trace")
+
+                if trace is True:
+                    self.args["confidence_interval"] = (ci.calc_all_ci(), ci.trace_dict)
+                else:
+                    self.args["confidence_interval"] = ci.calc_all_ci()
+
             except (MinimizerException, ValueError, KeyError) as exc:
                 print(f"Error: {exc} -> No confidence interval could be calculated!")
                 self.args["confidence_interval"] = {}
@@ -464,7 +476,7 @@ def __init__(self, df: pd.DataFrame, args: Dict[str, Any]) -> None:
                  additional information beyond the command line arguments.
         """
         self.df = df
-        self.args = transform_numpy_dictionary(args)
+        self.args = transform_nested_types(args)
 
     def __call__(self) -> None:
         """Call the SaveResult class."""
@@ -501,7 +513,7 @@ def save_as_json(self) -> None:
             with open(
                 Path(f"{self.args['outfile']}_summary.json"), "w", encoding="utf-8"
             ) as f:
-                json.dump(self.args, f, indent=4)
+                json.dump(transform_nested_types(self.args), f, indent=4)
         else:
             raise FileNotFoundError("No output file provided!")
 
@@ -691,8 +703,8 @@ def exclude_none_dictionary(value: Dict[str, Any]) -> Dict[str, Any]:
         return value
 
 
-def transform_numpy_dictionary(value: Dict[str, Any]) -> Dict[str, Any]:
-    """Transform numpy values to python values.
+def transform_nested_types(value: Dict[str, Any]) -> Dict[str, Any]:
+    """Transform nested types numpy values to python values.
 
     Args:
         value (Dict[str, Any]): Dictionary to be processed to
@@ -702,11 +714,13 @@ def transform_numpy_dictionary(value: Dict[str, Any]) -> Dict[str, Any]:
         Dict[str, Any]: Dictionary with python values.
     """
     if isinstance(value, list):
-        return [transform_numpy_dictionary(v) for v in value]
+        return [transform_nested_types(v) for v in value]
+    elif isinstance(value, tuple):
+        return tuple(transform_nested_types(v) for v in value)
     elif isinstance(value, dict):
-        return {k: transform_numpy_dictionary(v) for k, v in value.items()}
+        return {k: transform_nested_types(v) for k, v in value.items()}
     elif isinstance(value, np.ndarray):
-        return transform_numpy_dictionary(value.tolist())
+        return transform_nested_types(value.tolist())
     elif isinstance(value, np.int32):
         return int(value)
     elif isinstance(value, np.int64):
diff --git a/tools/conda_env.fish b/tools/conda_env.fish
new file mode 100755
index 000000000..0838944d8
--- /dev/null
+++ b/tools/conda_env.fish
@@ -0,0 +1,22 @@
+#!/usr/bin/env fish
+# Usage: conda_env.fish ENV_NAME [PACKAGE_NAME] [PYTHON_VERSION]
+set ENV_NAME $argv[1]
+set PACKAGE_NAME $argv[2]
+set PYTHON_VERSION $argv[3]
+
+if test -z "$PYTHON_VERSION"
+    set PYTHON_VERSION 3.11
+end
+
+conda create -n $ENV_NAME python=$PYTHON_VERSION --no-default-packages -y
+conda activate $ENV_NAME
+conda config --add channels conda-forge
+conda config --set channel_priority strict
+conda install mamba -y
+
+# Install spectrafit-all package
+mamba install spectrafit-all -y
+# Install an additional package if provided
+if test -n "$PACKAGE_NAME"
+    mamba install $PACKAGE_NAME -y
+end
diff --git a/tools/conda_env.zsh b/tools/conda_env.zsh
new file mode 100644
index 000000000..e46bd4d44
--- /dev/null
+++ b/tools/conda_env.zsh
@@ -0,0 +1,18 @@
+#!/usr/bin/env zsh
+# Usage: conda_env.zsh ENV_NAME [PACKAGE_NAME] [PYTHON_VERSION]
+ENV_NAME=$1
+PACKAGE_NAME=$2
+PYTHON_VERSION=${3:-3.11}
+
+conda create -n $ENV_NAME python=$PYTHON_VERSION --no-default-packages -y
+conda activate $ENV_NAME
+conda config --add channels conda-forge
+conda config --set channel_priority strict
+conda install mamba -y
+
+# Install spectrafit-all package
+mamba install spectrafit-all -y
+# Install an additional package if provided
+if [[ -n $PACKAGE_NAME ]]; then
+    mamba install $PACKAGE_NAME -y
+fi
diff --git a/vendor/docker-stacks b/vendor/docker-stacks
index bfe5f2091..7a5990be1 160000
--- a/vendor/docker-stacks
+++ b/vendor/docker-stacks
@@ -1 +1 @@
-Subproject commit bfe5f20914d0b049c10f65392ac7c9371f3406be
+Subproject commit 7a5990be1f90766de2d59194bdec73327b0885c0
diff --git a/vendor/lmfit-py b/vendor/lmfit-py
index 8b2038a16..493107b85 160000
--- a/vendor/lmfit-py
+++ b/vendor/lmfit-py
@@ -1 +1 @@
-Subproject commit 8b2038a16cf3ce87c25c35d224093ce3e8dc2af3
+Subproject commit 493107b85dac867d19a3a74e9d26946aab386a7c