From a20d3121c9b8c0386a7201d7a1bb374dbb058531 Mon Sep 17 00:00:00 2001
From: Dasun Abeykoon <Dasun20202020@gmail.com>
Date: Fri, 27 Jun 2025 12:16:20 -0700
Subject: [PATCH 1/2] skpkg: apply black to all files in the project dir

---
 .github/ISSUE_TEMPLATE/release_checklist.md |  31 +++--
 doc/source/conf.py                          |  18 ++-
 src/__init__.py                             |   3 +-
 src/diffpy/nmf_mapping/main.py              | 146 +++++++++++++++-----
 src/diffpy/nmf_mapping/nmf_mapping_code.py  |  54 ++++++--
 src/diffpy/version.py                       |   7 +-
 tests/test_NMF_analysis_code.py             |   6 +-
 tests/test_load_data.py                     |  14 +-
 tests/test_version.py                       |   3 +-
 9 files changed, 212 insertions(+), 70 deletions(-)
diff --git a/.github/ISSUE_TEMPLATE/release_checklist.md b/.github/ISSUE_TEMPLATE/release_checklist.md
index 0f56027..6107962 100644
--- a/.github/ISSUE_TEMPLATE/release_checklist.md
+++ b/.github/ISSUE_TEMPLATE/release_checklist.md
@@ -6,30 +6,41 @@ labels: "release"
 assignees: ""
 ---
 
-### PyPI/GitHub release checklist:
+### PyPI/GitHub rc-release preparation checklist:
 
 - [ ] All PRs/issues attached to the release are merged.
 - [ ] All the badges on the README are passing.
 - [ ] License information is verified as correct. If you are unsure, please comment below.
 - [ ] Locally rendered documentation contains all appropriate pages, including API references (check no modules are
-  missing), tutorials, and other human written text is up-to-date with any changes in the code.
-- [ ] Installation instructions in the README, documentation and on the website (e.g., diffpy.org) are updated.
+      missing), tutorials, and other human-written text is up-to-date with any changes in the code.
+- [ ] Installation instructions in the README, documentation, and the website are updated.
 - [ ] Successfully run any tutorial examples or do functional testing with the latest Python version.
 - [ ] Grammar and writing quality are checked (no typos).
+- [ ] Run `pip install build twine`, then `python -m build` and `twine check dist/*`, to ensure that the package can be built and is correctly formatted for PyPI release.
 
-Please mention @sbillinge here when you are ready for PyPI/GitHub release. Include any additional comments necessary, such as
-version information and details about the pre-release here:
+Please tag the maintainer (e.g., @username) in the comment here when you are ready for the PyPI/GitHub release. Include any additional comments necessary, such as version information and details about the pre-release here:
 
-### conda-forge release checklist:
+### PyPI/GitHub full-release preparation checklist:
 
-<!-- After @sbillinge releases the PyPI package, please check the following when creating a PR for conda-forge release.-->
+- [ ] Create a new conda environment and install the rc from PyPI (`pip install <package-name>==??`)
+- [ ] License information on PyPI is correct.
+- [ ] Docs are deployed successfully to `https://<github-username-or-orgname>/<package-name>`.
+- [ ] Successfully run all tests and tutorial examples, or do functional testing.
 
+Please let the maintainer know that all checks are done and the package is ready for full release.
+
+### conda-forge release preparation checklist:
+
+<!-- After the maintainer releases the PyPI package, please check the following when creating a PR for conda-forge release.-->
+
+- [ ] Ensure that the full release has appeared on PyPI successfully.
 - [ ] New package dependencies listed in `conda.txt` and `test.txt` are added to `meta.yaml` in the feedstock.
-- [ ] All relevant issues in the feedstock are addressed in the release PR.
+- [ ] Close any open issues on the feedstock. Reach out to the maintainer if you have questions.
+- [ ] Tag the maintainer for conda-forge release.
 
 ### Post-release checklist
 
 <!-- Before closing this issue, please complete the following: -->
 
-- [ ]  Run tutorial examples and conduct functional testing using the installation guide in the README. Attach screenshots/results as comments.
-- [ ]  Documentation (README, tutorials, API references, and websites) is deployed without broken links or missing figures.
+- [ ] Run tutorial examples and conduct functional testing using the installation guide in the README. Attach screenshots/results as comments.
+- [ ] Documentation (README, tutorials, API references, and websites) is deployed without broken links or missing figures.
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 287c517..4245ea3 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -221,7 +221,13 @@
 # (source start file, target name, title,
 # author, documentclass [howto, manual, or own class]).
 latex_documents = [
-    ("index", "diffpy.nmf_mapping.tex", "diffpy.nmf_mapping Documentation", ab_authors, "manual"),
+    (
+        "index",
+        "diffpy.nmf_mapping.tex",
+        "diffpy.nmf_mapping Documentation",
+        ab_authors,
+        "manual",
+    ),
 ]
 
 # The name of an image file (relative to this directory) to place at the top of
@@ -249,7 +255,15 @@
 
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
-man_pages = [("index", "diffpy.nmf_mapping", "diffpy.nmf_mapping Documentation", ab_authors, 1)]
+man_pages = [
+    (
+        "index",
+        "diffpy.nmf_mapping",
+        "diffpy.nmf_mapping Documentation",
+        ab_authors,
+        1,
+    )
+]
 
 # If true, show URL addresses after external links.
 # man_show_urls = False
diff --git a/src/__init__.py b/src/__init__.py
index 521e31a..736037d 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -13,8 +13,7 @@
 #
 ##############################################################################
 
-"""nmf_mapping - tools for performing NMF on PDF and XRD data.
-"""
+"""nmf_mapping - tools for performing NMF on PDF and XRD data."""
 
 
 __import__("pkg_resources").declare_namespace(__name__)
diff --git a/src/diffpy/nmf_mapping/main.py b/src/diffpy/nmf_mapping/main.py
index 06daff0..69aa27e 100644
--- a/src/diffpy/nmf_mapping/main.py
+++ b/src/diffpy/nmf_mapping/main.py
@@ -31,21 +31,32 @@ def main(args=None):
     as well as the reconstruction error as a fxn of component
     """
 
-    parser = ArgumentParser(prog="nmf_mapping", description=_BANNER, formatter_class=RawTextHelpFormatter)
+    parser = ArgumentParser(
+        prog="nmf_mapping",
+        description=_BANNER,
+        formatter_class=RawTextHelpFormatter,
+    )
 
     def tup(s):
         if not isinstance(s, str):
-            raise TypeError("Input must be a string of two integers separated by a comma.")
+            raise TypeError(
+                "Input must be a string of two integers separated by a comma."
+            )
 
         try:
             l, h = map(int, s.split(","))
             return l, h
         except ValueError:
-            raise ValueError("Input must be two integers separated by a comma (e.g., '1,5')")
+            raise ValueError(
+                "Input must be two integers separated by a comma (e.g., '1,5')"
+            )
 
     # args
     parser.add_argument(
-        "directory", default=None, type=str, help="a directory of PDFs to calculate NMF decomposition"
+        "directory",
+        default=None,
+        type=str,
+        help="a directory of PDFs to calculate NMF decomposition",
     )
     group = parser.add_mutually_exclusive_group()
     parser.add_argument(
@@ -88,7 +99,9 @@ def tup(s):
         "--xrd",
         default=False,
         type=boolean_string,
-        help="whether to look for .xy files rather than .gr files\n" "default: False\n" "e.g. --xrd True",
+        help="whether to look for .xy files rather than .gr files\n"
+        "default: False\n"
+        "e.g. --xrd True",
     )
     parser.add_argument(
         "--x_units",
@@ -96,7 +109,9 @@ def tup(s):
         type=str,
         choices=["twotheta", "q"],
         required="--xrd" in sys.argv,
-        help="x axis units for XRD data\n" "default: None\n" "e.g. --x_units twotheta",
+        help="x axis units for XRD data\n"
+        "default: None\n"
+        "e.g. --x_units twotheta",
     )
     parser.add_argument(
         "--xrange",
@@ -105,25 +120,37 @@ def tup(s):
         nargs="*",
         help="the x-range over which to calculate NMF, can be multiple ranges (e.g. --xrange 5,10 12,15)",
     )
-    parser.add_argument("--show", default=True, type=boolean_string, help="whether to show the plot")
+    parser.add_argument(
+        "--show",
+        default=True,
+        type=boolean_string,
+        help="whether to show the plot",
+    )
     args0 = Namespace()
     args1, _ = parser.parse_known_args(args, namespace=args0)
 
     input_list, data_list = nmf.load_data(args1.directory, args1.xrd)
     if args1.pca_thresh:
-        df_components, df_component_weight_timeseries, df_reconstruction_error, df_explained_var_ratio = (
-            nmf.NMF_decomposition(
-                input_list,
-                args1.xrange,
-                args1.threshold,
-                additional_comp=False,
-                improve_thresh=args1.improve_thresh,
-                n_iter=args1.n_iter,
-                pca_thresh=args1.pca_thresh,
-            )
+        (
+            df_components,
+            df_component_weight_timeseries,
+            df_reconstruction_error,
+            df_explained_var_ratio,
+        ) = nmf.NMF_decomposition(
+            input_list,
+            args1.xrange,
+            args1.threshold,
+            additional_comp=False,
+            improve_thresh=args1.improve_thresh,
+            n_iter=args1.n_iter,
+            pca_thresh=args1.pca_thresh,
         )
     else:
-        df_components, df_component_weight_timeseries, df_reconstruction_error = nmf.NMF_decomposition(
+        (
+            df_components,
+            df_component_weight_timeseries,
+            df_reconstruction_error,
+        ) = nmf.NMF_decomposition(
             input_list,
             args1.xrange,
             args1.threshold,
@@ -134,7 +161,9 @@ def tup(s):
 
     print(f"Number of components: {len(df_components.columns)}")
 
-    fig1 = nmf.component_plot(df_components, args1.xrd, args1.x_units, args1.show)
+    fig1 = nmf.component_plot(
+        df_components, args1.xrd, args1.x_units, args1.show
+    )
     fig2 = nmf.component_ratio_plot(df_component_weight_timeseries, args1.show)
     fig3 = nmf.reconstruction_error_plot(df_reconstruction_error, args1.show)
     if args1.pca_thresh:
@@ -143,40 +172,72 @@ def tup(s):
     if args1.save_files:
         if not os.path.exists(os.path.join(os.getcwd(), "nmf_result")):
             os.mkdir(os.path.join(os.getcwd(), "nmf_result"))
-        output_fn = datetime.fromtimestamp(time.time()).strftime("%Y%m%d%H%M%S%f")
-        df_components.to_json(os.path.join(os.getcwd(), "nmf_result", "x_index_vs_y_col_components.json"))
+        output_fn = datetime.fromtimestamp(time.time()).strftime(
+            "%Y%m%d%H%M%S%f"
+        )
+        df_components.to_json(
+            os.path.join(
+                os.getcwd(), "nmf_result", "x_index_vs_y_col_components.json"
+            )
+        )
         df_component_weight_timeseries.to_json(
-            os.path.join(os.getcwd(), "nmf_result", "component_index_vs_pratio_col.json")
+            os.path.join(
+                os.getcwd(), "nmf_result", "component_index_vs_pratio_col.json"
+            )
         )
         df_component_weight_timeseries.to_csv(
-            os.path.join(os.getcwd(), "nmf_result", output_fn + "component_row_pratio_col.txt"),
+            os.path.join(
+                os.getcwd(),
+                "nmf_result",
+                output_fn + "component_row_pratio_col.txt",
+            ),
             header=None,
             index=False,
             sep=" ",
             mode="a",
         )
         df_reconstruction_error.to_json(
-            os.path.join(os.getcwd(), "nmf_result", "component_index_vs_RE_value.json")
+            os.path.join(
+                os.getcwd(), "nmf_result", "component_index_vs_RE_value.json"
+            )
+        )
+        plot_file1 = os.path.join(
+            os.getcwd(), "nmf_result", output_fn + "comp_plot.png"
+        )
+        plot_file2 = os.path.join(
+            os.getcwd(), "nmf_result", output_fn + "ratio_plot.png"
+        )
+        plot_file3 = os.path.join(
+            os.getcwd(), "nmf_result", output_fn + "loss_plot.png"
         )
-        plot_file1 = os.path.join(os.getcwd(), "nmf_result", output_fn + "comp_plot.png")
-        plot_file2 = os.path.join(os.getcwd(), "nmf_result", output_fn + "ratio_plot.png")
-        plot_file3 = os.path.join(os.getcwd(), "nmf_result", output_fn + "loss_plot.png")
         if args1.pca_thresh:
-            plot_file7 = os.path.join(os.getcwd(), "nmf_result", output_fn + "pca_var_plot.png")
+            plot_file7 = os.path.join(
+                os.getcwd(), "nmf_result", output_fn + "pca_var_plot.png"
+            )
         plot_file4 = os.path.splitext(plot_file1)[0] + ".pdf"
         plot_file5 = os.path.splitext(plot_file2)[0] + ".pdf"
         plot_file6 = os.path.splitext(plot_file3)[0] + ".pdf"
         if args1.pca_thresh:
             plot_file8 = os.path.splitext(plot_file7)[0] + ".pdf"
-        txt_file = os.path.join(os.getcwd(), "nmf_result", output_fn + "_meta" + ".txt")
+        txt_file = os.path.join(
+            os.getcwd(), "nmf_result", output_fn + "_meta" + ".txt"
+        )
         with open(txt_file, "w+") as fi:
             fi.write("NMF Analysis\n\n")
-            fi.write(f"{len(df_component_weight_timeseries.columns)} files uploaded for analysis.\n\n")
+            fi.write(
+                f"{len(df_component_weight_timeseries.columns)} files uploaded for analysis.\n\n"
+            )
             fi.write(f"The selected active r ranges are:  {args1.xrange} \n\n")
             fi.write("Thesholding:\n")
-            fi.write(f"\tThe input component threshold was: {args1.threshold}\n")
-            fi.write(f"\tThe input improvement threshold was: {args1.improve_thresh}\n")
-            fi.write(f"\tThe input # of iterations to run was: {args1.n_iter}\n")
+            fi.write(
+                f"\tThe input component threshold was: {args1.threshold}\n"
+            )
+            fi.write(
+                f"\tThe input improvement threshold was: {args1.improve_thresh}\n"
+            )
+            fi.write(
+                f"\tThe input # of iterations to run was: {args1.n_iter}\n"
+            )
             fi.write(f"\tWas PCA thresholding used?: {args1.pca_thresh}\n")
             fi.write(f"{len(df_components.columns)} components were extracted")
 
@@ -192,11 +253,17 @@ def tup(s):
             fig4.savefig(plot_file8)
         columns = df_components.columns
         for i, col in enumerate(columns):
-            data = np.column_stack([df_components.index.to_list(), df_components[col].to_list()])
+            data = np.column_stack(
+                [df_components.index.to_list(), df_components[col].to_list()]
+            )
 
             if args1.xrd:
                 np.savetxt(
-                    os.path.join(os.getcwd(), "nmf_result", output_fn + f"_comp{i}" + ".xy"),
+                    os.path.join(
+                        os.getcwd(),
+                        "nmf_result",
+                        output_fn + f"_comp{i}" + ".xy",
+                    ),
                     data,
                     header=f"NMF Generated XRD\nSource = nmfMapping\n"
                     f"Date = {output_fn}\n{args1.x_units} Intensity\n",
@@ -205,9 +272,14 @@ def tup(s):
                 )
             else:
                 np.savetxt(
-                    os.path.join(os.getcwd(), "nmf_result", output_fn + f"_comp{i}" + ".cgr"),
+                    os.path.join(
+                        os.getcwd(),
+                        "nmf_result",
+                        output_fn + f"_comp{i}" + ".cgr",
+                    ),
                     data,
-                    header=f"NMF Generated PDF\nSource: nmfMapping\n" f"Date: {output_fn}\nr g",
+                    header=f"NMF Generated PDF\nSource: nmfMapping\n"
+                    f"Date: {output_fn}\nr g",
                     fmt="%s",
                 )
 
diff --git a/src/diffpy/nmf_mapping/nmf_mapping_code.py b/src/diffpy/nmf_mapping/nmf_mapping_code.py
index 3c73ebf..2bcf9e2 100644
--- a/src/diffpy/nmf_mapping/nmf_mapping_code.py
+++ b/src/diffpy/nmf_mapping/nmf_mapping_code.py
@@ -81,7 +81,9 @@ def load_data(dir, xrd=False):
         x = new_dat[:, 0]
         y = new_dat[:, 1]
         if len(x) != len(x_set) or not all(x == x_set):
-            f = interpolate.interp1d(x, y, bounds_error=False, fill_value="extrapolate")
+            f = interpolate.interp1d(
+                x, y, bounds_error=False, fill_value="extrapolate"
+            )
             data_arr[i][:, 1] = f(x_set)
             data_arr[i][:, 0] = x_set
         else:
@@ -98,7 +100,13 @@ def load_data(dir, xrd=False):
 
 # TODO Add regularization on the frobenius norm in order to prevent creation of an excessive number of components
 def NMF_decomposition(
-    data_arr, x_range=None, thresh=None, additional_comp=False, improve_thresh=None, n_iter=None, pca_thresh=None
+    data_arr,
+    x_range=None,
+    thresh=None,
+    additional_comp=False,
+    improve_thresh=None,
+    n_iter=None,
+    pca_thresh=None,
 ):
     """
     Takes a 3D array of PDFs and returns the structurally significant
@@ -146,7 +154,8 @@ def NMF_decomposition(
             else:
                 df_list.append(
                     x_vs_y_df_preprocess[
-                        (x_vs_y_df_preprocess.index >= x_low) & (x_vs_y_df_preprocess.index <= x_high)
+                        (x_vs_y_df_preprocess.index >= x_low)
+                        & (x_vs_y_df_preprocess.index <= x_high)
                     ]
                 )
         x_vs_y_df = pd.concat(df_list)
@@ -169,7 +178,9 @@ def NMF_decomposition(
         pca.fit(x_vs_y_df.to_numpy().T)
         pca_number_components = len(pca.components_)
         pca_explained_variance = pca.explained_variance_ratio_
-        df_explained_var_ratio = pd.DataFrame(pd.Series(pca_explained_variance))
+        df_explained_var_ratio = pd.DataFrame(
+            pd.Series(pca_explained_variance)
+        )
         df_explained_var_ratio.index = df_explained_var_ratio.index + 1
     sweeping_grid = range(1, max_comp + 1, 1)
     for i in sweeping_grid:
@@ -182,7 +193,9 @@ def NMF_decomposition(
     if thresh is None:
         if improve_thresh is not None:
             if improve_thresh > 1 or improve_thresh < 0:
-                raise ValueError("Invalid improvement threshold ratio. Must be between 0 and 1.")
+                raise ValueError(
+                    "Invalid improvement threshold ratio. Must be between 0 and 1."
+                )
             thresh = nmf_ncomp_selection(nmf_loss, rtol=improve_thresh)
         elif pca_thresh:
             thresh = pca_number_components
@@ -205,11 +218,22 @@ def NMF_decomposition(
     nmf_weight /= nmf_weight.sum(1)[:, np.newaxis]
     nmf_weight = nmf_weight.T
     nmf_weight = np.array([nmf_weight[s, :] for s in range(n_comp)])
-    df_component_weight_timeseries = pd.DataFrame(nmf_weight, index=range(n_comp))
+    df_component_weight_timeseries = pd.DataFrame(
+        nmf_weight, index=range(n_comp)
+    )
 
     if pca_thresh:
-        return df_components, df_component_weight_timeseries, df_reconstruction_error, df_explained_var_ratio
-    return df_components, df_component_weight_timeseries, df_reconstruction_error
+        return (
+            df_components,
+            df_component_weight_timeseries,
+            df_reconstruction_error,
+            df_explained_var_ratio,
+        )
+    return (
+        df_components,
+        df_component_weight_timeseries,
+        df_reconstruction_error,
+    )
 
 
 def component_plot(df_components, xrd=False, x_units=None, show=True):
@@ -244,7 +268,11 @@ def component_plot(df_components, xrd=False, x_units=None, show=True):
     shift = max_range
     # seq to align with input phase
     for i, s in enumerate(data_list):
-        ax.plot(df.index.to_numpy(dtype=np.single), df[s].to_numpy() + i * shift, label=s)
+        ax.plot(
+            df.index.to_numpy(dtype=np.single),
+            df[s].to_numpy() + i * shift,
+            label=s,
+        )
     ax.legend(loc="best")
     if xrd:
         if x_units == "twotheta" or x_units == "ttheta":
@@ -397,10 +425,14 @@ def nmf_ncomp_selection(loss, rtol=None):
         rtol = 1e-2
         (inds,) = np.where(imp_ratio <= rtol)
         if not list(inds):
-            print("Improvement ratio of 1E-2 not met. Inspect data and impose manual cutoff")
+            print(
+                "Improvement ratio of 1E-2 not met. Inspect data and impose manual cutoff"
+            )
             len(loss)
             return starting_len
     if not list(inds):
-        print(f"Improvement ratio of {rtol} not met. Inspect data and impose manual cutoff")
+        print(
+            f"Improvement ratio of {rtol} not met. Inspect data and impose manual cutoff"
+        )
         return starting_len
     return inds[0] + 1
diff --git a/src/diffpy/version.py b/src/diffpy/version.py
index 2200d1c..c3bac34 100644
--- a/src/diffpy/version.py
+++ b/src/diffpy/version.py
@@ -13,8 +13,7 @@
 #
 ##############################################################################
 
-"""Definition of __version__ and __date__ for diffpy.nmf_mapping.
-"""
+"""Definition of __version__ and __date__ for diffpy.nmf_mapping."""
 
 __id__ = "$Id$"
 
@@ -24,6 +23,8 @@
 __version__ = get_distribution("diffpy.nmf_mapping").version
 
 # we assume that tag_date was used and __version__ ends in YYYYMMDD
-__date__ = __version__[-8:-4] + "-" + __version__[-4:-2] + "-" + __version__[-2:]
+__date__ = (
+    __version__[-8:-4] + "-" + __version__[-4:-2] + "-" + __version__[-2:]
+)
 
 # End of file
diff --git a/tests/test_NMF_analysis_code.py b/tests/test_NMF_analysis_code.py
index 971ca95..b4151fa 100644
--- a/tests/test_NMF_analysis_code.py
+++ b/tests/test_NMF_analysis_code.py
@@ -14,7 +14,11 @@
 test_map = [
     ([data_dir, "--xrange", "5,10"], "output_1", "Number of components: 3\n"),
     ([data_dir], "output_2", "Number of components: 3\n"),
-    ([data_dir, "--xrange", "5,10", "12,15"], "output_3", "Number of components: 3\n"),
+    (
+        [data_dir, "--xrange", "5,10", "12,15"],
+        "output_3",
+        "Number of components: 3\n",
+    ),
 ]
 
 
diff --git a/tests/test_load_data.py b/tests/test_load_data.py
index 8c2f6a3..6ef96c4 100644
--- a/tests/test_load_data.py
+++ b/tests/test_load_data.py
@@ -13,9 +13,19 @@ def test_load_data():
     bfto_dir = os.path.join(dir, "data/synthetic_r_vs_gr")
     bfto_expected_filenames = np.array([f"synthetic{i}" for i in range(50)])
     loaded_filenames = nmf.load_data(bfto_dir)[1]
-    assert np.testing.assert_array_equal(loaded_filenames, bfto_expected_filenames) is None
+    assert (
+        np.testing.assert_array_equal(
+            loaded_filenames, bfto_expected_filenames
+        )
+        is None
+    )
 
     # make sure interpolation to same r-grid is working
     diff_r_grid_dir = os.path.join(dir, "data", "different_r_grid")
     diff_r_grid_arr = nmf.load_data(diff_r_grid_dir)[0]
-    assert np.testing.assert_array_equal(diff_r_grid_arr[0][:, 0], diff_r_grid_arr[1][:, 0]) is None
+    assert (
+        np.testing.assert_array_equal(
+            diff_r_grid_arr[0][:, 0], diff_r_grid_arr[1][:, 0]
+        )
+        is None
+    )
diff --git a/tests/test_version.py b/tests/test_version.py
index 1167910..8513431 100644
--- a/tests/test_version.py
+++ b/tests/test_version.py
@@ -1,5 +1,4 @@
-"""Unit tests for __version__.py
-"""
+"""Unit tests for __version__.py"""
 
 import diffpy.nmf_mapping
 

From 1eb4bd60f8ee7ca3062338568d1a11df60f96876 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 27 Jun 2025 23:05:03 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit hooks

---
 src/diffpy/nmf_mapping/main.py             | 81 ++++++----------------
 src/diffpy/nmf_mapping/nmf_mapping_code.py | 27 ++------
 src/diffpy/version.py                      |  4 +-
 tests/test_load_data.py                    | 14 +---
 4 files changed, 30 insertions(+), 96 deletions(-)

diff --git a/src/diffpy/nmf_mapping/main.py b/src/diffpy/nmf_mapping/main.py
index 69aa27e..639e624 100644
--- a/src/diffpy/nmf_mapping/main.py
+++ b/src/diffpy/nmf_mapping/main.py
@@ -39,17 +39,13 @@ def main(args=None):
 
     def tup(s):
         if not isinstance(s, str):
-            raise TypeError(
-                "Input must be a string of two integers separated by a comma."
-            )
+            raise TypeError("Input must be a string of two integers separated by a comma.")
 
         try:
             l, h = map(int, s.split(","))
             return l, h
         except ValueError:
-            raise ValueError(
-                "Input must be two integers separated by a comma (e.g., '1,5')"
-            )
+            raise ValueError("Input must be two integers separated by a comma (e.g., '1,5')")
 
     # args
     parser.add_argument(
@@ -99,9 +95,7 @@ def tup(s):
         "--xrd",
         default=False,
         type=boolean_string,
-        help="whether to look for .xy files rather than .gr files\n"
-        "default: False\n"
-        "e.g. --xrd True",
+        help="whether to look for .xy files rather than .gr files\n" "default: False\n" "e.g. --xrd True",
     )
     parser.add_argument(
         "--x_units",
@@ -109,9 +103,7 @@ def tup(s):
         type=str,
         choices=["twotheta", "q"],
         required="--xrd" in sys.argv,
-        help="x axis units for XRD data\n"
-        "default: None\n"
-        "e.g. --x_units twotheta",
+        help="x axis units for XRD data\n" "default: None\n" "e.g. --x_units twotheta",
     )
     parser.add_argument(
         "--xrange",
@@ -161,9 +153,7 @@ def tup(s):
 
     print(f"Number of components: {len(df_components.columns)}")
 
-    fig1 = nmf.component_plot(
-        df_components, args1.xrd, args1.x_units, args1.show
-    )
+    fig1 = nmf.component_plot(df_components, args1.xrd, args1.x_units, args1.show)
     fig2 = nmf.component_ratio_plot(df_component_weight_timeseries, args1.show)
     fig3 = nmf.reconstruction_error_plot(df_reconstruction_error, args1.show)
     if args1.pca_thresh:
@@ -172,18 +162,10 @@ def tup(s):
     if args1.save_files:
         if not os.path.exists(os.path.join(os.getcwd(), "nmf_result")):
             os.mkdir(os.path.join(os.getcwd(), "nmf_result"))
-        output_fn = datetime.fromtimestamp(time.time()).strftime(
-            "%Y%m%d%H%M%S%f"
-        )
-        df_components.to_json(
-            os.path.join(
-                os.getcwd(), "nmf_result", "x_index_vs_y_col_components.json"
-            )
-        )
+        output_fn = datetime.fromtimestamp(time.time()).strftime("%Y%m%d%H%M%S%f")
+        df_components.to_json(os.path.join(os.getcwd(), "nmf_result", "x_index_vs_y_col_components.json"))
         df_component_weight_timeseries.to_json(
-            os.path.join(
-                os.getcwd(), "nmf_result", "component_index_vs_pratio_col.json"
-            )
+            os.path.join(os.getcwd(), "nmf_result", "component_index_vs_pratio_col.json")
         )
         df_component_weight_timeseries.to_csv(
             os.path.join(
@@ -197,47 +179,27 @@ def tup(s):
             mode="a",
         )
         df_reconstruction_error.to_json(
-            os.path.join(
-                os.getcwd(), "nmf_result", "component_index_vs_RE_value.json"
-            )
-        )
-        plot_file1 = os.path.join(
-            os.getcwd(), "nmf_result", output_fn + "comp_plot.png"
-        )
-        plot_file2 = os.path.join(
-            os.getcwd(), "nmf_result", output_fn + "ratio_plot.png"
-        )
-        plot_file3 = os.path.join(
-            os.getcwd(), "nmf_result", output_fn + "loss_plot.png"
+            os.path.join(os.getcwd(), "nmf_result", "component_index_vs_RE_value.json")
         )
+        plot_file1 = os.path.join(os.getcwd(), "nmf_result", output_fn + "comp_plot.png")
+        plot_file2 = os.path.join(os.getcwd(), "nmf_result", output_fn + "ratio_plot.png")
+        plot_file3 = os.path.join(os.getcwd(), "nmf_result", output_fn + "loss_plot.png")
         if args1.pca_thresh:
-            plot_file7 = os.path.join(
-                os.getcwd(), "nmf_result", output_fn + "pca_var_plot.png"
-            )
+            plot_file7 = os.path.join(os.getcwd(), "nmf_result", output_fn + "pca_var_plot.png")
         plot_file4 = os.path.splitext(plot_file1)[0] + ".pdf"
         plot_file5 = os.path.splitext(plot_file2)[0] + ".pdf"
         plot_file6 = os.path.splitext(plot_file3)[0] + ".pdf"
         if args1.pca_thresh:
             plot_file8 = os.path.splitext(plot_file7)[0] + ".pdf"
-        txt_file = os.path.join(
-            os.getcwd(), "nmf_result", output_fn + "_meta" + ".txt"
-        )
+        txt_file = os.path.join(os.getcwd(), "nmf_result", output_fn + "_meta" + ".txt")
         with open(txt_file, "w+") as fi:
             fi.write("NMF Analysis\n\n")
-            fi.write(
-                f"{len(df_component_weight_timeseries.columns)} files uploaded for analysis.\n\n"
-            )
+            fi.write(f"{len(df_component_weight_timeseries.columns)} files uploaded for analysis.\n\n")
             fi.write(f"The selected active r ranges are:  {args1.xrange} \n\n")
             fi.write("Thesholding:\n")
-            fi.write(
-                f"\tThe input component threshold was: {args1.threshold}\n"
-            )
-            fi.write(
-                f"\tThe input improvement threshold was: {args1.improve_thresh}\n"
-            )
-            fi.write(
-                f"\tThe input # of iterations to run was: {args1.n_iter}\n"
-            )
+            fi.write(f"\tThe input component threshold was: {args1.threshold}\n")
+            fi.write(f"\tThe input improvement threshold was: {args1.improve_thresh}\n")
+            fi.write(f"\tThe input # of iterations to run was: {args1.n_iter}\n")
             fi.write(f"\tWas PCA thresholding used?: {args1.pca_thresh}\n")
             fi.write(f"{len(df_components.columns)} components were extracted")
 
@@ -253,9 +215,7 @@ def tup(s):
             fig4.savefig(plot_file8)
         columns = df_components.columns
         for i, col in enumerate(columns):
-            data = np.column_stack(
-                [df_components.index.to_list(), df_components[col].to_list()]
-            )
+            data = np.column_stack([df_components.index.to_list(), df_components[col].to_list()])
 
             if args1.xrd:
                 np.savetxt(
@@ -278,8 +238,7 @@ def tup(s):
                         output_fn + f"_comp{i}" + ".cgr",
                     ),
                     data,
-                    header=f"NMF Generated PDF\nSource: nmfMapping\n"
-                    f"Date: {output_fn}\nr g",
+                    header=f"NMF Generated PDF\nSource: nmfMapping\n" f"Date: {output_fn}\nr g",
                     fmt="%s",
                 )
 
diff --git a/src/diffpy/nmf_mapping/nmf_mapping_code.py b/src/diffpy/nmf_mapping/nmf_mapping_code.py
index 2bcf9e2..638abd4 100644
--- a/src/diffpy/nmf_mapping/nmf_mapping_code.py
+++ b/src/diffpy/nmf_mapping/nmf_mapping_code.py
@@ -81,9 +81,7 @@ def load_data(dir, xrd=False):
         x = new_dat[:, 0]
         y = new_dat[:, 1]
         if len(x) != len(x_set) or not all(x == x_set):
-            f = interpolate.interp1d(
-                x, y, bounds_error=False, fill_value="extrapolate"
-            )
+            f = interpolate.interp1d(x, y, bounds_error=False, fill_value="extrapolate")
             data_arr[i][:, 1] = f(x_set)
             data_arr[i][:, 0] = x_set
         else:
@@ -154,8 +152,7 @@ def NMF_decomposition(
             else:
                 df_list.append(
                     x_vs_y_df_preprocess[
-                        (x_vs_y_df_preprocess.index >= x_low)
-                        & (x_vs_y_df_preprocess.index <= x_high)
+                        (x_vs_y_df_preprocess.index >= x_low) & (x_vs_y_df_preprocess.index <= x_high)
                     ]
                 )
         x_vs_y_df = pd.concat(df_list)
@@ -178,9 +175,7 @@ def NMF_decomposition(
         pca.fit(x_vs_y_df.to_numpy().T)
         pca_number_components = len(pca.components_)
         pca_explained_variance = pca.explained_variance_ratio_
-        df_explained_var_ratio = pd.DataFrame(
-            pd.Series(pca_explained_variance)
-        )
+        df_explained_var_ratio = pd.DataFrame(pd.Series(pca_explained_variance))
         df_explained_var_ratio.index = df_explained_var_ratio.index + 1
     sweeping_grid = range(1, max_comp + 1, 1)
     for i in sweeping_grid:
@@ -193,9 +188,7 @@ def NMF_decomposition(
     if thresh is None:
         if improve_thresh is not None:
             if improve_thresh > 1 or improve_thresh < 0:
-                raise ValueError(
-                    "Invalid improvement threshold ratio. Must be between 0 and 1."
-                )
+                raise ValueError("Invalid improvement threshold ratio. Must be between 0 and 1.")
             thresh = nmf_ncomp_selection(nmf_loss, rtol=improve_thresh)
         elif pca_thresh:
             thresh = pca_number_components
@@ -218,9 +211,7 @@ def NMF_decomposition(
     nmf_weight /= nmf_weight.sum(1)[:, np.newaxis]
     nmf_weight = nmf_weight.T
     nmf_weight = np.array([nmf_weight[s, :] for s in range(n_comp)])
-    df_component_weight_timeseries = pd.DataFrame(
-        nmf_weight, index=range(n_comp)
-    )
+    df_component_weight_timeseries = pd.DataFrame(nmf_weight, index=range(n_comp))
 
     if pca_thresh:
         return (
@@ -425,14 +416,10 @@ def nmf_ncomp_selection(loss, rtol=None):
         rtol = 1e-2
         (inds,) = np.where(imp_ratio <= rtol)
         if not list(inds):
-            print(
-                "Improvement ratio of 1E-2 not met. Inspect data and impose manual cutoff"
-            )
+            print("Improvement ratio of 1E-2 not met. Inspect data and impose manual cutoff")
             len(loss)
             return starting_len
     if not list(inds):
-        print(
-            f"Improvement ratio of {rtol} not met. Inspect data and impose manual cutoff"
-        )
+        print(f"Improvement ratio of {rtol} not met. Inspect data and impose manual cutoff")
         return starting_len
     return inds[0] + 1
diff --git a/src/diffpy/version.py b/src/diffpy/version.py
index c3bac34..45c8a92 100644
--- a/src/diffpy/version.py
+++ b/src/diffpy/version.py
@@ -23,8 +23,6 @@
 __version__ = get_distribution("diffpy.nmf_mapping").version
 
 # we assume that tag_date was used and __version__ ends in YYYYMMDD
-__date__ = (
-    __version__[-8:-4] + "-" + __version__[-4:-2] + "-" + __version__[-2:]
-)
+__date__ = __version__[-8:-4] + "-" + __version__[-4:-2] + "-" + __version__[-2:]
 
 # End of file
diff --git a/tests/test_load_data.py b/tests/test_load_data.py
index 6ef96c4..8c2f6a3 100644
--- a/tests/test_load_data.py
+++ b/tests/test_load_data.py
@@ -13,19 +13,9 @@ def test_load_data():
     bfto_dir = os.path.join(dir, "data/synthetic_r_vs_gr")
     bfto_expected_filenames = np.array([f"synthetic{i}" for i in range(50)])
     loaded_filenames = nmf.load_data(bfto_dir)[1]
-    assert (
-        np.testing.assert_array_equal(
-            loaded_filenames, bfto_expected_filenames
-        )
-        is None
-    )
+    assert np.testing.assert_array_equal(loaded_filenames, bfto_expected_filenames) is None
 
     # make sure interpolation to same r-grid is working
     diff_r_grid_dir = os.path.join(dir, "data", "different_r_grid")
     diff_r_grid_arr = nmf.load_data(diff_r_grid_dir)[0]
-    assert (
-        np.testing.assert_array_equal(
-            diff_r_grid_arr[0][:, 0], diff_r_grid_arr[1][:, 0]
-        )
-        is None
-    )
+    assert np.testing.assert_array_equal(diff_r_grid_arr[0][:, 0], diff_r_grid_arr[1][:, 0]) is None