From a20d3121c9b8c0386a7201d7a1bb374dbb058531 Mon Sep 17 00:00:00 2001 From: Dasun Abeykoon Date: Fri, 27 Jun 2025 12:16:20 -0700 Subject: [PATCH 1/2] skpkg: apply black to all files in the project dir --- .github/ISSUE_TEMPLATE/release_checklist.md | 31 +++-- doc/source/conf.py | 18 ++- src/__init__.py | 3 +- src/diffpy/nmf_mapping/main.py | 146 +++++++++++++++----- src/diffpy/nmf_mapping/nmf_mapping_code.py | 54 ++++++-- src/diffpy/version.py | 7 +- tests/test_NMF_analysis_code.py | 6 +- tests/test_load_data.py | 14 +- tests/test_version.py | 3 +- 9 files changed, 212 insertions(+), 70 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/release_checklist.md b/.github/ISSUE_TEMPLATE/release_checklist.md index 0f56027..6107962 100644 --- a/.github/ISSUE_TEMPLATE/release_checklist.md +++ b/.github/ISSUE_TEMPLATE/release_checklist.md @@ -6,30 +6,41 @@ labels: "release" assignees: "" --- -### PyPI/GitHub release checklist: +### PyPI/GitHub rc-release preparation checklist: - [ ] All PRs/issues attached to the release are merged. - [ ] All the badges on the README are passing. - [ ] License information is verified as correct. If you are unsure, please comment below. - [ ] Locally rendered documentation contains all appropriate pages, including API references (check no modules are - missing), tutorials, and other human written text is up-to-date with any changes in the code. -- [ ] Installation instructions in the README, documentation and on the website (e.g., diffpy.org) are updated. + missing), tutorials, and other human-written text is up-to-date with any changes in the code. +- [ ] Installation instructions in the README, documentation, and the website are updated. - [ ] Successfully run any tutorial examples or do functional testing with the latest Python version. - [ ] Grammar and writing quality are checked (no typos). +- [ ] Install `pip install build twine`, run `python -m build` and `twine check dist/*` to ensure that the package can be built and is correctly formatted for PyPI release. -Please mention @sbillinge here when you are ready for PyPI/GitHub release. Include any additional comments necessary, such as -version information and details about the pre-release here: +Please tag the maintainer (e.g., @username) in the comment here when you are ready for the PyPI/GitHub release. Include any additional comments necessary, such as version information and details about the pre-release here: -### conda-forge release checklist: +### PyPI/GitHub full-release preparation checklist: - +- [ ] Create a new conda environment and install the rc from PyPI (`pip install ==??`) +- [ ] License information on PyPI is correct. +- [ ] Docs are deployed successfully to `https:///`. +- [ ] Successfully run all tests, tutorial examples or do functional testing. +Please let the maintainer know that all checks are done and the package is ready for full release. + +### conda-forge release preparation checklist: + + + +- [ ] Ensure that the full release has appeared on PyPI successfully. - [ ] New package dependencies listed in `conda.txt` and `test.txt` are added to `meta.yaml` in the feedstock. -- [ ] All relevant issues in the feedstock are addressed in the release PR. +- [ ] Close any open issues on the feedstock. Reach out to the maintainer if you have questions. +- [ ] Tag the maintainer for conda-forge release. ### Post-release checklist -- [ ] Run tutorial examples and conduct functional testing using the installation guide in the README. Attach screenshots/results as comments. -- [ ] Documentation (README, tutorials, API references, and websites) is deployed without broken links or missing figures. +- [ ] Run tutorial examples and conduct functional testing using the installation guide in the README. Attach screenshots/results as comments. +- [ ] Documentation (README, tutorials, API references, and websites) is deployed without broken links or missing figures. diff --git a/doc/source/conf.py b/doc/source/conf.py index 287c517..4245ea3 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -221,7 +221,13 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - ("index", "diffpy.nmf_mapping.tex", "diffpy.nmf_mapping Documentation", ab_authors, "manual"), + ( + "index", + "diffpy.nmf_mapping.tex", + "diffpy.nmf_mapping Documentation", + ab_authors, + "manual", + ), ] # The name of an image file (relative to this directory) to place at the top of @@ -249,7 +255,15 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [("index", "diffpy.nmf_mapping", "diffpy.nmf_mapping Documentation", ab_authors, 1)] +man_pages = [ + ( + "index", + "diffpy.nmf_mapping", + "diffpy.nmf_mapping Documentation", + ab_authors, + 1, + ) +] # If true, show URL addresses after external links. # man_show_urls = False diff --git a/src/__init__.py b/src/__init__.py index 521e31a..736037d 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -13,8 +13,7 @@ # ############################################################################## -"""nmf_mapping - tools for performing NMF on PDF and XRD data. -""" +"""nmf_mapping - tools for performing NMF on PDF and XRD data.""" __import__("pkg_resources").declare_namespace(__name__) diff --git a/src/diffpy/nmf_mapping/main.py b/src/diffpy/nmf_mapping/main.py index 06daff0..69aa27e 100644 --- a/src/diffpy/nmf_mapping/main.py +++ b/src/diffpy/nmf_mapping/main.py @@ -31,21 +31,32 @@ def main(args=None): as well as the reconstruction error as a fxn of component """ - parser = ArgumentParser(prog="nmf_mapping", description=_BANNER, formatter_class=RawTextHelpFormatter) + parser = ArgumentParser( + prog="nmf_mapping", + description=_BANNER, + formatter_class=RawTextHelpFormatter, + ) def tup(s): if not isinstance(s, str): - raise TypeError("Input must be a string of two integers separated by a comma.") + raise TypeError( + "Input must be a string of two integers separated by a comma." + ) try: l, h = map(int, s.split(",")) return l, h except ValueError: - raise ValueError("Input must be two integers separated by a comma (e.g., '1,5')") + raise ValueError( + "Input must be two integers separated by a comma (e.g., '1,5')" + ) # args parser.add_argument( - "directory", default=None, type=str, help="a directory of PDFs to calculate NMF decomposition" + "directory", + default=None, + type=str, + help="a directory of PDFs to calculate NMF decomposition", ) group = parser.add_mutually_exclusive_group() parser.add_argument( @@ -88,7 +99,9 @@ def tup(s): "--xrd", default=False, type=boolean_string, - help="whether to look for .xy files rather than .gr files\n" "default: False\n" "e.g. --xrd True", + help="whether to look for .xy files rather than .gr files\n" + "default: False\n" + "e.g. --xrd True", ) parser.add_argument( "--x_units", @@ -96,7 +109,9 @@ def tup(s): type=str, choices=["twotheta", "q"], required="--xrd" in sys.argv, - help="x axis units for XRD data\n" "default: None\n" "e.g. --x_units twotheta", + help="x axis units for XRD data\n" + "default: None\n" + "e.g. --x_units twotheta", ) parser.add_argument( "--xrange", @@ -105,25 +120,37 @@ def tup(s): nargs="*", help="the x-range over which to calculate NMF, can be multiple ranges (e.g. --xrange 5,10 12,15)", ) - parser.add_argument("--show", default=True, type=boolean_string, help="whether to show the plot") + parser.add_argument( + "--show", + default=True, + type=boolean_string, + help="whether to show the plot", + ) args0 = Namespace() args1, _ = parser.parse_known_args(args, namespace=args0) input_list, data_list = nmf.load_data(args1.directory, args1.xrd) if args1.pca_thresh: - df_components, df_component_weight_timeseries, df_reconstruction_error, df_explained_var_ratio = ( - nmf.NMF_decomposition( - input_list, - args1.xrange, - args1.threshold, - additional_comp=False, - improve_thresh=args1.improve_thresh, - n_iter=args1.n_iter, - pca_thresh=args1.pca_thresh, - ) + ( + df_components, + df_component_weight_timeseries, + df_reconstruction_error, + df_explained_var_ratio, + ) = nmf.NMF_decomposition( + input_list, + args1.xrange, + args1.threshold, + additional_comp=False, + improve_thresh=args1.improve_thresh, + n_iter=args1.n_iter, + pca_thresh=args1.pca_thresh, ) else: - df_components, df_component_weight_timeseries, df_reconstruction_error = nmf.NMF_decomposition( + ( + df_components, + df_component_weight_timeseries, + df_reconstruction_error, + ) = nmf.NMF_decomposition( input_list, args1.xrange, args1.threshold, @@ -134,7 +161,9 @@ def tup(s): print(f"Number of components: {len(df_components.columns)}") - fig1 = nmf.component_plot(df_components, args1.xrd, args1.x_units, args1.show) + fig1 = nmf.component_plot( + df_components, args1.xrd, args1.x_units, args1.show + ) fig2 = nmf.component_ratio_plot(df_component_weight_timeseries, args1.show) fig3 = nmf.reconstruction_error_plot(df_reconstruction_error, args1.show) if args1.pca_thresh: @@ -143,40 +172,72 @@ def tup(s): if args1.save_files: if not os.path.exists(os.path.join(os.getcwd(), "nmf_result")): os.mkdir(os.path.join(os.getcwd(), "nmf_result")) - output_fn = datetime.fromtimestamp(time.time()).strftime("%Y%m%d%H%M%S%f") - df_components.to_json(os.path.join(os.getcwd(), "nmf_result", "x_index_vs_y_col_components.json")) + output_fn = datetime.fromtimestamp(time.time()).strftime( + "%Y%m%d%H%M%S%f" + ) + df_components.to_json( + os.path.join( + os.getcwd(), "nmf_result", "x_index_vs_y_col_components.json" + ) + ) df_component_weight_timeseries.to_json( - os.path.join(os.getcwd(), "nmf_result", "component_index_vs_pratio_col.json") + os.path.join( + os.getcwd(), "nmf_result", "component_index_vs_pratio_col.json" + ) ) df_component_weight_timeseries.to_csv( - os.path.join(os.getcwd(), "nmf_result", output_fn + "component_row_pratio_col.txt"), + os.path.join( + os.getcwd(), + "nmf_result", + output_fn + "component_row_pratio_col.txt", + ), header=None, index=False, sep=" ", mode="a", ) df_reconstruction_error.to_json( - os.path.join(os.getcwd(), "nmf_result", "component_index_vs_RE_value.json") + os.path.join( + os.getcwd(), "nmf_result", "component_index_vs_RE_value.json" + ) + ) + plot_file1 = os.path.join( + os.getcwd(), "nmf_result", output_fn + "comp_plot.png" + ) + plot_file2 = os.path.join( + os.getcwd(), "nmf_result", output_fn + "ratio_plot.png" + ) + plot_file3 = os.path.join( + os.getcwd(), "nmf_result", output_fn + "loss_plot.png" ) - plot_file1 = os.path.join(os.getcwd(), "nmf_result", output_fn + "comp_plot.png") - plot_file2 = os.path.join(os.getcwd(), "nmf_result", output_fn + "ratio_plot.png") - plot_file3 = os.path.join(os.getcwd(), "nmf_result", output_fn + "loss_plot.png") if args1.pca_thresh: - plot_file7 = os.path.join(os.getcwd(), "nmf_result", output_fn + "pca_var_plot.png") + plot_file7 = os.path.join( + os.getcwd(), "nmf_result", output_fn + "pca_var_plot.png" + ) plot_file4 = os.path.splitext(plot_file1)[0] + ".pdf" plot_file5 = os.path.splitext(plot_file2)[0] + ".pdf" plot_file6 = os.path.splitext(plot_file3)[0] + ".pdf" if args1.pca_thresh: plot_file8 = os.path.splitext(plot_file7)[0] + ".pdf" - txt_file = os.path.join(os.getcwd(), "nmf_result", output_fn + "_meta" + ".txt") + txt_file = os.path.join( + os.getcwd(), "nmf_result", output_fn + "_meta" + ".txt" + ) with open(txt_file, "w+") as fi: fi.write("NMF Analysis\n\n") - fi.write(f"{len(df_component_weight_timeseries.columns)} files uploaded for analysis.\n\n") + fi.write( + f"{len(df_component_weight_timeseries.columns)} files uploaded for analysis.\n\n" + ) fi.write(f"The selected active r ranges are: {args1.xrange} \n\n") fi.write("Thesholding:\n") - fi.write(f"\tThe input component threshold was: {args1.threshold}\n") - fi.write(f"\tThe input improvement threshold was: {args1.improve_thresh}\n") - fi.write(f"\tThe input # of iterations to run was: {args1.n_iter}\n") + fi.write( + f"\tThe input component threshold was: {args1.threshold}\n" + ) + fi.write( + f"\tThe input improvement threshold was: {args1.improve_thresh}\n" + ) + fi.write( + f"\tThe input # of iterations to run was: {args1.n_iter}\n" + ) fi.write(f"\tWas PCA thresholding used?: {args1.pca_thresh}\n") fi.write(f"{len(df_components.columns)} components were extracted") @@ -192,11 +253,17 @@ def tup(s): fig4.savefig(plot_file8) columns = df_components.columns for i, col in enumerate(columns): - data = np.column_stack([df_components.index.to_list(), df_components[col].to_list()]) + data = np.column_stack( + [df_components.index.to_list(), df_components[col].to_list()] + ) if args1.xrd: np.savetxt( - os.path.join(os.getcwd(), "nmf_result", output_fn + f"_comp{i}" + ".xy"), + os.path.join( + os.getcwd(), + "nmf_result", + output_fn + f"_comp{i}" + ".xy", + ), data, header=f"NMF Generated XRD\nSource = nmfMapping\n" f"Date = {output_fn}\n{args1.x_units} Intensity\n", @@ -205,9 +272,14 @@ def tup(s): ) else: np.savetxt( - os.path.join(os.getcwd(), "nmf_result", output_fn + f"_comp{i}" + ".cgr"), + os.path.join( + os.getcwd(), + "nmf_result", + output_fn + f"_comp{i}" + ".cgr", + ), data, - header=f"NMF Generated PDF\nSource: nmfMapping\n" f"Date: {output_fn}\nr g", + header=f"NMF Generated PDF\nSource: nmfMapping\n" + f"Date: {output_fn}\nr g", fmt="%s", ) diff --git a/src/diffpy/nmf_mapping/nmf_mapping_code.py b/src/diffpy/nmf_mapping/nmf_mapping_code.py index 3c73ebf..2bcf9e2 100644 --- a/src/diffpy/nmf_mapping/nmf_mapping_code.py +++ b/src/diffpy/nmf_mapping/nmf_mapping_code.py @@ -81,7 +81,9 @@ def load_data(dir, xrd=False): x = new_dat[:, 0] y = new_dat[:, 1] if len(x) != len(x_set) or not all(x == x_set): - f = interpolate.interp1d(x, y, bounds_error=False, fill_value="extrapolate") + f = interpolate.interp1d( + x, y, bounds_error=False, fill_value="extrapolate" + ) data_arr[i][:, 1] = f(x_set) data_arr[i][:, 0] = x_set else: @@ -98,7 +100,13 @@ def load_data(dir, xrd=False): # TODO Add regularization on the frobenius norm in order to prevent creation of an excessive number of components def NMF_decomposition( - data_arr, x_range=None, thresh=None, additional_comp=False, improve_thresh=None, n_iter=None, pca_thresh=None + data_arr, + x_range=None, + thresh=None, + additional_comp=False, + improve_thresh=None, + n_iter=None, + pca_thresh=None, ): """ Takes a 3D array of PDFs and returns the structurally significant @@ -146,7 +154,8 @@ def NMF_decomposition( else: df_list.append( x_vs_y_df_preprocess[ - (x_vs_y_df_preprocess.index >= x_low) & (x_vs_y_df_preprocess.index <= x_high) + (x_vs_y_df_preprocess.index >= x_low) + & (x_vs_y_df_preprocess.index <= x_high) ] ) x_vs_y_df = pd.concat(df_list) @@ -169,7 +178,9 @@ def NMF_decomposition( pca.fit(x_vs_y_df.to_numpy().T) pca_number_components = len(pca.components_) pca_explained_variance = pca.explained_variance_ratio_ - df_explained_var_ratio = pd.DataFrame(pd.Series(pca_explained_variance)) + df_explained_var_ratio = pd.DataFrame( + pd.Series(pca_explained_variance) + ) df_explained_var_ratio.index = df_explained_var_ratio.index + 1 sweeping_grid = range(1, max_comp + 1, 1) for i in sweeping_grid: @@ -182,7 +193,9 @@ def NMF_decomposition( if thresh is None: if improve_thresh is not None: if improve_thresh > 1 or improve_thresh < 0: - raise ValueError("Invalid improvement threshold ratio. Must be between 0 and 1.") + raise ValueError( + "Invalid improvement threshold ratio. Must be between 0 and 1." + ) thresh = nmf_ncomp_selection(nmf_loss, rtol=improve_thresh) elif pca_thresh: thresh = pca_number_components @@ -205,11 +218,22 @@ def NMF_decomposition( nmf_weight /= nmf_weight.sum(1)[:, np.newaxis] nmf_weight = nmf_weight.T nmf_weight = np.array([nmf_weight[s, :] for s in range(n_comp)]) - df_component_weight_timeseries = pd.DataFrame(nmf_weight, index=range(n_comp)) + df_component_weight_timeseries = pd.DataFrame( + nmf_weight, index=range(n_comp) + ) if pca_thresh: - return df_components, df_component_weight_timeseries, df_reconstruction_error, df_explained_var_ratio - return df_components, df_component_weight_timeseries, df_reconstruction_error + return ( + df_components, + df_component_weight_timeseries, + df_reconstruction_error, + df_explained_var_ratio, + ) + return ( + df_components, + df_component_weight_timeseries, + df_reconstruction_error, + ) def component_plot(df_components, xrd=False, x_units=None, show=True): @@ -244,7 +268,11 @@ def component_plot(df_components, xrd=False, x_units=None, show=True): shift = max_range # seq to align with input phase for i, s in enumerate(data_list): - ax.plot(df.index.to_numpy(dtype=np.single), df[s].to_numpy() + i * shift, label=s) + ax.plot( + df.index.to_numpy(dtype=np.single), + df[s].to_numpy() + i * shift, + label=s, + ) ax.legend(loc="best") if xrd: if x_units == "twotheta" or x_units == "ttheta": @@ -397,10 +425,14 @@ def nmf_ncomp_selection(loss, rtol=None): rtol = 1e-2 (inds,) = np.where(imp_ratio <= rtol) if not list(inds): - print("Improvement ratio of 1E-2 not met. Inspect data and impose manual cutoff") + print( + "Improvement ratio of 1E-2 not met. Inspect data and impose manual cutoff" + ) len(loss) return starting_len if not list(inds): - print(f"Improvement ratio of {rtol} not met. Inspect data and impose manual cutoff") + print( + f"Improvement ratio of {rtol} not met. Inspect data and impose manual cutoff" + ) return starting_len return inds[0] + 1 diff --git a/src/diffpy/version.py b/src/diffpy/version.py index 2200d1c..c3bac34 100644 --- a/src/diffpy/version.py +++ b/src/diffpy/version.py @@ -13,8 +13,7 @@ # ############################################################################## -"""Definition of __version__ and __date__ for diffpy.nmf_mapping. -""" +"""Definition of __version__ and __date__ for diffpy.nmf_mapping.""" __id__ = "$Id$" @@ -24,6 +23,8 @@ __version__ = get_distribution("diffpy.nmf_mapping").version # we assume that tag_date was used and __version__ ends in YYYYMMDD -__date__ = __version__[-8:-4] + "-" + __version__[-4:-2] + "-" + __version__[-2:] +__date__ = ( + __version__[-8:-4] + "-" + __version__[-4:-2] + "-" + __version__[-2:] +) # End of file diff --git a/tests/test_NMF_analysis_code.py b/tests/test_NMF_analysis_code.py index 971ca95..b4151fa 100644 --- a/tests/test_NMF_analysis_code.py +++ b/tests/test_NMF_analysis_code.py @@ -14,7 +14,11 @@ test_map = [ ([data_dir, "--xrange", "5,10"], "output_1", "Number of components: 3\n"), ([data_dir], "output_2", "Number of components: 3\n"), - ([data_dir, "--xrange", "5,10", "12,15"], "output_3", "Number of components: 3\n"), + ( + [data_dir, "--xrange", "5,10", "12,15"], + "output_3", + "Number of components: 3\n", + ), ] diff --git a/tests/test_load_data.py b/tests/test_load_data.py index 8c2f6a3..6ef96c4 100644 --- a/tests/test_load_data.py +++ b/tests/test_load_data.py @@ -13,9 +13,19 @@ def test_load_data(): bfto_dir = os.path.join(dir, "data/synthetic_r_vs_gr") bfto_expected_filenames = np.array([f"synthetic{i}" for i in range(50)]) loaded_filenames = nmf.load_data(bfto_dir)[1] - assert np.testing.assert_array_equal(loaded_filenames, bfto_expected_filenames) is None + assert ( + np.testing.assert_array_equal( + loaded_filenames, bfto_expected_filenames + ) + is None + ) # make sure interpolation to same r-grid is working diff_r_grid_dir = os.path.join(dir, "data", "different_r_grid") diff_r_grid_arr = nmf.load_data(diff_r_grid_dir)[0] - assert np.testing.assert_array_equal(diff_r_grid_arr[0][:, 0], diff_r_grid_arr[1][:, 0]) is None + assert ( + np.testing.assert_array_equal( + diff_r_grid_arr[0][:, 0], diff_r_grid_arr[1][:, 0] + ) + is None + ) diff --git a/tests/test_version.py b/tests/test_version.py index 1167910..8513431 100644 --- a/tests/test_version.py +++ b/tests/test_version.py @@ -1,5 +1,4 @@ -"""Unit tests for __version__.py -""" +"""Unit tests for __version__.py""" import diffpy.nmf_mapping From 1eb4bd60f8ee7ca3062338568d1a11df60f96876 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 27 Jun 2025 23:05:03 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit hooks --- src/diffpy/nmf_mapping/main.py | 81 ++++++---------------- src/diffpy/nmf_mapping/nmf_mapping_code.py | 27 ++------ src/diffpy/version.py | 4 +- tests/test_load_data.py | 14 +--- 4 files changed, 30 insertions(+), 96 deletions(-) diff --git a/src/diffpy/nmf_mapping/main.py b/src/diffpy/nmf_mapping/main.py index 69aa27e..639e624 100644 --- a/src/diffpy/nmf_mapping/main.py +++ b/src/diffpy/nmf_mapping/main.py @@ -39,17 +39,13 @@ def main(args=None): def tup(s): if not isinstance(s, str): - raise TypeError( - "Input must be a string of two integers separated by a comma." - ) + raise TypeError("Input must be a string of two integers separated by a comma.") try: l, h = map(int, s.split(",")) return l, h except ValueError: - raise ValueError( - "Input must be two integers separated by a comma (e.g., '1,5')" - ) + raise ValueError("Input must be two integers separated by a comma (e.g., '1,5')") # args parser.add_argument( @@ -99,9 +95,7 @@ def tup(s): "--xrd", default=False, type=boolean_string, - help="whether to look for .xy files rather than .gr files\n" - "default: False\n" - "e.g. --xrd True", + help="whether to look for .xy files rather than .gr files\n" "default: False\n" "e.g. --xrd True", ) parser.add_argument( "--x_units", @@ -109,9 +103,7 @@ def tup(s): type=str, choices=["twotheta", "q"], required="--xrd" in sys.argv, - help="x axis units for XRD data\n" - "default: None\n" - "e.g. --x_units twotheta", + help="x axis units for XRD data\n" "default: None\n" "e.g. --x_units twotheta", ) parser.add_argument( "--xrange", @@ -161,9 +153,7 @@ def tup(s): print(f"Number of components: {len(df_components.columns)}") - fig1 = nmf.component_plot( - df_components, args1.xrd, args1.x_units, args1.show - ) + fig1 = nmf.component_plot(df_components, args1.xrd, args1.x_units, args1.show) fig2 = nmf.component_ratio_plot(df_component_weight_timeseries, args1.show) fig3 = nmf.reconstruction_error_plot(df_reconstruction_error, args1.show) if args1.pca_thresh: @@ -172,18 +162,10 @@ def tup(s): if args1.save_files: if not os.path.exists(os.path.join(os.getcwd(), "nmf_result")): os.mkdir(os.path.join(os.getcwd(), "nmf_result")) - output_fn = datetime.fromtimestamp(time.time()).strftime( - "%Y%m%d%H%M%S%f" - ) - df_components.to_json( - os.path.join( - os.getcwd(), "nmf_result", "x_index_vs_y_col_components.json" - ) - ) + output_fn = datetime.fromtimestamp(time.time()).strftime("%Y%m%d%H%M%S%f") + df_components.to_json(os.path.join(os.getcwd(), "nmf_result", "x_index_vs_y_col_components.json")) df_component_weight_timeseries.to_json( - os.path.join( - os.getcwd(), "nmf_result", "component_index_vs_pratio_col.json" - ) + os.path.join(os.getcwd(), "nmf_result", "component_index_vs_pratio_col.json") ) df_component_weight_timeseries.to_csv( os.path.join( @@ -197,47 +179,27 @@ def tup(s): mode="a", ) df_reconstruction_error.to_json( - os.path.join( - os.getcwd(), "nmf_result", "component_index_vs_RE_value.json" - ) - ) - plot_file1 = os.path.join( - os.getcwd(), "nmf_result", output_fn + "comp_plot.png" - ) - plot_file2 = os.path.join( - os.getcwd(), "nmf_result", output_fn + "ratio_plot.png" - ) - plot_file3 = os.path.join( - os.getcwd(), "nmf_result", output_fn + "loss_plot.png" + os.path.join(os.getcwd(), "nmf_result", "component_index_vs_RE_value.json") ) + plot_file1 = os.path.join(os.getcwd(), "nmf_result", output_fn + "comp_plot.png") + plot_file2 = os.path.join(os.getcwd(), "nmf_result", output_fn + "ratio_plot.png") + plot_file3 = os.path.join(os.getcwd(), "nmf_result", output_fn + "loss_plot.png") if args1.pca_thresh: - plot_file7 = os.path.join( - os.getcwd(), "nmf_result", output_fn + "pca_var_plot.png" - ) + plot_file7 = os.path.join(os.getcwd(), "nmf_result", output_fn + "pca_var_plot.png") plot_file4 = os.path.splitext(plot_file1)[0] + ".pdf" plot_file5 = os.path.splitext(plot_file2)[0] + ".pdf" plot_file6 = os.path.splitext(plot_file3)[0] + ".pdf" if args1.pca_thresh: plot_file8 = os.path.splitext(plot_file7)[0] + ".pdf" - txt_file = os.path.join( - os.getcwd(), "nmf_result", output_fn + "_meta" + ".txt" - ) + txt_file = os.path.join(os.getcwd(), "nmf_result", output_fn + "_meta" + ".txt") with open(txt_file, "w+") as fi: fi.write("NMF Analysis\n\n") - fi.write( - f"{len(df_component_weight_timeseries.columns)} files uploaded for analysis.\n\n" - ) + fi.write(f"{len(df_component_weight_timeseries.columns)} files uploaded for analysis.\n\n") fi.write(f"The selected active r ranges are: {args1.xrange} \n\n") fi.write("Thesholding:\n") - fi.write( - f"\tThe input component threshold was: {args1.threshold}\n" - ) - fi.write( - f"\tThe input improvement threshold was: {args1.improve_thresh}\n" - ) - fi.write( - f"\tThe input # of iterations to run was: {args1.n_iter}\n" - ) + fi.write(f"\tThe input component threshold was: {args1.threshold}\n") + fi.write(f"\tThe input improvement threshold was: {args1.improve_thresh}\n") + fi.write(f"\tThe input # of iterations to run was: {args1.n_iter}\n") fi.write(f"\tWas PCA thresholding used?: {args1.pca_thresh}\n") fi.write(f"{len(df_components.columns)} components were extracted") @@ -253,9 +215,7 @@ def tup(s): fig4.savefig(plot_file8) columns = df_components.columns for i, col in enumerate(columns): - data = np.column_stack( - [df_components.index.to_list(), df_components[col].to_list()] - ) + data = np.column_stack([df_components.index.to_list(), df_components[col].to_list()]) if args1.xrd: np.savetxt( @@ -278,8 +238,7 @@ def tup(s): output_fn + f"_comp{i}" + ".cgr", ), data, - header=f"NMF Generated PDF\nSource: nmfMapping\n" - f"Date: {output_fn}\nr g", + header=f"NMF Generated PDF\nSource: nmfMapping\n" f"Date: {output_fn}\nr g", fmt="%s", ) diff --git a/src/diffpy/nmf_mapping/nmf_mapping_code.py b/src/diffpy/nmf_mapping/nmf_mapping_code.py index 2bcf9e2..638abd4 100644 --- a/src/diffpy/nmf_mapping/nmf_mapping_code.py +++ b/src/diffpy/nmf_mapping/nmf_mapping_code.py @@ -81,9 +81,7 @@ def load_data(dir, xrd=False): x = new_dat[:, 0] y = new_dat[:, 1] if len(x) != len(x_set) or not all(x == x_set): - f = interpolate.interp1d( - x, y, bounds_error=False, fill_value="extrapolate" - ) + f = interpolate.interp1d(x, y, bounds_error=False, fill_value="extrapolate") data_arr[i][:, 1] = f(x_set) data_arr[i][:, 0] = x_set else: @@ -154,8 +152,7 @@ def NMF_decomposition( else: df_list.append( x_vs_y_df_preprocess[ - (x_vs_y_df_preprocess.index >= x_low) - & (x_vs_y_df_preprocess.index <= x_high) + (x_vs_y_df_preprocess.index >= x_low) & (x_vs_y_df_preprocess.index <= x_high) ] ) x_vs_y_df = pd.concat(df_list) @@ -178,9 +175,7 @@ def NMF_decomposition( pca.fit(x_vs_y_df.to_numpy().T) pca_number_components = len(pca.components_) pca_explained_variance = pca.explained_variance_ratio_ - df_explained_var_ratio = pd.DataFrame( - pd.Series(pca_explained_variance) - ) + df_explained_var_ratio = pd.DataFrame(pd.Series(pca_explained_variance)) df_explained_var_ratio.index = df_explained_var_ratio.index + 1 sweeping_grid = range(1, max_comp + 1, 1) for i in sweeping_grid: @@ -193,9 +188,7 @@ def NMF_decomposition( if thresh is None: if improve_thresh is not None: if improve_thresh > 1 or improve_thresh < 0: - raise ValueError( - "Invalid improvement threshold ratio. Must be between 0 and 1." - ) + raise ValueError("Invalid improvement threshold ratio. Must be between 0 and 1.") thresh = nmf_ncomp_selection(nmf_loss, rtol=improve_thresh) elif pca_thresh: thresh = pca_number_components @@ -218,9 +211,7 @@ def NMF_decomposition( nmf_weight /= nmf_weight.sum(1)[:, np.newaxis] nmf_weight = nmf_weight.T nmf_weight = np.array([nmf_weight[s, :] for s in range(n_comp)]) - df_component_weight_timeseries = pd.DataFrame( - nmf_weight, index=range(n_comp) - ) + df_component_weight_timeseries = pd.DataFrame(nmf_weight, index=range(n_comp)) if pca_thresh: return ( @@ -425,14 +416,10 @@ def nmf_ncomp_selection(loss, rtol=None): rtol = 1e-2 (inds,) = np.where(imp_ratio <= rtol) if not list(inds): - print( - "Improvement ratio of 1E-2 not met. Inspect data and impose manual cutoff" - ) + print("Improvement ratio of 1E-2 not met. Inspect data and impose manual cutoff") len(loss) return starting_len if not list(inds): - print( - f"Improvement ratio of {rtol} not met. Inspect data and impose manual cutoff" - ) + print(f"Improvement ratio of {rtol} not met. Inspect data and impose manual cutoff") return starting_len return inds[0] + 1 diff --git a/src/diffpy/version.py b/src/diffpy/version.py index c3bac34..45c8a92 100644 --- a/src/diffpy/version.py +++ b/src/diffpy/version.py @@ -23,8 +23,6 @@ __version__ = get_distribution("diffpy.nmf_mapping").version # we assume that tag_date was used and __version__ ends in YYYYMMDD -__date__ = ( - __version__[-8:-4] + "-" + __version__[-4:-2] + "-" + __version__[-2:] -) +__date__ = __version__[-8:-4] + "-" + __version__[-4:-2] + "-" + __version__[-2:] # End of file diff --git a/tests/test_load_data.py b/tests/test_load_data.py index 6ef96c4..8c2f6a3 100644 --- a/tests/test_load_data.py +++ b/tests/test_load_data.py @@ -13,19 +13,9 @@ def test_load_data(): bfto_dir = os.path.join(dir, "data/synthetic_r_vs_gr") bfto_expected_filenames = np.array([f"synthetic{i}" for i in range(50)]) loaded_filenames = nmf.load_data(bfto_dir)[1] - assert ( - np.testing.assert_array_equal( - loaded_filenames, bfto_expected_filenames - ) - is None - ) + assert np.testing.assert_array_equal(loaded_filenames, bfto_expected_filenames) is None # make sure interpolation to same r-grid is working diff_r_grid_dir = os.path.join(dir, "data", "different_r_grid") diff_r_grid_arr = nmf.load_data(diff_r_grid_dir)[0] - assert ( - np.testing.assert_array_equal( - diff_r_grid_arr[0][:, 0], diff_r_grid_arr[1][:, 0] - ) - is None - ) + assert np.testing.assert_array_equal(diff_r_grid_arr[0][:, 0], diff_r_grid_arr[1][:, 0]) is None