From b29aa97d2f995692c331b3637679506570eaa4e4 Mon Sep 17 00:00:00 2001
From: naik-aakash
Date: Wed, 11 Sep 2024 17:04:28 +0200
Subject: [PATCH] add batch unique bonds df method

---
 src/lobsterpy/featurize/batch.py | 41 ++++++++++++++++++++++++++++++++
 src/lobsterpy/featurize/core.py  |  4 ++--
 2 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/src/lobsterpy/featurize/batch.py b/src/lobsterpy/featurize/batch.py
index 3bbb8c46..e3635834 100644
--- a/src/lobsterpy/featurize/batch.py
+++ b/src/lobsterpy/featurize/batch.py
@@ -128,6 +128,14 @@ def _featurizelobsterpy(self, file_name_or_path: str | Path) -> pd.DataFrame:
 
         return featurize_lobsterpy.get_df()
 
+    def _featurizeuniquebonds(self, path: str | Path) -> pd.DataFrame:
+        """
+        Featurize unique bonds identified by Lobsterpy via FeaturizeLobsterpy.get_unique_bonds_df.
+
+        :param path: path to root directory consisting of all lobster calc files
+        """
+        return FeaturizeLobsterpy.get_unique_bonds_df(path_to_lobster_calc=path, bonds=self.bonds)
+
     def _featurizecoxx(self, path_to_lobster_calc: str | Path) -> pd.DataFrame:
         """
         Featurize COHP/COBI/COOPCAR data using FeaturizeCOXX.
@@ -310,6 +318,39 @@ def get_df(self) -> pd.DataFrame:
 
         return pd.concat([df_lobsterpy, df_coxx, df_charges], axis=1)
 
+    def get_unique_bonds_df(self) -> pd.DataFrame:
+        """
+        Generate a pandas dataframe with unique relevant bonds extracted from LOBSTER files.
+
+        Uses multiprocessing to speed up the process.
+
+        Returns:
+            The per-directory dataframes concatenated, sorted by index, with missing values filled with 0
+
+        """
+        paths = [
+            os.path.join(self.path_to_lobster_calcs, f)
+            for f in os.listdir(self.path_to_lobster_calcs)
+            if not f.startswith("t")
+            and not f.startswith(".")
+            and os.path.isdir(os.path.join(self.path_to_lobster_calcs, f))
+        ]
+
+        row = []
+        with (
+            mp.Pool(processes=self.n_jobs, maxtasksperchild=1) as pool,
+            tqdm(total=len(paths), desc="Generating COHP unique bonds dataframe") as pbar,
+        ):
+            for result in pool.imap_unordered(self._featurizeuniquebonds, paths, chunksize=1):
+                pbar.update()
+                row.append(result)
+
+        df = pd.concat(row)
+        df.sort_index(inplace=True)  # noqa: PD002
+        df.fillna(0, inplace=True)  # noqa: PD002
+
+        return df
+
 
 class BatchCoxxFingerprint:
     """
diff --git a/src/lobsterpy/featurize/core.py b/src/lobsterpy/featurize/core.py
index e1d35f93..8ede5d90 100644
--- a/src/lobsterpy/featurize/core.py
+++ b/src/lobsterpy/featurize/core.py
@@ -322,8 +322,8 @@ def get_lobsterpy_cba_dict(path_to_lobster_calc: str | Path, bonds: str, orbital
     def get_unique_bonds_df(
         path_to_lobster_calc: str | Path,
         bonds: str,
-        summed_icohps: bool,
-        rm_weighted_icohps: bool,
+        summed_icohps: bool = False,
+        rm_weighted_icohps: bool = False,
         ids: str | None = None,
     ) -> pd.DataFrame:
         """