Skip to content

Commit

Permalink
add batch unique bonds df method
Browse files Browse the repository at this point in the history
  • Loading branch information
naik-aakash committed Sep 11, 2024
1 parent 6a3f177 commit b29aa97
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 2 deletions.
41 changes: 41 additions & 0 deletions src/lobsterpy/featurize/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,14 @@ def _featurizelobsterpy(self, file_name_or_path: str | Path) -> pd.DataFrame:

return featurize_lobsterpy.get_df()

def _featurizeuniquebonds(self, path: str | Path) -> pd.DataFrame:
    """
    Build the unique-bonds dataframe for one LOBSTER calculation directory.

    Thin wrapper that forwards to ``FeaturizeLobsterpy.get_unique_bonds_df`` using
    the ``bonds`` setting stored on this instance; all other options of that
    function are left at their defaults.

    :param path: path to a single directory containing the LOBSTER calc files
    :return: the dataframe produced by ``FeaturizeLobsterpy.get_unique_bonds_df``
    """
    unique_bonds_df = FeaturizeLobsterpy.get_unique_bonds_df(
        path_to_lobster_calc=path,
        bonds=self.bonds,
    )
    return unique_bonds_df

def _featurizecoxx(self, path_to_lobster_calc: str | Path) -> pd.DataFrame:
"""
Featurize COHP/COBI/COOPCAR data using FeaturizeCOXX.
Expand Down Expand Up @@ -310,6 +318,39 @@ def get_df(self) -> pd.DataFrame:

return pd.concat([df_lobsterpy, df_coxx, df_charges], axis=1)

def get_unique_bonds_df(self) -> pd.DataFrame:
    """
    Generate a pandas dataframe with unique relevant bonds extracted from LOBSTER files.

    Every sub-directory of ``self.path_to_lobster_calcs`` whose name does not start
    with "t" or "." is passed to ``self._featurizeuniquebonds``. The calls are
    distributed over a multiprocessing pool with ``self.n_jobs`` processes.

    Returns:
        A pandas dataframe obtained by concatenating the per-directory dataframes,
        sorted by index, with missing values replaced by 0.

    Raises:
        ValueError: if no matching calculation directory is found.
    """
    root = self.path_to_lobster_calcs

    paths = []
    for entry in os.listdir(root):
        # Entries whose name starts with "t" or "." are excluded up front.
        if entry.startswith(("t", ".")):
            continue
        candidate = os.path.join(root, entry)
        # Only directories are featurized; plain files are ignored.
        if os.path.isdir(candidate):
            paths.append(candidate)

    if not paths:
        # Fail before starting worker processes; pd.concat on an empty list
        # would otherwise raise a much less helpful ValueError afterwards.
        raise ValueError(f"No LOBSTER calculation directories found in {root}")

    rows = []
    with (
        mp.Pool(processes=self.n_jobs, maxtasksperchild=1) as pool,
        tqdm(total=len(paths), desc="Generating COHP unique bonds dataframe") as pbar,
    ):
        for result in pool.imap_unordered(self._featurizeuniquebonds, paths, chunksize=1):
            pbar.update()
            rows.append(result)

    # imap_unordered yields results in completion order, so sort by index to get
    # a stable row order; concat leaves NaN where a frame lacks a column.
    return pd.concat(rows).sort_index().fillna(0)


class BatchCoxxFingerprint:
"""
Expand Down
4 changes: 2 additions & 2 deletions src/lobsterpy/featurize/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,8 +322,8 @@ def get_lobsterpy_cba_dict(path_to_lobster_calc: str | Path, bonds: str, orbital
def get_unique_bonds_df(
path_to_lobster_calc: str | Path,
bonds: str,
summed_icohps: bool,
rm_weighted_icohps: bool,
summed_icohps: bool = False,
rm_weighted_icohps: bool = False,
ids: str | None = None,
) -> pd.DataFrame:
"""
Expand Down

0 comments on commit b29aa97

Please sign in to comment.