-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #242 from kaueltzen/structure_path
[WIP] File path handling in featurizer module
- Loading branch information
Showing
9 changed files
with
324 additions
and
403 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
# Copyright (c) lobsterpy development team | ||
# Distributed under the terms of a BSD 3-Clause "New" or "Revised" License | ||
|
||
"""This package provides the modules for featurzing Lobster data ready for ML studies.""" | ||
from __future__ import annotations | ||
|
||
from pathlib import Path | ||
|
||
from monty.os.path import zpath | ||
|
||
|
||
def get_file_paths(
    path_to_lobster_calc: str | Path = "",
    requested_files: list[str] | None = None,
    use_lso_dos: bool = True,
) -> dict:
    """
    Get file paths for LobsterPy featurizations, raise Exception if not all of requested paths exist.

    Each requested key is looked up in a table of default file names; a key that
    is not in the table is used as the file name itself. The unzipped file is
    preferred, otherwise the path returned by ``zpath`` is used if it exists.

    :param path_to_lobster_calc: path to root LOBSTER calc directory
    :param requested_files: files to return paths for. ``None`` (the default)
        is treated like an empty list and yields an empty dict.
    :param use_lso_dos: solely required for BatchDosFeaturizer.
        Will force featurizer to use DOSCAR.LSO.lobster instead of DOSCAR.lobster.
    :return: dict that assigns each item of requested_files its path
    :raises Exception: if at least one requested file could not be found; the
        message lists the file names that are missing.
    """
    default_values = {
        "poscar": "POSCAR",
        "cohpcar": "COHPCAR.lobster",
        "icohplist": "ICOHPLIST.lobster",
        "cobicar": "COBICAR.lobster",
        "icobilist": "ICOBILIST.lobster",
        "coopcar": "COOPCAR.lobster",
        "icooplist": "ICOOPLIST.lobster",
        "charge": "CHARGE.lobster",
        "madelung": "MadelungEnergies.lobster",
        "doscar": ("DOSCAR.LSO.lobster" if use_lso_dos else "DOSCAR.lobster"),
    }

    lobster_path = Path(path_to_lobster_calc)
    file_paths = {}
    missing_files = []

    for file in requested_files or []:
        # Keys without a default entry are interpreted as literal file names.
        file_str = default_values.get(file, file)

        if file == "poscar":
            # The structure file has its own lookup (POSCAR, then POSCAR.lobster).
            try:
                file_paths[file] = get_structure_path(lobster_path=lobster_path)
            except Exception:
                # Broad on purpose: the helper signals "not found" with a plain
                # Exception, and the failure is reported collectively below.
                missing_files.append(file_str)
            continue

        file_path = lobster_path / file_str
        if not file_path.exists():
            # Fall back to whatever (possibly compressed) path zpath resolves.
            file_path = Path(zpath(file_path))

        if file_path.exists():
            file_paths[file] = file_path
        else:
            # Report the resolved file name; indexing default_values with a
            # custom key here would raise KeyError and hide the real problem.
            missing_files.append(file_str)

    if missing_files:
        raise Exception(f"Files {missing_files} not found in {lobster_path.name}.")

    return file_paths
|
||
|
||
def get_structure_path(lobster_path: Path) -> Path:
    """
    Search iteratively for (unzipped / zipped) structure file.

    POSCAR is prioritized over POSCAR.lobster. For each candidate name the
    unzipped file is checked first, then the path resolved by ``zpath``.

    :param lobster_path: path to root LOBSTER calc directory
    :return: path to structure file
    :raises FileNotFoundError: if none of the candidate structure files exists
        (a subclass of Exception, so ``except Exception`` callers still work)
    """
    candidates = ["POSCAR", "POSCAR.lobster"]
    for filename in candidates:
        poscar_path = lobster_path / filename
        if poscar_path.exists():
            return poscar_path
        # zpath is expected to resolve a compressed variant of the same file.
        gz_file_path = Path(zpath(poscar_path))
        if gz_file_path.exists():
            return gz_file_path

    raise FileNotFoundError(
        f"No structure file ({' or '.join(candidates)}, unzipped or zipped) found in {lobster_path}."
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
import gzip | ||
import shutil | ||
from pathlib import Path | ||
|
||
from pymatgen.core import Structure | ||
|
||
from lobsterpy.featurize import get_file_paths, get_structure_path | ||
|
||
# Directory that contains this test module.
CurrentDir = Path(__file__).absolute().parent
# Parent of CurrentDir; the tests below resolve "test_data/..." fixtures against it.
TestDir = CurrentDir / "../"
|
||
|
||
def test_get_structure_path(tmp_path):
    """
    Check that get_structure_path returns a Path to the structure file.

    Covers an unzipped POSCAR in a temporary directory, and a directory that
    holds both VASP and LOBSTER structure files, where the VASP file must win.
    """
    # Decompress the fixture so that tmp_path contains a plain POSCAR.
    with gzip.open(TestDir / "test_data/test_structure_path_handling/POSCAR.gz", "rb") as source:
        with open(tmp_path / "POSCAR", "wb") as target:
            shutil.copyfileobj(source, target)

    unzipped_result = get_structure_path(lobster_path=tmp_path)
    assert isinstance(unzipped_result, Path)

    both_present_result = get_structure_path(lobster_path=TestDir / "test_data/test_structure_path_handling")
    assert isinstance(both_present_result, Path)

    # The structure that was picked up must not contain Zn.
    structure = Structure.from_file(both_present_result)
    symbols = [element.symbol for element in structure.elements]
    assert "Zn" not in symbols
|
||
|
||
def test_get_file_paths(tmp_path):
    """
    Check that get_file_paths() returns a dict mapping str keys to Path values.

    Exercised once against the zipped fixture directory and once against
    freshly unzipped copies placed in a temporary directory.
    """
    zipped_result = get_file_paths(
        path_to_lobster_calc=TestDir / "test_data/BaTaO2N1",
        requested_files=["poscar", "cohpcar", "charge", "icohplist"],
    )
    for name in zipped_result:
        assert isinstance(name, str)
        assert isinstance(zipped_result[name], Path)

    # Materialize unzipped copies of two fixture files in tmp_path.
    for file in ["COHPCAR.lobster", "ICOHPLIST.lobster"]:
        with gzip.open(TestDir / f"test_data/BaTaO2N1/{file}.gz", "rb") as source:
            with open(tmp_path / file, "wb") as target:
                shutil.copyfileobj(source, target)

    unzipped_result = get_file_paths(path_to_lobster_calc=tmp_path, requested_files=["cohpcar", "icohplist"])
    for name in unzipped_result:
        assert isinstance(name, str)
        assert isinstance(unzipped_result[name], Path)
Binary file not shown.
Binary file not shown.