fix parse_vasp_dir checking for gzipped VASP files
plus code clean up
janosh committed May 4, 2024
1 parent 6408de5 commit 455f4d8
Showing 6 changed files with 49 additions and 66 deletions.
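In practical terms, the fix means a calculation directory whose outputs were gzipped after the run no longer falls through to the "No data parsed" error. A minimal usage sketch under that assumption (the directory name is hypothetical; the return keys follow the fine_tuning notebook further down):

    from chgnet.utils import parse_vasp_dir

    # folder containing only OSZICAR.gz, vasprun.xml.gz, OUTCAR.gz
    dataset = parse_vasp_dir(base_dir="./my_gzipped_vasp_calc")
    print(dataset["energy_per_atom"])  # parsed energy labels, one entry per ionic step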
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -4,7 +4,7 @@ default_install_hook_types: [pre-commit, commit-msg]

repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
- rev: v0.4.1
+ rev: v0.4.3
hooks:
- id: ruff
args: [--fix]
@@ -46,7 +46,7 @@ repos:
- svelte

- repo: https://github.com/pre-commit/mirrors-eslint
- rev: v9.1.1
+ rev: v9.2.0
hooks:
- id: eslint
types: [file]
2 changes: 1 addition & 1 deletion chgnet/data/dataset.py
@@ -113,7 +113,7 @@ def from_vasp(
Default = True
"""
result_dict = utils.parse_vasp_dir(
- file_root=file_root,
+ base_dir=file_root,
check_electronic_convergence=check_electronic_convergence,
save_path=save_path,
)
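Note that the public StructureData.from_vasp helper keeps its file_root keyword; only the underlying utility argument was renamed to base_dir, as the change above shows. A short sketch of the unchanged dataset-level call (paths hypothetical, mirroring the notebook below):

    from chgnet.data.dataset import StructureData

    dataset = StructureData.from_vasp(
        file_root="./my_vasp_calc_dir",  # still the public kwarg; forwarded as base_dir internally
        save_path="./my_vasp_calc_dir/chgnet_dataset.json",
    )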
61 changes: 21 additions & 40 deletions chgnet/utils/vasp_utils.py
@@ -5,6 +5,7 @@
from typing import TYPE_CHECKING

from monty.io import reverse_readfile
+ from monty.os.path import zpath
from pymatgen.io.vasp.outputs import Oszicar, Vasprun

from chgnet.utils import write_json
@@ -14,7 +15,7 @@


def parse_vasp_dir(
- file_root: str,
+ base_dir: str,
check_electronic_convergence: bool = True,
save_path: str | None = None,
) -> dict[str, list]:
@@ -23,25 +24,20 @@ def parse_vasp_dir(
plz modify the code if magnetization is for (y) and (z).
Args:
- file_root (str): the directory of the VASP calculation outputs
+ base_dir (str): the directory of the VASP calculation outputs
check_electronic_convergence (bool): if set to True, this function will raise
Exception to VASP calculation that did not achieve electronic convergence.
Default = True
save_path (str): path to save the parsed VASP labels
"""
- if os.path.exists(file_root) is False:
-     raise FileNotFoundError("No such file or directory")
-
- if os.path.exists(f"{file_root}/OSZICAR"):
-     oszicar_path = f"{file_root}/OSZICAR"
-     vasprun_path = f"{file_root}/vasprun.xml"
-     outcar_path = f"{file_root}/OUTCAR"
- elif os.path.exists(f"{file_root}/OSZICAR"):
-     oszicar_path = f"{file_root}/OSZICAR.gz"
-     vasprun_path = f"{file_root}/vasprun.xml.gz"
-     outcar_path = f"{file_root}/OUTCAR.gz"
- else:
-     raise RuntimeError(f"No data parsed from {file_root}!")
+ if os.path.isdir(base_dir) is False:
+     raise FileNotFoundError(f"{base_dir=} is not a directory")
+
+ oszicar_path = zpath(f"{base_dir}/OSZICAR")
+ vasprun_path = zpath(f"{base_dir}/vasprun.xml")
+ outcar_path = zpath(f"{base_dir}/OUTCAR")
+ if not os.path.exists(oszicar_path) or not os.path.exists(vasprun_path):
+     raise RuntimeError(f"No data parsed from {base_dir}!")

oszicar = Oszicar(oszicar_path)
vasprun_orig = Vasprun(
@@ -53,12 +49,7 @@ def parse_vasp_dir(
exception_on_bad_xml=False,
)

- charge = []
- mag_x = []
- mag_y = []
- mag_z = []
- header = []
- all_lines = []
+ charge, mag_x, mag_y, mag_z, header, all_lines = [], [], [], [], [], []

for line in reverse_readfile(outcar_path):
clean = line.strip()
@@ -67,10 +58,8 @@
all_lines.reverse()
# For single atom systems, VASP doesn't print a total line, so
# reverse parsing is very difficult
- read_charge = False
- read_mag_x = False
- read_mag_y = False  # for SOC calculations only
- read_mag_z = False
+ # for SOC calculations only
+ read_charge = read_mag_x = read_mag_y = read_mag_z = False
mag_x_all = []
ion_step_count = 0

@@ -97,32 +86,24 @@
elif clean.startswith("tot"):
if ion_step_count == (len(mag_x_all) + 1):
mag_x_all.append(mag_x)
- read_charge = False
- read_mag_x = False
- read_mag_y = False
- read_mag_z = False
+ read_charge = read_mag_x = read_mag_y = read_mag_z = False
if clean == "total charge":
read_charge = True
- read_mag_x, read_mag_y, read_mag_z = False, False, False
+ read_mag_x = read_mag_y = read_mag_z = False
elif clean == "magnetization (x)":
mag_x = []
read_mag_x = True
- read_charge, read_mag_y, read_mag_z = False, False, False
+ read_charge = read_mag_y = read_mag_z = False
elif clean == "magnetization (y)":
mag_y = []
read_mag_y = True
- read_charge, read_mag_x, read_mag_z = False, False, False
+ read_charge = read_mag_x = read_mag_z = False
elif clean == "magnetization (z)":
mag_z = []
read_mag_z = True
- read_charge, read_mag_x, read_mag_y = False, False, False
+ read_charge = read_mag_x = read_mag_y = False
elif re.search("electrostatic", clean):
- read_charge, read_mag_x, read_mag_y, read_mag_z = (
-     False,
-     False,
-     False,
-     False,
- )
+ read_charge = read_mag_x = read_mag_y = read_mag_z = False

if len(oszicar.ionic_steps) == len(mag_x_all): # unfinished VASP job
print("Unfinished OUTCAR")
@@ -157,7 +138,7 @@
dataset["stress"].append(ionic_step["stress"])

if dataset["uncorrected_total_energy"] == []:
raise RuntimeError(f"No data parsed from {file_root}!")
raise RuntimeError(f"No data parsed from {base_dir}!")

if save_path is not None:
save_dict = dataset.copy()
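The core of the fix above is replacing the hand-rolled .gz branch, whose elif re-tested the uncompressed OSZICAR path and so could never trigger, with monty's zpath, which returns the first existing compressed or uncompressed variant of a filename. A minimal sketch of that resolution, assuming a directory that only holds gzipped outputs (path hypothetical):

    import os

    from monty.os.path import zpath

    base_dir = "./my_gzipped_vasp_calc"  # contains OUTCAR.gz but no plain OUTCAR
    outcar_path = zpath(f"{base_dir}/OUTCAR")  # resolves to ".../OUTCAR.gz"
    print(os.path.exists(outcar_path))  # True if the gzipped file is present, so parsing can proceed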
35 changes: 17 additions & 18 deletions examples/fine_tuning.ipynb
@@ -69,7 +69,9 @@
"from chgnet.utils import parse_vasp_dir\n",
"\n",
"# ./my_vasp_calc_dir contains vasprun.xml OSZICAR etc.\n",
"dataset_dict = parse_vasp_dir(file_root=\"./my_vasp_calc_dir\", save_path='./my_vasp_calc_dir/chgnet_dataset.json')\n",
"dataset_dict = parse_vasp_dir(\n",
" file_root=\"./my_vasp_calc_dir\", save_path=\"./my_vasp_calc_dir/chgnet_dataset.json\"\n",
")\n",
"print(list(dataset_dict))"
]
},
@@ -78,11 +80,11 @@
"id": "6",
"metadata": {},
"source": [
"The parsed python dictionary includes information for CHGNet inputs (structures), and CHGNet prediction labels (energy, force, stress ,magmom). \n",
"The parsed python dictionary includes information for CHGNet inputs (structures), and CHGNet prediction labels (energy, force, stress ,magmom).\n",
"\n",
"we can save the parsed structures and labels to disk, so that they can be easily reloaded during multiple rounds of training.\n",
"\n",
"The json file can be saved by providing the save_path"
"The json file can be saved by providing the save_path\n"
]
},
{
@@ -94,9 +96,7 @@
"\n",
"Below are the example codes to save the structures in either json, pickle, cif, or CHGNet graph.\n",
"\n",
"For super-large training dataset, like MPtrj dataset, we recommend [converting them to CHGNet graphs](https://github.com/CederGroupHub/chgnet/blob/main/examples/make_graphs.py). This will save significant memory and graph computing time.\n",
"\n",
"\n"
"For super-large training dataset, like MPtrj dataset, we recommend [converting them to CHGNet graphs](https://github.com/CederGroupHub/chgnet/blob/main/examples/make_graphs.py). This will save significant memory and graph computing time.\n"
]
},
{
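As a rough sketch of the json option mentioned in that markdown cell, the labels parsed in step 0 can be written out with the same write_json helper that vasp_utils.py imports above (assuming it takes the dict first and the file path second; the path is hypothetical):

    from chgnet.utils import write_json

    # dataset_dict as returned by parse_vasp_dir
    write_json(dataset_dict, "./my_vasp_calc_dir/chgnet_dataset.json")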
@@ -166,7 +166,7 @@
"id": "11",
"metadata": {},
"source": [
"If you have parsed your VASP labels from step 0, you can reload the saved json file."
"If you have parsed your VASP labels from step 0, you can reload the saved json file.\n"
]
},
{
@@ -178,20 +178,20 @@
"source": [
"from chgnet.utils import read_json\n",
"\n",
"dataset_dict = read_json('./my_vasp_calc_dir/chgnet_dataset.json')\n",
"structures = [Structure.from_dict(struct) for struct in dataset_dict['structure']]\n",
"energies = dataset_dict['energy_per_atom']\n",
"dataset_dict = read_json(\"./my_vasp_calc_dir/chgnet_dataset.json\")\n",
"structures = [Structure.from_dict(struct) for struct in dataset_dict[\"structure\"]]\n",
"energies = dataset_dict[\"energy_per_atom\"]\n",
"forces = dataset_dict[\"force\"]\n",
"stresses = None if result_dict['stress'] in [None, []] else result_dict['stress']\n",
"magmoms = None if result_dict['magmom'] in [None, []] else result_dict['magmom']"
"stresses = None if result_dict[\"stress\"] in [None, []] else result_dict[\"stress\"]\n",
"magmoms = None if result_dict[\"magmom\"] in [None, []] else result_dict[\"magmom\"]"
]
},
{
"cell_type": "markdown",
"id": "13",
"metadata": {},
"source": [
"If you don't have any DFT calculations now, we can create a dummy fine-tuning dataset by using CHGNet prediction with some random noise."
"If you don't have any DFT calculations now, we can create a dummy fine-tuning dataset by using CHGNet prediction with some random noise.\n"
]
},
{
@@ -238,7 +238,7 @@
"metadata": {},
"source": [
"Note that the stress output from CHGNet is in unit of GPa, here the -10 unit conversion\n",
"modifies it to be kbar in VASP raw unit. \n",
"modifies it to be kbar in VASP raw unit.\n",
"If you're using stress labels from VASP, you don't need to do any unit conversions\n",
"StructureData dataset class takes in VASP units.\n"
]
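A one-line sketch of the conversion that markdown cell describes: a factor of 10 takes GPa to kbar, and the sign is flipped to match VASP's raw stress convention (variable names here are hypothetical):

    import numpy as np

    # hypothetical 3x3 stress tensor predicted by CHGNet, in GPa
    stress_gpa = np.eye(3) * 0.5
    stress_vasp_kbar = -10 * stress_gpa  # kbar with VASP's sign, as StructureData expects
    print(stress_vasp_kbar)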
@@ -294,7 +294,7 @@
"metadata": {},
"source": [
"Alternatively, the dataset can be directly created from VASP calculation dir.\n",
"This function essentially parse the VASP directory first, save the labels to json file, and create the StructureData class"
"This function essentially parse the VASP directory first, save the labels to json file, and create the StructureData class\n"
]
},
{
@@ -305,8 +305,7 @@
"outputs": [],
"source": [
"dataset = StructureData.from_vasp(\n",
" file_root=\"./my_vasp_calc_dir\", \n",
" save_path='./my_vasp_calc_dir/chgnet_dataset.json'\n",
" file_root=\"./my_vasp_calc_dir\", save_path=\"./my_vasp_calc_dir/chgnet_dataset.json\"\n",
")"
]
},
@@ -838,7 +837,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.11.7"
}
},
"nbformat": 4,
11 changes: 7 additions & 4 deletions tests/test_model.py
@@ -228,19 +228,22 @@ def test_model_load_version_params(
assert model.version == v030_key
assert model.n_params == v030_params
stdout, stderr = capsys.readouterr()
- expected_stdout = lambda version, params: (
-     f"CHGNet v{version} initialized with {params:,} parameters\n"
+
+ assert stdout == (
+     f"CHGNet v{v030_key} initialized with {v030_params:,} parameters\n"
"CHGNet will run on cpu\n"
)
- assert stdout == expected_stdout(v030_key, v030_params)
assert stderr == ""

v020_key, v020_params = "0.2.0", 400_438
model = CHGNet.load(model_name=v020_key, use_device="cpu")
assert model.version == v020_key
assert model.n_params == v020_params
stdout, stderr = capsys.readouterr()
- assert stdout == expected_stdout(v020_key, v020_params)
+ assert stdout == (
+     f"CHGNet v{v020_key} initialized with {v020_params:,} parameters\n"
+     "CHGNet will run on cpu\n"
+ )
assert stderr == ""

model_name = "0.1.0" # invalid
2 changes: 1 addition & 1 deletion tests/test_vasp_utils.py
@@ -62,7 +62,7 @@ def test_parse_vasp_dir_without_magmoms(tmp_path: Path):

def test_parse_vasp_dir_no_data():
# test non-existing directory
with pytest.raises(FileNotFoundError, match="No such file or directory"):
with pytest.raises(FileNotFoundError, match="is not a directory"):
parse_vasp_dir(f"{ROOT}/tests/files/non-existent")

# test existing directory without VASP files
