Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 50 additions & 4 deletions dpdata/vasp/outcar.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,35 @@
import numpy as np


def _safe_float(value: str, context: str = "") -> float:
"""Safely convert string to float with informative error message.

Parameters
----------
value : str
String value to convert to float
context : str, optional
Context information for error message

Returns
-------
float
Converted float value

Raises
------
ValueError
If conversion fails, with informative error message
"""
try:
return float(value)
except ValueError as e:
if context:
raise ValueError(f"Failed to parse {context}: {e}") from e
else:
raise ValueError(f"Failed to convert to float: {e}") from e


def atom_name_from_potcar_string(instr: str) -> str:
"""Get atom name from a potcar element name.

Expand Down Expand Up @@ -240,12 +269,20 @@ def analyze_block(lines, ntot, nelm, ml=False):
if sc_index >= nelm:
is_converge = False
elif energy_token[ml_index] in ii:
energy = float(ii.split()[energy_index[ml_index]])
try:
energy = _safe_float(ii.split()[energy_index[ml_index]], "energy value from OUTCAR")
except (ValueError, IndexError) as e:
raise ValueError(f"Failed to parse energy from OUTCAR line: '{ii.strip()}'. {e}") from e
return coord, cell, energy, force, virial, is_converge
elif cell_token[ml_index] in ii:
for dd in range(3):
tmp_l = lines[idx + cell_index[ml_index] + dd]
cell.append([float(ss) for ss in tmp_l.replace("-", " -").split()[0:3]])
try:
cell_row = [_safe_float(ss, f"cell component on line {idx + cell_index[ml_index] + dd + 1}")
for ss in tmp_l.replace("-", " -").split()[0:3]]
cell.append(cell_row)
except (ValueError, IndexError) as e:
raise ValueError(f"Failed to parse cell vectors from OUTCAR line: '{tmp_l.strip()}'. {e}") from e
elif virial_token[ml_index] in ii:
in_kB_index = virial_index[ml_index]
while idx + in_kB_index < len(lines) and (
Expand All @@ -255,7 +292,12 @@ def analyze_block(lines, ntot, nelm, ml=False):
assert idx + in_kB_index < len(lines), (
'ERROR: "in kB" is not found in OUTCAR. Unable to extract virial.'
)
tmp_v = [float(ss) for ss in lines[idx + in_kB_index].split()[2:8]]
virial_line = lines[idx + in_kB_index]
try:
tmp_v = [_safe_float(ss, "virial component from OUTCAR")
for ss in virial_line.split()[2:8]]
except (ValueError, IndexError) as e:
raise ValueError(f"Failed to parse virial from OUTCAR line: '{virial_line.strip()}'. {e}") from e
virial = np.zeros([3, 3])
virial[0][0] = tmp_v[0]
virial[1][1] = tmp_v[1]
Expand All @@ -269,7 +311,11 @@ def analyze_block(lines, ntot, nelm, ml=False):
elif "TOTAL-FORCE" in ii and (("ML" in ii) == ml):
for jj in range(idx + 2, idx + 2 + ntot):
tmp_l = lines[jj]
info = [float(ss) for ss in tmp_l.split()]
try:
info = [_safe_float(ss, "force/coordinate component from OUTCAR")
for ss in tmp_l.split()]
except (ValueError, IndexError) as e:
raise ValueError(f"Failed to parse forces/coordinates from OUTCAR line: '{tmp_l.strip()}'. {e}") from e
coord.append(info[:3])
force.append(info[3:6])
return coord, cell, energy, force, virial, is_converge
42 changes: 39 additions & 3 deletions dpdata/vasp/poscar.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,35 @@
import numpy as np


def _safe_float(value: str, context: str = "") -> float:
"""Safely convert string to float with informative error message.

Parameters
----------
value : str
String value to convert to float
context : str, optional
Context information for error message

Returns
-------
float
Converted float value

Raises
------
ValueError
If conversion fails, with informative error message
"""
try:
return float(value)
except ValueError as e:
if context:
raise ValueError(f"Failed to parse {context}: {e}") from e
else:
raise ValueError(f"Failed to convert to float: {e}") from e


def _to_system_data_lower(lines, cartesian=True, selective_dynamics=False):
def move_flag_mapper(flag):
if flag == "T":
Expand All @@ -17,19 +46,26 @@ def move_flag_mapper(flag):
system = {}
system["atom_names"] = [str(ii) for ii in lines[5].split()]
system["atom_numbs"] = [int(ii) for ii in lines[6].split()]
scale = float(lines[1])
scale = _safe_float(lines[1].strip(), "scale factor from POSCAR line 2")
cell = []
move_flags = []
for ii in range(2, 5):
boxv = [float(jj) for jj in lines[ii].split()]
line_content = lines[ii].split()
try:
boxv = [_safe_float(jj, f"cell vector component on line {ii+1}") for jj in line_content]
except ValueError as e:
raise ValueError(f"Failed to parse cell vectors in POSCAR: {e}") from e
boxv = np.array(boxv) * scale
cell.append(boxv)
system["cells"] = [np.array(cell)]
natoms = sum(system["atom_numbs"])
coord = []
for ii in range(8, 8 + natoms):
tmp = lines[ii].split()
tmpv = [float(jj) for jj in tmp[:3]]
try:
tmpv = [_safe_float(jj, f"coordinate component on line {ii+1}") for jj in tmp[:3]]
except ValueError as e:
raise ValueError(f"Failed to parse coordinates in POSCAR: {e}") from e
if cartesian:
tmpv = np.array(tmpv) * scale
else:
Expand Down
182 changes: 182 additions & 0 deletions tests/test_vasp_float_conversion_fix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
#!/usr/bin/env python3
"""Test suite for the string-to-float conversion bug fix in VASP parsing.

This tests the fix for issue #611: "could not convert string to float"
"""

import os
import tempfile
import unittest

from dpdata import System


class TestVaspFloatConversionFix(unittest.TestCase):
"""Test robust error handling for malformed VASP files."""

def setUp(self):
"""Set up temporary files for testing."""
self.temp_files = []

def tearDown(self):
"""Clean up temporary files."""
for temp_file in self.temp_files:
try:
os.unlink(temp_file)
except FileNotFoundError:
pass

def _create_temp_file(self, content, suffix='.poscar'):
"""Create a temporary file with given content."""
with tempfile.NamedTemporaryFile(mode='w', suffix=suffix, delete=False) as f:
f.write(content)
f.flush()
self.temp_files.append(f.name)
return f.name

def test_poscar_invalid_scale_factor(self):
"""Test POSCAR parsing with invalid scale factor."""
poscar_content = """Test
INVALID_SCALE
1.0000000000000000 0.0000000000000000 0.0000000000000000
0.0000000000000000 1.0000000000000000 0.0000000000000000
0.0000000000000000 0.0000000000000000 1.0000000000000000
Mg
1
Cartesian
0.0000000000000000 0.0000000000000000 0.0000000000000000
"""
temp_file = self._create_temp_file(poscar_content)

with self.assertRaises(ValueError) as cm:
System(temp_file, fmt='vasp/poscar')

error_msg = str(cm.exception)
self.assertIn("Failed to parse scale factor from POSCAR line 2", error_msg)
self.assertIn("INVALID_SCALE", error_msg)

def test_poscar_invalid_cell_vector(self):
"""Test POSCAR parsing with invalid cell vector component."""
poscar_content = """Test
1.0
INVALID_CELL 0.0000000000000000 0.0000000000000000
0.0000000000000000 1.0000000000000000 0.0000000000000000
0.0000000000000000 0.0000000000000000 1.0000000000000000
Mg
1
Cartesian
0.0000000000000000 0.0000000000000000 0.0000000000000000
"""
temp_file = self._create_temp_file(poscar_content)

with self.assertRaises(ValueError) as cm:
System(temp_file, fmt='vasp/poscar')

error_msg = str(cm.exception)
self.assertIn("Failed to parse cell vectors in POSCAR", error_msg)
self.assertIn("INVALID_CELL", error_msg)

def test_poscar_invalid_coordinates(self):
"""Test POSCAR parsing with invalid coordinate component."""
poscar_content = """Test
1.0
1.0000000000000000 0.0000000000000000 0.0000000000000000
0.0000000000000000 1.0000000000000000 0.0000000000000000
0.0000000000000000 0.0000000000000000 1.0000000000000000
Mg
1
Cartesian
INVALID_COORD 0.0000000000000000 0.0000000000000000
"""
temp_file = self._create_temp_file(poscar_content)

with self.assertRaises(ValueError) as cm:
System(temp_file, fmt='vasp/poscar')

error_msg = str(cm.exception)
self.assertIn("Failed to parse coordinates in POSCAR", error_msg)
self.assertIn("INVALID_COORD", error_msg)

def test_poscar_empty_scale_factor(self):
"""Test POSCAR parsing with empty scale factor."""
poscar_content = """Test

1.0000000000000000 0.0000000000000000 0.0000000000000000
0.0000000000000000 1.0000000000000000 0.0000000000000000
0.0000000000000000 0.0000000000000000 1.0000000000000000
Mg
1
Cartesian
0.0000000000000000 0.0000000000000000 0.0000000000000000
"""
temp_file = self._create_temp_file(poscar_content)

with self.assertRaises(ValueError) as cm:
System(temp_file, fmt='vasp/poscar')

error_msg = str(cm.exception)
self.assertIn("Failed to parse scale factor from POSCAR line 2", error_msg)

def test_poscar_insufficient_cell_components(self):
"""Test POSCAR parsing with insufficient cell vector components."""
poscar_content = """Test
1.0
1.0000000000000000 0.0000000000000000
0.0000000000000000 1.0000000000000000 0.0000000000000000
0.0000000000000000 0.0000000000000000 1.0000000000000000
Mg
1
Cartesian
0.0000000000000000 0.0000000000000000 0.0000000000000000
"""
temp_file = self._create_temp_file(poscar_content)

with self.assertRaises((ValueError, IndexError)):
System(temp_file, fmt='vasp/poscar')

def test_poscar_valid_file_still_works(self):
"""Test that valid POSCAR files still work correctly."""
poscar_content = """Test System
1.0
5.0000000000000000 0.0000000000000000 0.0000000000000000
0.0000000000000000 5.0000000000000000 0.0000000000000000
0.0000000000000000 0.0000000000000000 5.0000000000000000
Mg
1
Cartesian
0.0000000000000000 0.0000000000000000 0.0000000000000000
"""
temp_file = self._create_temp_file(poscar_content)

# This should not raise any exception
sys = System(temp_file, fmt='vasp/poscar')

# Verify the system was parsed correctly
self.assertEqual(len(sys), 1) # 1 frame
self.assertEqual(sys.get_natoms(), 1) # 1 atom
self.assertEqual(sys["atom_names"], ["Mg"])

def test_poscar_special_characters_in_numbers(self):
"""Test POSCAR parsing with special characters that might appear in malformed files."""
poscar_content = """Test
1.0
1.0000000000000000 0.0000000000000000 0.0000000000000000
0.0000000000000000 1.0000000000000000 0.0000000000000000
0.0000000000000000 0.0000000000000000 1.0000000000000000
Mg
1
Cartesian
0.0000000000000000 0.0000000000000000 *INVALID*
"""
temp_file = self._create_temp_file(poscar_content)

with self.assertRaises(ValueError) as cm:
System(temp_file, fmt='vasp/poscar')

error_msg = str(cm.exception)
self.assertIn("Failed to parse coordinates in POSCAR", error_msg)
self.assertIn("*INVALID*", error_msg)


if __name__ == '__main__':
unittest.main()