Fix issue with MD missing step 0 and capturing memory estimate (#127)

- Add a new capture block for MD step 0 (**N.B.** its header line does not contain `iteration`).
- If `memory_estimate` is present in an MD step, move it up to the top level of the run.
- Add a `get_only` function to retrieve the sole value from a sequence and check that it is the only one.
- Add a new test of the MD parser.
oerc0122 · Nov 29, 2024 · 208155d
1 parent 16ba99c
commit 208155d
Show file tree

Hide file tree

Showing 8 changed files with 2,774 additions and 42 deletions.
diff --git a/castep_outputs/parsers/castep_file_parser.py b/castep_outputs/parsers/castep_file_parser.py
@@ -59,6 +59,7 @@
     atreg_to_index,
     determine_type,
     fix_data_types,
+    get_only,
     log_factory,
     normalise_key,
     normalise_string,
@@ -458,7 +459,7 @@ def parse_castep_file(castep_file_in: TextIO,
             block = Block.from_re(line, castep_file,
                                   "",
                                   "^-+ <-- SCF", n_end=ncut*3)
-            data = parse_castep_file(block, Filters.HIGH | Filters.SCF)[0]
+            data = get_only(parse_castep_file(block, Filters.HIGH | Filters.SCF))
 
             scf = data.pop("scf")
             curr_run["bsc_energies"] = data.pop("energies")
@@ -960,21 +961,29 @@ def parse_castep_file(castep_file_in: TextIO,
             curr_run["elf"] = _process_elf(block)
 
         # MD Block
-        elif block := Block.from_re(line, castep_file,
+        elif ((block := Block.from_re(line, castep_file,  # Capture general MD step
                                     "Starting MD iteration",
-                                    "finished MD iteration"):
+                                     "finished MD iteration")) or
+              (block := Block.from_re(line, castep_file,  # Capture 0th iteration
+                                     "Starting MD",
+                                     gen_table_re("", "=+")))):
 
             if Filters.MD not in to_parse:
                 continue
 
-            logger("Found MD Block (step %d)", len(curr_run["md"])+1)
+            logger("Found MD Block (step %d)", len(curr_run["md"]))
 
             # Avoid infinite recursion
             next(block)
-            data = parse_castep_file(block)[0]
+            data = get_only(parse_castep_file(block))
             add_aliases(data, {"initial_positions": "positions",
                                "initial_cell": "cell"},
                         replace=True)
+
+            # Put memory estimate to top level
+            if "memory_estimate" in data:
+                curr_run["memory_estimate"] = data.pop("memory_estimate")
+
             curr_run["md"].append(data)
 
         elif block := Block.from_re(line, castep_file,
@@ -1025,7 +1034,7 @@ def parse_castep_file(castep_file_in: TextIO,
             logger("Found geom block (iteration %d)", len(curr_run["geom_opt"]["iterations"])+1)
             # Avoid infinite recursion
             next(block)
-            data = parse_castep_file(block)[0]
+            data = get_only(parse_castep_file(block))
 
             add_aliases(data, {"initial_positions": "positions",
                                "initial_cell": "cell"},

diff --git a/castep_outputs/test/gen_data.py b/castep_outputs/test/gen_data.py
@@ -3,13 +3,20 @@
 
 from castep_outputs.cli.castep_outputs_main import parse_all
 
-for typ in ('castep', 'bands', 'cell', 'param', 'elastic', 'md', 'ts', 'efield',
-            'den_fmt', 'chdiff_fmt', 'pot_fmt', 'elf_fmt', 'xrd_sf', 'phonon_dos',
-            'magres', 'efield', 'tddft', 'err', 'phonon'):
+for type_ in ('castep', 'bands', 'cell', 'param', 'elastic', 'md', 'ts', 'efield',
+              'den_fmt', 'chdiff_fmt', 'pot_fmt', 'elf_fmt', 'xrd_sf', 'phonon_dos',
+              'magres', 'efield', 'tddft', 'err', ('pp-md', 'castep')):
     for fmt in ('json', 'yaml'):
-        print(typ, fmt)
+        print(type_, fmt)
+        if isinstance(type_, tuple):
+            name, typ = type_
+            in_name = name
+        else:
+            name = typ = type_
+            in_name = "test"
+
         # Delete existing
-        pth = pathlib.Path(f"{typ}.{fmt}")
+        pth = pathlib.Path(f"{name}.{fmt}")
         if pth.exists():
             pth.unlink()
-        parse_all(output=str(pth), out_format=fmt, **{typ: [f"test.{typ}"]})
+        parse_all(output=str(pth), out_format=fmt, **{typ: [f"{in_name}.{typ}"]})
diff --git a/castep_outputs/test/pp-md.castep b/castep_outputs/test/pp-md.castep