From 4840bbcb575de3659e4b582dfef097aa63af53f8 Mon Sep 17 00:00:00 2001 From: robinzyb <38876805+robinzyb@users.noreply.github.com> Date: Thu, 7 Sep 2023 14:12:19 +0200 Subject: [PATCH 1/8] Update README.md for recommendation of using cp2kdata --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1e86a7196..67942ac8f 100644 --- a/README.md +++ b/README.md @@ -74,8 +74,10 @@ The `System` or `LabeledSystem` can be constructed from the following file forma | gaussian| log | True | True | LabeledSystem | 'gaussian/md' | | siesta | output | False | True | LabeledSystem | 'siesta/output'| | siesta | aimd_output | True | True | LabeledSystem | 'siesta/aimd_output' | -| cp2k | output | False | True | LabeledSystem | 'cp2k/output' | -| cp2k | aimd_output | True | True | LabeledSystem | 'cp2k/aimd_output' | +| cp2k(deprecated in future) | output | False | True | LabeledSystem | 'cp2k/output' | +| cp2k(deprecated in future) | aimd_output | True | True | LabeledSystem | 'cp2k/aimd_output' | +| cp2k([plug-in](https://github.com/robinzyb/cp2kdata#plug-in-for-dpdata)) | stdout | False | True | LabeledSystem | 'cp2kdata/e_f' | +| cp2k([plug-in](https://github.com/robinzyb/cp2kdata#plug-in-for-dpdata)) | stdout | True | True | LabeledSystem | 'cp2kdata/md' | | QE | log | False | True | LabeledSystem | 'qe/pw/scf' | | QE | log | True | False | System | 'qe/cp/traj' | | QE | log | True | True | LabeledSystem | 'qe/cp/traj' | From 03cc010a9ae73b11471b7872870482ef4b6d7345 Mon Sep 17 00:00:00 2001 From: robinzyb <38876805+robinzyb@users.noreply.github.com> Date: Sat, 21 Oct 2023 00:50:51 +0200 Subject: [PATCH 2/8] add deprecation warning when dpdata throws errors while parsing cp2k --- dpdata/plugins/cp2k.py | 50 ++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/dpdata/plugins/cp2k.py b/dpdata/plugins/cp2k.py index 1f09adaef..875c344bf 100644 --- a/dpdata/plugins/cp2k.py 
+++ b/dpdata/plugins/cp2k.py @@ -5,28 +5,44 @@ from dpdata.format import Format +string_warning = """ +Hi, you got an error from dpdata, +please check if your cp2k files include full information, +otherwise its version is not supported by dpdata. +Try use dpdata plugin from cp2kdata package, +for details, please refer to +https://robinzyb.github.io/cp2kdata/ +""" + @Format.register("cp2k/aimd_output") class CP2KAIMDOutputFormat(Format): def from_labeled_system(self, file_name, restart=False, **kwargs): - xyz_file = sorted(glob.glob(f"{file_name}/*pos*.xyz"))[0] - log_file = sorted(glob.glob(f"{file_name}/*.log"))[0] - return tuple(Cp2kSystems(log_file, xyz_file, restart)) + try: + xyz_file = sorted(glob.glob(f"{file_name}/*pos*.xyz"))[0] + log_file = sorted(glob.glob(f"{file_name}/*.log"))[0] + return tuple(Cp2kSystems(log_file, xyz_file, restart)) + except : + raise PendingDeprecationWarning(string_warning) @Format.register("cp2k/output") class CP2KOutputFormat(Format): def from_labeled_system(self, file_name, restart=False, **kwargs): - data = {} - ( - data["atom_names"], - data["atom_numbs"], - data["atom_types"], - data["cells"], - data["coords"], - data["energies"], - data["forces"], - tmp_virial, - ) = dpdata.cp2k.output.get_frames(file_name) - if tmp_virial is not None: - data["virials"] = tmp_virial - return data + try: + data = {} + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + tmp_virial, + ) = dpdata.cp2k.output.get_frames(file_name) + if tmp_virial is not None: + data["virials"] = tmp_virial + return data + except: + raise PendingDeprecationWarning(string_warning) + \ No newline at end of file From 621d686ac4f1df8423ea21594584cf5f0748b932 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 20 Oct 2023 22:53:54 +0000 Subject: [PATCH 3/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for 
more information, see https://pre-commit.ci --- dpdata/plugins/cp2k.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dpdata/plugins/cp2k.py b/dpdata/plugins/cp2k.py index 875c344bf..83e0b330b 100644 --- a/dpdata/plugins/cp2k.py +++ b/dpdata/plugins/cp2k.py @@ -4,9 +4,8 @@ from dpdata.cp2k.output import Cp2kSystems from dpdata.format import Format - string_warning = """ -Hi, you got an error from dpdata, +Hi, you got an error from dpdata, please check if your cp2k files include full information, otherwise its version is not supported by dpdata. Try use dpdata plugin from cp2kdata package, @@ -14,6 +13,7 @@ https://robinzyb.github.io/cp2kdata/ """ + @Format.register("cp2k/aimd_output") class CP2KAIMDOutputFormat(Format): def from_labeled_system(self, file_name, restart=False, **kwargs): @@ -21,7 +21,7 @@ def from_labeled_system(self, file_name, restart=False, **kwargs): xyz_file = sorted(glob.glob(f"{file_name}/*pos*.xyz"))[0] log_file = sorted(glob.glob(f"{file_name}/*.log"))[0] return tuple(Cp2kSystems(log_file, xyz_file, restart)) - except : + except: raise PendingDeprecationWarning(string_warning) @@ -45,4 +45,3 @@ def from_labeled_system(self, file_name, restart=False, **kwargs): return data except: raise PendingDeprecationWarning(string_warning) - \ No newline at end of file From 1a4b98522a462eaf08dff222cca438760561b44b Mon Sep 17 00:00:00 2001 From: robinzyb <38876805+robinzyb@users.noreply.github.com> Date: Mon, 30 Oct 2023 14:34:50 +0100 Subject: [PATCH 4/8] update error for cp2k aimdoutput if no pattern is matched --- dpdata/cp2k/output.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dpdata/cp2k/output.py b/dpdata/cp2k/output.py index 0b08c51ef..dc4d21118 100644 --- a/dpdata/cp2k/output.py +++ b/dpdata/cp2k/output.py @@ -76,6 +76,7 @@ def __next__(self): def get_log_block_generator(self): lines = [] delimiter_flag = False + yield_flag = False while True: line = self.log_file_object.readline() if line: @@ -91,15 +92,20 
@@ def get_log_block_generator(self): if any(p.match(line) for p in avail_patterns): delimiter_flag = True else: + if not yield_flag: + raise StopIteration("None of the delimiter patterns are matched") break if delimiter_flag is True: raise RuntimeError("This file lacks some content, please check") def get_xyz_block_generator(self): p3 = re.compile(r"^\s*(\d+)\s*") + yield_flag = False while True: line = self.xyz_file_object.readline() if not line: + if not yield_flag: + raise StopIteration("None of the xyz patterns are matched") break if p3.match(line): atom_num = int(p3.match(line).group(1)) From 7a09854a85505460cb7ed1cb330264fa17c4ebb0 Mon Sep 17 00:00:00 2001 From: robinzyb <38876805+robinzyb@users.noreply.github.com> Date: Mon, 30 Oct 2023 20:38:23 +0100 Subject: [PATCH 5/8] fixed typo in checking cp2k pattern match. --- dpdata/cp2k/output.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dpdata/cp2k/output.py b/dpdata/cp2k/output.py index dc4d21118..0c8df6d78 100644 --- a/dpdata/cp2k/output.py +++ b/dpdata/cp2k/output.py @@ -83,6 +83,7 @@ def get_log_block_generator(self): lines.append(line) if any(p.match(line) for p in delimiter_patterns): if delimiter_flag is True: + yield_flag = True yield lines lines = [] delimiter_flag = False @@ -108,6 +109,7 @@ def get_xyz_block_generator(self): raise StopIteration("None of the xyz patterns are matched") break if p3.match(line): + yield_flag = True atom_num = int(p3.match(line).group(1)) lines = [] lines.append(line) From cec3fc26a079f976601fe74dc83178f885502ae1 Mon Sep 17 00:00:00 2001 From: robinzyb <38876805+robinzyb@users.noreply.github.com> Date: Mon, 30 Oct 2023 20:51:26 +0100 Subject: [PATCH 6/8] add exact except for the cp2k plugin. 
--- dpdata/plugins/cp2k.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/dpdata/plugins/cp2k.py b/dpdata/plugins/cp2k.py index 83e0b330b..eb1b75e96 100644 --- a/dpdata/plugins/cp2k.py +++ b/dpdata/plugins/cp2k.py @@ -21,8 +21,9 @@ def from_labeled_system(self, file_name, restart=False, **kwargs): xyz_file = sorted(glob.glob(f"{file_name}/*pos*.xyz"))[0] log_file = sorted(glob.glob(f"{file_name}/*.log"))[0] return tuple(Cp2kSystems(log_file, xyz_file, restart)) - except: - raise PendingDeprecationWarning(string_warning) + except (StopIteration, RuntimeError): + # StopIteration is raised when pattern match is failed + print(string_warning) @Format.register("cp2k/output") @@ -43,5 +44,8 @@ def from_labeled_system(self, file_name, restart=False, **kwargs): if tmp_virial is not None: data["virials"] = tmp_virial return data - except: - raise PendingDeprecationWarning(string_warning) + #TODO: in the future, we should add exact error type here + #TODO: when pattern match is failed + #TODO: For now just use RuntimeError as a placeholder. + except RuntimeError: + print(string_warning) From 3ee373dd80adb9e471026c6956afc661461e37fa Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 30 Oct 2023 19:51:57 +0000 Subject: [PATCH 7/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- dpdata/plugins/cp2k.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dpdata/plugins/cp2k.py b/dpdata/plugins/cp2k.py index eb1b75e96..7574e5465 100644 --- a/dpdata/plugins/cp2k.py +++ b/dpdata/plugins/cp2k.py @@ -44,8 +44,8 @@ def from_labeled_system(self, file_name, restart=False, **kwargs): if tmp_virial is not None: data["virials"] = tmp_virial return data - #TODO: in the future, we should add exact error type here - #TODO: when pattern match is failed - #TODO: For now just use RuntimeError as a placeholder. 
+ # TODO: in the future, we should add exact error type here + # TODO: when pattern match is failed + # TODO: For now just use RuntimeError as a placeholder. except RuntimeError: print(string_warning) From 5028af6961f82ed368655730e0bbaa6ed0704672 Mon Sep 17 00:00:00 2001 From: robinzyb <38876805+robinzyb@users.noreply.github.com> Date: Mon, 30 Oct 2023 21:28:21 +0100 Subject: [PATCH 8/8] raise PendingDeprecationWarning --- dpdata/plugins/cp2k.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dpdata/plugins/cp2k.py b/dpdata/plugins/cp2k.py index 7574e5465..162098f70 100644 --- a/dpdata/plugins/cp2k.py +++ b/dpdata/plugins/cp2k.py @@ -17,13 +17,13 @@ @Format.register("cp2k/aimd_output") class CP2KAIMDOutputFormat(Format): def from_labeled_system(self, file_name, restart=False, **kwargs): + xyz_file = sorted(glob.glob(f"{file_name}/*pos*.xyz"))[0] + log_file = sorted(glob.glob(f"{file_name}/*.log"))[0] try: - xyz_file = sorted(glob.glob(f"{file_name}/*pos*.xyz"))[0] - log_file = sorted(glob.glob(f"{file_name}/*.log"))[0] return tuple(Cp2kSystems(log_file, xyz_file, restart)) - except (StopIteration, RuntimeError): + except (StopIteration, RuntimeError) as e: # StopIteration is raised when pattern match is failed - print(string_warning) + raise PendingDeprecationWarning(string_warning) from e @Format.register("cp2k/output") @@ -47,5 +47,5 @@ def from_labeled_system(self, file_name, restart=False, **kwargs): # TODO: in the future, we should add exact error type here # TODO: when pattern match is failed # TODO: For now just use RuntimeError as a placeholder. - except RuntimeError: - print(string_warning) + except RuntimeError as e: + raise PendingDeprecationWarning(string_warning) from e