diff --git a/dpgen2/entrypoint/submit.py b/dpgen2/entrypoint/submit.py
index 3e1e81bc..c49a3669 100644
--- a/dpgen2/entrypoint/submit.py
+++ b/dpgen2/entrypoint/submit.py
@@ -713,7 +713,9 @@ def submit_concurrent_learning(
         "conf_selector",
         selector,
     )
-    wf_config["inputs"]["do_finetune"] = False
+    # the modify-train-script step will be added as reuse step.
+    # the following hack is not needed anymore.
+    # wf_config["inputs"]["do_finetune"] = False
     # finetune will not be done again if the old process is reused.
 
     wf = Workflow(name="dpgen")
@@ -759,6 +761,7 @@ def get_resubmit_keys(
         [
             "prep-train",
             "run-train",
+            "modify-train-script",
             "prep-lmp",
             "run-lmp",
             "select-confs",
diff --git a/dpgen2/op/run_dp_train.py b/dpgen2/op/run_dp_train.py
index d3a2cc7e..aaf3f258 100644
--- a/dpgen2/op/run_dp_train.py
+++ b/dpgen2/op/run_dp_train.py
@@ -298,11 +298,10 @@ def skip_training(
     iter_data,
     finetune_mode,
 ):
-    # we have init model and no iter data, skip training
-    if finetune_mode is not None and (
-        finetune_mode == "train-init" or finetune_mode == "finetune"
-    ):
+    # do not skip if we do finetuning
+    if finetune_mode is not None and finetune_mode == "finetune":
         return False
+    # we have init model and no iter data, skip training
     if (init_model is not None) and (iter_data is None or len(iter_data) == 0):
         with set_directory(work_dir):
             with open(train_script_name, "w") as fp:
diff --git a/dpgen2/utils/dflow_query.py b/dpgen2/utils/dflow_query.py
index f3a136de..366817f3 100644
--- a/dpgen2/utils/dflow_query.py
+++ b/dpgen2/utils/dflow_query.py
@@ -37,6 +37,8 @@ def matched_step_key(
             if (
                 re.match(f"iter-[0-9]*--{jj}-[0-9]*", kk)
                 or re.match(f"iter-[0-9]*--{jj}", kk)
+                or re.match(f"finetune--{jj}-[0-9]*", kk)
+                or re.match(f"finetune--{jj}", kk)
                 or re.match(f"init--{jj}", kk)
             ):
                 ret.append(kk)
@@ -116,11 +118,16 @@ def find_slice_ranges(
     status = "not-found"
     for idx, ii in enumerate(keys):
         if status == "not-found":
-            if re.match(f"iter-[0-9]*--{sliced_subkey}-[0-9]*", ii):
+            if re.match(f"iter-[0-9]*--{sliced_subkey}-[0-9]*", ii) or re.match(
+                f"finetune--{sliced_subkey}-[0-9]*", ii
+            ):
                 status = "found"
                 tmp_range.append(idx)
         elif status == "found":
-            if not re.match(f"iter-[0-9]*--{sliced_subkey}-[0-9]*", ii):
+            if not (
+                re.match(f"iter-[0-9]*--{sliced_subkey}-[0-9]*", ii)
+                or re.match(f"finetune--{sliced_subkey}-[0-9]*", ii)
+            ):
                 status = "not-found"
                 tmp_range.append(idx)
                 found_range.append(tmp_range)
diff --git a/tests/fake_data_set.py b/tests/fake_data_set.py
index 8b25c67e..5148b49e 100644
--- a/tests/fake_data_set.py
+++ b/tests/fake_data_set.py
@@ -10,7 +10,7 @@ def fake_system(
     ss = dpdata.LabeledSystem()
     ss.data["atom_names"] = [atom_name]
     ss.data["atom_numbs"] = [natoms]
-    ss.data["atom_types"] = [0 for ii in range(natoms)]
+    ss.data["atom_types"] = np.array([0 for ii in range(natoms)]).astype(int)
     # ss.data['cells'] = np.zeros([nframes, 3, 3])
     ss.data["cells"] = np.tile(np.eye(3), [nframes, 1, 1])
     ss.data["coords"] = np.zeros([nframes, natoms, 3])
diff --git a/tests/utils/test_dflow_query.py b/tests/utils/test_dflow_query.py
index c24a8b7a..7013ce40 100644
--- a/tests/utils/test_dflow_query.py
+++ b/tests/utils/test_dflow_query.py
@@ -61,6 +61,12 @@
 # isort: on
 
 dpgen_keys = [
+    "finetune--prep-train",
+    "finetune--run-train-0002",
+    "finetune--run-train-0000",
+    "finetune--run-train-0001",
+    "finetune--modify-train-script",
+    "finetune--prep-run-train",
     "init--scheduler",
     "init--id",
     "iter-000000--prep-train",
@@ -222,6 +228,12 @@ def test_sort_slice_ops(self):
 
     def test_sort_slice_ops(self):
         expected_output = [
+            "finetune--prep-train",
+            "finetune--run-train-0000",
+            "finetune--run-train-0001",
+            "finetune--run-train-0002",
+            "finetune--modify-train-script",
+            "finetune--prep-run-train",
             "init--scheduler",
             "init--id",
             "iter-000000--prep-train",
@@ -260,16 +272,20 @@ def test_sort_slice_ops(self):
 
     def test_print_keys(self):
         expected_output = [
-            " 0 : init--scheduler",
-            " 1 : init--id",
-            " 2 : iter-000000--prep-train",
-            " 3 -> 5 : iter-000000--run-train-0000 -> iter-000000--run-train-0002",
-            " 6 : iter-000000--prep-run-train",
+            " 0 : finetune--prep-train",
+            " 1 -> 3 : finetune--run-train-0000 -> finetune--run-train-0002",
+            " 4 : finetune--modify-train-script",
+            " 5 : finetune--prep-run-train",
+            " 6 : init--scheduler",
+            " 7 : init--id",
+            " 8 : iter-000000--prep-train",
+            " 9 -> 11 : iter-000000--run-train-0000 -> iter-000000--run-train-0002",
+            " 12 : iter-000000--prep-run-train",
         ]
         expected_output = "\n".join(expected_output + [""])
         ret = print_keys_in_nice_format(
-            dpgen_keys[:7],
+            dpgen_keys[:13],
             ["run-train", "run-lmp", "run-fp"],
             idx_fmt_len=8,
         )
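
Note (not part of the patch): the two new finetune-- alternatives in matched_step_key mirror the existing iter-[0-9]*-- ones, since finetune step keys carry a "finetune--" prefix instead of an iteration prefix. Below is a minimal, self-contained sketch of the matching logic; the patterns are taken verbatim from the patch, while the sample keys list is hypothetical:

    import re

    keys = [
        "finetune--run-train-0000",
        "iter-000000--run-train-0001",
        "init--scheduler",
    ]

    jj = "run-train"
    patterns = [
        f"iter-[0-9]*--{jj}-[0-9]*",
        f"iter-[0-9]*--{jj}",
        f"finetune--{jj}-[0-9]*",  # new: sliced finetune steps
        f"finetune--{jj}",         # new: unsliced finetune steps
        f"init--{jj}",
    ]

    for kk in keys:
        if any(re.match(p, kk) for p in patterns):
            print("matched:", kk)
    # prints: matched: finetune--run-train-0000
    #         matched: iter-000000--run-train-0001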
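
Likewise, find_slice_ranges must recognize finetune--run-train-NNNN keys so that consecutive slices of a finetune super-OP are grouped into one reusable range. A condensed sketch of its "not-found"/"found" state machine, simplified to return the ranges directly (the surrounding bookkeeping of the real function is omitted):

    import re

    def slice_ranges(keys, sliced_subkey):
        # collect [start, end) index ranges of consecutive sliced keys
        found, tmp, status = [], [], "not-found"
        for idx, ii in enumerate(keys):
            matched = re.match(f"iter-[0-9]*--{sliced_subkey}-[0-9]*", ii) or re.match(
                f"finetune--{sliced_subkey}-[0-9]*", ii
            )
            if status == "not-found" and matched:
                status = "found"
                tmp.append(idx)
            elif status == "found" and not matched:
                status = "not-found"
                tmp.append(idx)
                found.append(tmp)
                tmp = []
        return found

    keys = [
        "finetune--prep-train",
        "finetune--run-train-0000",
        "finetune--run-train-0001",
        "finetune--prep-run-train",
    ]
    print(slice_ranges(keys, "run-train"))  # [[1, 3]]: slices occupy indices 1..2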
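
Behavioral note on skip_training: the old guard returned False (do not skip) for both "train-init" and "finetune"; the new guard returns False only for "finetune". A "train-init" run with an init model and no iteration data therefore now takes the skip path. A side-effect-free sketch of the decision, assuming the skip branch (whose tail lies below the hunk shown) returns True:

    def should_skip(init_model, iter_data, finetune_mode):
        # condensed from skip_training, without writing the train script
        if finetune_mode is not None and finetune_mode == "finetune":
            return False  # finetuning is never skipped
        return (init_model is not None) and (iter_data is None or len(iter_data) == 0)

    assert should_skip("model.pb", [], "finetune") is False
    assert should_skip("model.pb", [], "train-init") is True  # was False before the patch
    assert should_skip("model.pb", ["data"], None) is False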