From 815c9ce93ffd5b44e0204f35c58d4795c9414bf2 Mon Sep 17 00:00:00 2001 From: dingzhaohan Date: Mon, 26 Apr 2021 14:44:18 +0800 Subject: [PATCH 01/23] add delete_apg interface --- dpgen/dispatcher/ALI.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/dpgen/dispatcher/ALI.py b/dpgen/dispatcher/ALI.py index ef68322be..ef4f850b2 100644 --- a/dpgen/dispatcher/ALI.py +++ b/dpgen/dispatcher/ALI.py @@ -84,6 +84,21 @@ def manual_delete(stage): os.remove("apg_id.json") print("delete successfully!") +def delete_apg(stage): + fp = open("machine-ali.json") + data = json.load(fp) + mdata_machine = data[stage][0]["machine"] + mdata_resources = data[stage][0]["resources"] + cloud_resources = mdata_machine["cloud_resources"] + ali = ALI(mdata_machine, mdata_resources, "work_path", [1], 1, cloud_resources) + fp = open("apg_id.json") + data = json.load(fp) + ali.cloud_resources["apg_id"] = data["apg_id"] + ali.delete_apg() + os.remove("apg_id.json") + print("delete successfully!") + + class ALI(DispatcherList): def __init__(self, mdata_machine, mdata_resources, work_path, run_tasks, group_size, cloud_resources=None): super().__init__(mdata_machine, mdata_resources, work_path, run_tasks, group_size, cloud_resources) From 7f86e32c8b6a309cbe7569e7adbe4cbc120b8f97 Mon Sep 17 00:00:00 2001 From: tuoping <80671886+tuoping@users.noreply.github.com> Date: Wed, 28 Jul 2021 09:19:05 +0800 Subject: [PATCH 02/23] Add .github/workflow/mirror_to_gitee.yml (#475) * make doc directory for toy models; deploy doc in a branch gh-doc * add contribution guide to doc; add conf.py in doc * move doc dependence from requirements to doc/requirements * deploy on readthedocs instead of github * add mirror_gitee.yml in workflow * test action at push * test action at push * test action at push * test action at push * test action mirror * test action mirror * typo * use https in mirror workflow * use https in mirror workflow * use https in mirror workflow * mirror to gitee * mirror to gitee * Update .github/workflows/mirror_gitee.yml Co-authored-by: Jinzhe Zeng Co-authored-by: tuoping Co-authored-by: Jinzhe Zeng --- .github/workflows/mirror_gitee.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mirror_gitee.yml b/.github/workflows/mirror_gitee.yml index 35d6d0c52..b298d071b 100644 --- a/.github/workflows/mirror_gitee.yml +++ b/.github/workflows/mirror_gitee.yml @@ -15,5 +15,5 @@ jobs: ORGANIZATION: deepmodeling SSH_PRIVATE_KEY: ${{ secrets.SYNC_GITEE_PRIVATE_KEY }} with: - source-repo: "git@github.com:deepmodeling/dpgen.git" + source-repo: "https://github.com/deepmodeling/dpgen.git" destination-repo: "git@gitee.com:deepmodeling/dpgen.git" From 6537a72e27835a782ab293e7c26d6ca1d521a65b Mon Sep 17 00:00:00 2001 From: Ericwang6 Date: Fri, 30 Jul 2021 12:14:45 +0800 Subject: [PATCH 03/23] add model_devi_engine gromacs --- dpgen/generator/run.py | 109 ++++++++++++++++++++++++----------------- 1 file changed, 64 insertions(+), 45 deletions(-) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index ceaaed9b2..37d3753dc 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -1083,6 +1083,16 @@ def _make_model_devi_native_gromacs(iter_index, jdata, mdata, conf_systems): else: model_devi_dt = jdata['model_devi_dt'] nsteps = cur_job.get("nsteps", None) + lambdas = cur_job.get("lambdas", []) + temps = cur_job.get("temps", []) + if not lambdas: + lambdas = [1.0] + else: + for ll in lambdas: + if ll > 1: + raise RuntimeError("lambda is larger than 1.0") + if not temps: + 
temps = [298.0] if nsteps is None: raise RuntimeError("nsteps is None, you should set nsteps in model_devi_jobs!") # Currently Gromacs engine is not supported for different temperatures! @@ -1108,48 +1118,50 @@ def _make_model_devi_native_gromacs(iter_index, jdata, mdata, conf_systems): conf_counter = 0 task_counter = 0 for cc in ss : - task_name = make_model_devi_task_name(sys_idx[sys_counter], task_counter) - #conf_name = make_model_devi_conf_name(sys_idx[sys_counter], conf_counter) + '.lmp' - task_path = os.path.join(work_path, task_name) - # dlog.info(task_path) - create_path(task_path) - #create_path(os.path.join(task_path, 'traj')) - #loc_conf_name = 'conf.lmp' - gromacs_settings = jdata.get("gromacs_settings" , "") - for key,file in gromacs_settings.items(): - if key != "traj_filename" and key != "mdp_filename": - os.symlink(os.path.join(cc,file), os.path.join(task_path, file)) - - # input.json for DP-Gromacs - with open(os.path.join(cc, "input.json")) as f: - input_json = json.load(f) - input_json["graph_file"] = models[0] - with open(os.path.join(task_path,'input.json'), 'w') as _outfile: - json.dump(input_json, _outfile, indent = 4) - - # trj_freq - trj_freq = cur_job.get("trj_freq", 10) - mdp = MDP() - mdp.read(os.path.join(cc, gromacs_settings['mdp_filename'])) - mdp['nstcomm'] = trj_freq - mdp['nstxout'] = trj_freq - mdp['nstlog'] = trj_freq - mdp['nstenergy'] = trj_freq - # dt - mdp['dt'] = dt - mdp.write(os.path.join(task_path, gromacs_settings['mdp_filename'])) - - cwd_ = os.getcwd() - os.chdir(task_path) - job = {} - - job["model_devi_dt"] = model_devi_dt - job["nsteps"] = nsteps - with open('job.json', 'w') as _outfile: - json.dump(job, _outfile, indent = 4) - os.chdir(cwd_) - - task_counter += 1 + for ll in lambdas: + for tt in temps: + task_name = make_model_devi_task_name(sys_idx[sys_counter], task_counter) + task_path = os.path.join(work_path, task_name) + create_path(task_path) + gromacs_settings = jdata.get("gromacs_settings" , "") + for key,file in gromacs_settings.items(): + if key != "traj_filename" and key != "mdp_filename": + os.symlink(os.path.join(cc,file), os.path.join(task_path, file)) + # input.json for DP-Gromacs + with open(os.path.join(cc, "input.json")) as f: + input_json = json.load(f) + input_json["graph_file"] = models[0] + input_json["lambda"] = ll + with open(os.path.join(task_path,'input.json'), 'w') as _outfile: + json.dump(input_json, _outfile, indent = 4) + + # trj_freq + trj_freq = cur_job.get("trj_freq", 10) + mdp = MDP() + mdp.read(os.path.join(cc, gromacs_settings['mdp_filename'])) + mdp['nstcomm'] = trj_freq + mdp['nstxout'] = trj_freq + mdp['nstlog'] = trj_freq + mdp['nstenergy'] = trj_freq + # dt + mdp['dt'] = model_devi_dt + # temps + if "ref_t" in list(mdp.keys()): + mdp["ref_t"] = tt + else: + mdp["ref-t"] = tt + mdp.write(os.path.join(task_path, gromacs_settings['mdp_filename'])) + + cwd_ = os.getcwd() + os.chdir(task_path) + job = {} + job["trj_freq"] = cur_job["trj_freq"] + job["model_devi_dt"] = model_devi_dt + job["nsteps"] = nsteps + with open('job.json', 'w') as _outfile: + json.dump(job, _outfile, indent = 4) + os.chdir(cwd_) + task_counter += 1 conf_counter += 1 sys_counter += 1 @@ -1208,21 +1220,28 @@ def run_model_devi (iter_index, if use_plm_path: forward_files += ['plmpath.pdb'] elif model_devi_engine == "gromacs": + gromacs_settings = jdata.get("gromacs_settings", {}) mdp_filename = gromacs_settings.get("mdp_filename", "md.mdp") topol_filename = gromacs_settings.get("topol_filename", "processed.top") conf_filename = 
gromacs_settings.get("conf_filename", "conf.gro")
         index_filename = gromacs_settings.get("index_filename", "index.raw")
+        # Initial reference to process pbc condition.
+        # Default is em.tpr
+        ref_filename = gromacs_settings.get("ref_filename", "em.tpr")
         deffnm = gromacs_settings.get("deffnm", "deepmd")
         maxwarn = gromacs_settings.get("maxwarn", 1)
+        traj_filename = gromacs_settings.get("traj_filename", "deepmd_traj.gro")
         nsteps = cur_job["nsteps"]

         command = "%s grompp -f %s -p %s -c %s -o %s -maxwarn %d" % (lmp_exec, mdp_filename, topol_filename, conf_filename, deffnm, maxwarn)
         command += "&& %s mdrun -deffnm %s -nsteps %d" %(lmp_exec, deffnm, nsteps)
+        command += "&& echo -e \"MOL\nMOL\n\" | %s trjconv -s %s -f %s.trr -o %s -pbc mol -ur compact -center" % (lmp_exec, ref_filename, deffnm, traj_filename)
+        command += "&& python model_devi.py %s" % traj_filename
         commands = [command]
-
-        forward_files = [mdp_filename, topol_filename, conf_filename, index_filename, "input.json" ]
-        backward_files = ["%s.tpr" % deffnm, "%s.log" %deffnm , 'model_devi.out', 'model_devi.log']
+
+        forward_files = [mdp_filename, topol_filename, conf_filename, index_filename, ref_filename, "input.json", "model_devi.py", "job.json" ]
+        backward_files = ["%s.tpr" % deffnm, "%s.log" %deffnm , traj_filename, 'model_devi.out', 'model_devi.log', "traj" ]

     cwd = os.getcwd()

From 19bdd4653e8d562200e318137d7aade589865588 Mon Sep 17 00:00:00 2001
From: Ericwang6
Date: Sat, 31 Jul 2021 00:46:27 +0800
Subject: [PATCH 04/23] add support for gromacs + deepmd-kit-2.0

---
 dpgen/generator/run.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py
index 37d3753dc..a3720bc95 100644
--- a/dpgen/generator/run.py
+++ b/dpgen/generator/run.py
@@ -1073,6 +1073,9 @@ def _make_model_devi_native(iter_index, jdata, mdata, conf_systems):
         sys_counter += 1

 def _make_model_devi_native_gromacs(iter_index, jdata, mdata, conf_systems):
+    # only support for deepmd v2.0
+    if LooseVersion(mdata['deepmd_version']) < LooseVersion('2.0'):
+        raise RuntimeError("Only support deepmd-kit 2.x for model_devi_engine='gromacs'")
     model_devi_jobs = jdata['model_devi_jobs']
     if (iter_index >= len(model_devi_jobs)) :
         return False
@@ -1172,7 +1175,7 @@ def run_model_devi (iter_index,
                     jdata,
                     mdata) :
     #rmdlog.info("This module has been run !")
     lmp_exec = mdata['lmp_command']
-    # Angus: lmp_exec name should be changed to model_devi_exec.
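For reference, the GROMACS branch added in PATCH 03/04 reads its controls from the current `model_devi_jobs` entry: `nsteps` is required (a `RuntimeError` is raised when it is `None`), `trj_freq` defaults to 10, every value in `lambdas` must stay at or below 1.0, and `temps` falls back to `[298.0]` when empty. A minimal sketch of such an entry, with key names taken from the diffs and purely illustrative values:

```python
# Hypothetical fragment of param.json, written as a Python dict.
# Only the key names come from the patch; the values are examples.
model_devi_job = {
    "sys_idx": [0],         # which initial systems to sample
    "nsteps": 5000,         # required: RuntimeError if missing
    "trj_freq": 10,         # trajectory dump frequency, defaults to 10
    "lambdas": [0.5, 1.0],  # each value must not exceed 1.0
    "temps": [298.0],       # defaults to [298.0] when empty
}
```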
# We should also change make_dispatcher
     # For now, I will use this name for gromacs command

@@ -1232,16 +1235,20 @@ def run_model_devi (iter_index,
         deffnm = gromacs_settings.get("deffnm", "deepmd")
         maxwarn = gromacs_settings.get("maxwarn", 1)
         traj_filename = gromacs_settings.get("traj_filename", "deepmd_traj.gro")
+        grp_name = gromacs_settings.get("group_name", "Other")
         nsteps = cur_job["nsteps"]
+        trj_freq = cur_job.get("trj_freq", 10)

         command = "%s grompp -f %s -p %s -c %s -o %s -maxwarn %d" % (lmp_exec, mdp_filename, topol_filename, conf_filename, deffnm, maxwarn)
         command += "&& %s mdrun -deffnm %s -nsteps %d" %(lmp_exec, deffnm, nsteps)
-        command += "&& echo -e \"MOL\nMOL\n\" | %s trjconv -s %s -f %s.trr -o %s -pbc mol -ur compact -center" % (lmp_exec, ref_filename, deffnm, traj_filename)
-        command += "&& python model_devi.py %s" % traj_filename
+        command += "&& echo -e \"%s\n%s\n\" | %s trjconv -s %s -f %s.trr -o %s -pbc mol -ur compact -center" % (grp_name, grp_name, lmp_exec, ref_filename, deffnm, traj_filename)
+        command += "&& if [ ! -d traj ]; then \n mkdir traj; fi\n"
+        command += f"python -c \"import dpdata;system = dpdata.System('{traj_filename}', fmt='gromacs/gro'); [system.to_gromacs_gro('traj/%d.gromacstrj' % (i * {trj_freq}), frame_idx=i) for i in range(system.get_nframes())]; system.to_deepmd_npy('traj_deepmd')\""
+        command += "&& dp model-devi -m ../graph.000.pb ../graph.001.pb ../graph.002.pb ../graph.003.pb -s traj_deepmd -o model_devi.out"
         commands = [command]

-        forward_files = [mdp_filename, topol_filename, conf_filename, index_filename, ref_filename, "input.json", "model_devi.py", "job.json" ]
-        backward_files = ["%s.tpr" % deffnm, "%s.log" %deffnm , traj_filename, 'model_devi.out', 'model_devi.log', "traj" ]
+        forward_files = [mdp_filename, topol_filename, conf_filename, index_filename, ref_filename, "input.json", "job.json" ]
+        backward_files = ["%s.tpr" % deffnm, "%s.log" %deffnm , traj_filename, 'model_devi.out', "traj", "traj_deepmd" ]

     cwd = os.getcwd()

From 28c0d351f329bf4838a2836296d3b2ac4d2e2b87 Mon Sep 17 00:00:00 2001
From: Ericwang6
Date: Sat, 31 Jul 2021 14:44:14 +0800
Subject: [PATCH 05/23] Add support for different-charge model_devi systems in
 gromacs+gaussian and bug fix in training_reuse_stop_batch keyword

---
 dpgen/generator/lib/gaussian.py |  7 ++++-
 dpgen/generator/run.py          | 47 ++++++++++++++++++++++++---------
 2 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/dpgen/generator/lib/gaussian.py b/dpgen/generator/lib/gaussian.py
index 71f2b5405..ee7e2828f 100644
--- a/dpgen/generator/lib/gaussian.py
+++ b/dpgen/generator/lib/gaussian.py
@@ -114,8 +114,13 @@ def make_gaussian_input(sys_data, fp_params):
         keywords = [keywords]
     else:
         keywords = keywords.copy()
+
     # assume default charge is zero and default spin multiplicity is 1
-    charge = fp_params.get('charge', 0)
+    if 'charge' in sys_data.keys():
+        charge = sys_data['charge']
+    else:
+        charge = fp_params.get('charge', 0)
+
     use_fragment_guesses = False
     multiplicity = fp_params.get('multiplicity', 'auto')
     if type(multiplicity) == int:
diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py
index a3720bc95..eadc3a371 100644
--- a/dpgen/generator/run.py
+++ b/dpgen/generator/run.py
@@ -186,9 +186,12 @@ def dump_to_poscar(dump, poscar, type_map, fmt = "lammps/dump") :
     sys = dpdata.System(dump, fmt = fmt, type_map = type_map)
     sys.to_vasp_poscar(poscar)

-def dump_to_deepmd_raw(dump, deepmd_raw, type_map, fmt='gromacs/gro'):
+def dump_to_deepmd_raw(dump, deepmd_raw, type_map, fmt='gromacs/gro',
charge=None): system = dpdata.System(dump, fmt = fmt, type_map = type_map) system.to_deepmd_raw(deepmd_raw) + if charge is not None: + with open(os.path.join(deepmd_raw, "charge"), 'w+') as f: + f.write(str(charge)) def make_train (iter_index, @@ -208,7 +211,14 @@ def make_train (iter_index, training_init_model = jdata.get('training_init_model', False) training_reuse_iter = jdata.get('training_reuse_iter') training_reuse_old_ratio = jdata.get('training_reuse_old_ratio', None) - training_reuse_stop_batch = jdata.get('training_reuse_stop_batch', 400000) + + if 'training_reuse_stop_batch' in jdata.keys(): + training_reuse_stop_batch = jdata['training_reuse_stop_batch'] + elif 'training_reuse_numb_steps' in jdata.keys(): + training_reuse_stop_batch = jdata['training_reuse_numb_steps'] + else: + training_reuse_stop_batch = 40000 + training_reuse_start_lr = jdata.get('training_reuse_start_lr', 1e-4) training_reuse_start_pref_e = jdata.get('training_reuse_start_pref_e', 0.1) training_reuse_start_pref_f = jdata.get('training_reuse_start_pref_f', 100) @@ -349,10 +359,12 @@ def make_train (iter_index, # set training reuse model if training_reuse_iter is not None and iter_index >= training_reuse_iter: if LooseVersion('1') <= LooseVersion(mdata["deepmd_version"]) < LooseVersion('2'): + jinput['training']['stop_batch'] = training_reuse_stop_batch jinput['training']['auto_prob_style'] \ ="prob_sys_size; 0:%d:%f; %d:%d:%f" \ %(old_range, training_reuse_old_ratio, old_range, len(init_data_sys), 1.-training_reuse_old_ratio) elif LooseVersion('2') <= LooseVersion(mdata["deepmd_version"]) < LooseVersion('3'): + jinput['training']['numb_steps'] = training_reuse_stop_batch jinput['training']['training_data']['auto_prob'] \ ="prob_sys_size; 0:%d:%f; %d:%d:%f" \ %(old_range, training_reuse_old_ratio, old_range, len(init_data_sys), 1.-training_reuse_old_ratio) @@ -363,7 +375,7 @@ def make_train (iter_index, if jinput['loss'].get('start_pref_f') is not None: jinput['loss']['start_pref_f'] = training_reuse_start_pref_f jinput['learning_rate']['start_lr'] = training_reuse_start_lr - jinput['training']['stop_batch'] = training_reuse_stop_batch + for ii in range(numb_models) : task_path = os.path.join(work_path, train_task_fmt % ii) @@ -1128,7 +1140,7 @@ def _make_model_devi_native_gromacs(iter_index, jdata, mdata, conf_systems): create_path(task_path) gromacs_settings = jdata.get("gromacs_settings" , "") for key,file in gromacs_settings.items(): - if key != "traj_filename" and key != "mdp_filename": + if key != "traj_filename" and key != "mdp_filename" and key != "group_name": os.symlink(os.path.join(cc,file), os.path.join(task_path, file)) # input.json for DP-Gromacs with open(os.path.join(cc, "input.json")) as f: @@ -1374,6 +1386,10 @@ def _make_fp_vasp_inner (modd_path, system_index.sort() fp_tasks = [] + + charges_recorder = [] # record charges for each fp_task + charges_map = jdata.get("sys_charges", []) + cluster_cutoff = jdata['cluster_cutoff'] if jdata.get('use_clusters', False) else None # skip save *.out if detailed_report_make_fp is False, default is True detailed_report_make_fp = jdata.get("detailed_report_make_fp", True) @@ -1487,11 +1503,11 @@ def _make_fp_vasp_inner (modd_path, continue if fp_cluster_vacuum is not None: - assert fp_cluster_vacuum >0 - skip_cluster = check_cluster(conf_name, fp_cluster_vacuum) - if skip_cluster: - count_bad_cluster +=1 - continue + assert fp_cluster_vacuum >0 + skip_cluster = check_cluster(conf_name, fp_cluster_vacuum) + if skip_cluster: + count_bad_cluster +=1 + 
continue # link job.json job_name = os.path.join(tt, "job.json") @@ -1507,6 +1523,8 @@ def _make_fp_vasp_inner (modd_path, fp_task_path = os.path.join(work_path, fp_task_name) create_path(fp_task_path) fp_tasks.append(fp_task_path) + if charges_map: + charges_recorder.append(charges_map[int(ss)]) cwd = os.getcwd() os.chdir(fp_task_path) if cluster_cutoff is None: @@ -1524,13 +1542,16 @@ def _make_fp_vasp_inner (modd_path, dlog.info("system {0:s} skipped {1:6d} confs with bad cluster, {2:6d} remains".format(ss, count_bad_cluster, numb_task - count_bad_cluster)) if cluster_cutoff is None: cwd = os.getcwd() - for ii in fp_tasks: - os.chdir(ii) + for idx, task in enumerate(fp_tasks): + os.chdir(task) if model_devi_engine == "lammps": dump_to_poscar('conf.dump', 'POSCAR', type_map, fmt = "lammps/dump") elif model_devi_engine == "gromacs": # dump_to_poscar('conf.dump', 'POSCAR', type_map, fmt = "gromacs/gro") - dump_to_deepmd_raw('conf.dump', 'deepmd.raw', type_map, fmt = 'gromacs/gro') + if charges_map: + dump_to_deepmd_raw('conf.dump', 'deepmd.raw', type_map, fmt='gromacs/gro', charge=charges_recorder[idx]) + else: + dump_to_deepmd_raw('conf.dump', 'deepmd.raw', type_map, fmt='gromacs/gro', charge=None) else: raise RuntimeError("unknown model_devi engine", model_devi_engine) os.chdir(cwd) @@ -1956,6 +1977,8 @@ def make_fp_gaussian(iter_index, sys_data = dpdata.System('POSCAR').data elif model_devi_engine == "gromacs": sys_data = dpdata.System("deepmd.raw", fmt='deepmd/raw').data + if os.path.isfile('deepmd.raw/charge'): + sys_data['charge'] = int(np.loadtxt('deepmd.raw/charge', dtype=int)) ret = make_gaussian_input(sys_data, fp_params) with open('input', 'w') as fp: fp.write(ret) From ea39197e30cff53fd81e07bb0b56ae408cd9ba66 Mon Sep 17 00:00:00 2001 From: Ericwang6 Date: Sat, 31 Jul 2021 14:44:51 +0800 Subject: [PATCH 06/23] Add warning for 'sys_charges' use in lammps --- dpgen/generator/run.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index eadc3a371..d3a508780 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -1546,6 +1546,8 @@ def _make_fp_vasp_inner (modd_path, os.chdir(task) if model_devi_engine == "lammps": dump_to_poscar('conf.dump', 'POSCAR', type_map, fmt = "lammps/dump") + if charges_map: + warnings.warn('"sys_charges" keyword only support for gromacs engine now.') elif model_devi_engine == "gromacs": # dump_to_poscar('conf.dump', 'POSCAR', type_map, fmt = "gromacs/gro") if charges_map: From 21512d38dd343a66aedf6c06020df85555882ddd Mon Sep 17 00:00:00 2001 From: Yingze Wang Date: Sat, 31 Jul 2021 22:41:23 +0800 Subject: [PATCH 07/23] fix bug with 1-dim model_devi_activation_func --- dpgen/generator/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index ceaaed9b2..dafc99826 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -395,7 +395,7 @@ def make_train (iter_index, jinput['model']['fitting_net']['activation_function'] = model_devi_activation_func[ii][1] if len(np.array(model_devi_activation_func).shape) == 1 : # for backward compatibility, 1-dim list, not net-resolved jinput['model']['descriptor']['activation_function'] = model_devi_activation_func[ii] - jinput['model']['descriptor']['activation_function'] = model_devi_activation_func[ii] + jinput['model']['fitting_net']['activation_function'] = model_devi_activation_func[ii] # dump the input.json with open(os.path.join(task_path, train_input_file), 'w') as outfile: 
json.dump(jinput, outfile, indent = 4) From a14799aaba553cde619da383b049980c25b0d09e Mon Sep 17 00:00:00 2001 From: Yingze Wang Date: Sat, 31 Jul 2021 22:50:01 +0800 Subject: [PATCH 08/23] fix bug with 1-dim model_devi_activation_func (#484) --- dpgen/generator/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index ceaaed9b2..dafc99826 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -395,7 +395,7 @@ def make_train (iter_index, jinput['model']['fitting_net']['activation_function'] = model_devi_activation_func[ii][1] if len(np.array(model_devi_activation_func).shape) == 1 : # for backward compatibility, 1-dim list, not net-resolved jinput['model']['descriptor']['activation_function'] = model_devi_activation_func[ii] - jinput['model']['descriptor']['activation_function'] = model_devi_activation_func[ii] + jinput['model']['fitting_net']['activation_function'] = model_devi_activation_func[ii] # dump the input.json with open(os.path.join(task_path, train_input_file), 'w') as outfile: json.dump(jinput, outfile, indent = 4) From ddf39f174a1784ab2a41db225ecbc3b867ae5f99 Mon Sep 17 00:00:00 2001 From: AnguseZhang <529133328@qq.com> Date: Sat, 7 Aug 2021 09:51:49 +0800 Subject: [PATCH 09/23] Allow user defined forward_common_files and backward_files (#482) * Add Cu's init_surf examples. * Delete personal information * Update examples/init/cu.surf.hcp.111.json Co-authored-by: Jinzhe Zeng * Add fp.log in backward_files * Add original methane INCAR * Refactor decide_machine; Allow user_defined input and output; * Debug user_defined_forward_common_files * Fix bug in user_forward_common_files * Allow define input and output files * Fix bugs * Fix bugs * Fix bug * Fix bug * Fix bug * Add documentation * Fix bug * Add file for unittest * Fix backward_files and examples. 
* Fix user_forward_files * Change comments; add support for init; fix cvasp problems * Fix bugs in cvasp of init_bulk * Fix bug in user_forward_files in init_bulk * Fix bug in init's user_forward_files * Fix cvasp bugs in init_bulk * Fix bugs in cvasp of init_bulk * Fix bug in INCAR_md, fixed unit test * Update context.py * Update context.py * Update test_convert_mdata.py * Update context.py * Update test_convert_mdata.py Co-authored-by: Han Wang Co-authored-by: Jinzhe Zeng --- README.md | 20 +- doc/CONTRIBUTING.md | 10 + dpgen/auto_test/common_equi.py | 7 +- dpgen/auto_test/common_prop.py | 7 +- dpgen/auto_test/lib/util.py | 4 +- dpgen/data/gen.py | 58 +- dpgen/data/surf.py | 4 +- dpgen/generator/ch4/machine.json | 2 +- dpgen/generator/lib/utils.py | 36 +- dpgen/generator/run.py | 56 +- dpgen/remote/decide_machine.py | 568 ++++++++++-------- dpgen/simplify/simplify.py | 8 +- .../machine-ali-ehpc.json | 12 +- examples/init/INCAR_methane.md | 30 +- .../machine/DeePMD-kit-1.x/machine-local.json | 2 +- .../DeePMD-kit-1.x/machine-pbs-gaussian.json | 2 +- .../DeePMD-kit-0.12/machine-aws.json | 2 +- .../DeePMD-kit-0.12/machine-local.json | 2 +- examples/machine/deprecated/machine-hnu.json | 2 +- .../deprecated/machine-tiger-pwscf-della.json | 2 +- .../deprecated/machine-tiger-vasp-della.json | 2 +- .../machine/deprecated/machine-tiger.json | 2 +- .../machine/deprecated/machine-ucloud.json | 2 +- tests/generator/machine-local-v1.json | 2 +- tests/generator/machine-local.json | 4 +- tests/generator/test_make_fp.py | 13 +- tests/generator/vdw_kernel.bindat | 0 tests/tools/context.py | 2 + tests/tools/machine_fp_single.json | 15 + tests/tools/test_convert_mdata.py | 17 + 30 files changed, 539 insertions(+), 354 deletions(-) create mode 100644 doc/CONTRIBUTING.md create mode 100644 tests/generator/vdw_kernel.bindat create mode 100644 tests/tools/machine_fp_single.json create mode 100644 tests/tools/test_convert_mdata.py diff --git a/README.md b/README.md index 8462ccaf2..a7aa26bac 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ [![conda install](https://img.shields.io/conda/dn/conda-forge/dpgen?label=conda%20install)](https://anaconda.org/conda-forge/dpgen) [![pip install](https://img.shields.io/pypi/dm/dpgen?label=pip%20install)](https://pypi.org/project/dpgen) -DP-GEN (Deep Generator) is a software written in Python, delicately designed to generate a deep learning based model of interatomic potential energy and force field. DP-GEN is depedent on DeepMD-kit (https://github.com/deepmodeling/deepmd-kit/blob/master/README.md). With highly scalable interface with common softwares for molecular simulation, DP-GEN is capable to automatically prepare scripts and maintain job queues on HPC machines (High Performance Cluster) and analyze results. +DP-GEN (Deep Generator) is a software written in Python, delicately designed to generate a deep learning based model of interatomic potential energy and force field. DP-GEN is dependent on [DeepMD-kit](https://github.com/deepmodeling/deepmd-kit/blob/master/README.md). With highly scalable interface with common softwares for molecular simulation, DP-GEN is capable to automatically prepare scripts and maintain job queues on HPC machines (High Performance Cluster) and analyze results. 
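The headline feature of this patch, the `user_forward_files` / `user_backward_files` options documented in the parameter table further down this README diff, is configured per stage in `machine.json`. A minimal, hypothetical sketch follows; only the two `user_*` keys come from this patch, every other field is an illustrative placeholder:

```python
import json

# Hypothetical "fp" stage entry for machine.json; values are examples only.
fp_entry = {
    "command": "vasp_std",                                 # placeholder
    "machine": {"batch_type": "Slurm"},                    # placeholder
    "resources": {"group_size": 5},                        # placeholder
    "user_forward_files": ["/path_to/vdw_kernel.bindat"],  # uploaded into each task
    "user_backward_files": ["HILLS"],                      # downloaded after each task
}
with open("machine.json", "w") as f:
    json.dump({"fp": [fp_entry]}, f, indent=4)
```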
If you use this software in any publication, please cite:

Yuzhi Zhang, Haidi Wang, Weijie Chen, Jinzhe Zeng, Linfeng Zhang, Han Wang, and Weinan E, DP-GEN: A concurrent learning platform for the generation of reliable deep learning based potential energy models, Computer Physics Communications, 2020, 107206.

### Highlighted features
+ **Accurate and efficient**: DP-GEN is capable of sampling more than tens of millions of structures and selecting only a few for first principles calculation. DP-GEN will finally obtain a uniformly accurate model.
+ **User-friendly and automatic**: Users may install and run DP-GEN easily. Once successfully running, DP-GEN can dispatch and handle all jobs on HPCs, and thus there's no need for any personal effort.
-+ **Highly scalable**: With modularized code structures, users and developers can easily extend DP-GEN for their most relevant needs. DP-GEN currently supports for HPC systems (Slurm, PBS, LSF and cloud machines ), Deep Potential interface with DeePMD-kit, MD interface with LAMMPS and *ab-initio* calculation interface with VASP, PWSCF,SIESTA and Gaussian. We're sincerely welcome and embraced to users' contributions, with more possibilities and cases to use DP-GEN.
++ **Highly scalable**: With modularized code structures, users and developers can easily extend DP-GEN for their most relevant needs. DP-GEN currently supports HPC systems (Slurm, PBS, LSF and cloud machines), Deep Potential interface with DeePMD-kit, MD interface with [LAMMPS](https://www.lammps.org/), [Gromacs](http://www.gromacs.org/) and *ab-initio* calculation interface with VASP, PWSCF, CP2K, SIESTA, Gaussian, Abacus, PWMAT, etc. We sincerely welcome and embrace users' contributions, with more possibilities and cases to use DP-GEN.

### Code structure and interface
+ dpgen:
 * data: source codes for generating init data.
 * generator: source codes for main process of deep generator.
 * auto_test : source code for undertaking materials property analysis.
- * remote : source code for automatically submiting scripts,maintaining job queues and collecting results.
+ * remote and dispatcher : source code for automatically submitting scripts, maintaining job queues and collecting results.
+   Notice this part has been integrated into [dpdispatcher](https://github.com/deepmodeling/dpdispatcher)
 + database : source code for collecting data generated by DP-GEN and interface with database.
 + examples : providing example JSON files.
@@ -63,6 +64,15 @@
 Options for TASK:
 * `test`: Auto-test for Deep Potential.
 * `db`: Collecting data from DP-GEN.
+
+[Here](examples) are examples you can refer to. You should make sure that you provide a correct [JSON](https://docs.python.org/3/library/json.html) file.
+You can use the following command to check your JSON file.
+```python
+import json
+#Specify machine parameters in machine.json
+json.load(open("machine.json"))
+```
+
 ## Download and Install
 One can download the source code of dpgen by
 ```bash
@@ -1322,7 +1332,9 @@
 mem_limit | Integer | 16 | Maximal memory permitted to apply for the job.
 | # End of resources
 | command | String | "lmp_serial" | Executable path of software, such as `lmp_serial`, `lmp_mpi` and `vasp_gpu`, `vasp_std`, etc.
 | group_size | Integer | 5 | DP-GEN will put these jobs together in one submitting script.
-
+| user_forward_files | List of str | ["/path_to/vdw_kernel.bindat"] | These files will be uploaded in each calculation task. You should make sure the path exists.
+| user_backward_files | List of str | ["HILLS"] | Besides DP-GEN's normal output, these files will be downloaded after each calculation.
You should make sure these files can be generated. + ## Troubleshooting 1. The most common problem is whether two settings correspond with each other, including: - The order of elements in `type_map` and `mass_map` and **`fp_pp_files`**. diff --git a/doc/CONTRIBUTING.md b/doc/CONTRIBUTING.md new file mode 100644 index 000000000..31a8996a1 --- /dev/null +++ b/doc/CONTRIBUTING.md @@ -0,0 +1,10 @@ +# DP-GEN Contributing Guide +Welcome to [DP-GEN](https://github.com/deepmodeling/dpgen/tree/master/dpgen) ! + + +## How to contribute +DP-GEN adopts the same convention as other softwares in DeepModeling Community. +You can first refer to DeePMD-kit's +[Contributing guide](https://github.com/deepmodeling/deepmd-kit/edit/devel/CONTRIBUTING.md) +and [Developer guide](https://github.com/deepmodeling/deepmd-kit/edit/devel/doc/development/index.md). + diff --git a/dpgen/auto_test/common_equi.py b/dpgen/auto_test/common_equi.py index 9dcb83e03..103e16dcc 100644 --- a/dpgen/auto_test/common_equi.py +++ b/dpgen/auto_test/common_equi.py @@ -9,10 +9,9 @@ from dpgen.auto_test.calculator import make_calculator from dpgen.auto_test.mpdb import get_structure from dpgen.dispatcher.Dispatcher import make_dispatcher -from dpgen.remote.decide_machine import decide_fp_machine, decide_model_devi_machine from distutils.version import LooseVersion from dpgen.dispatcher.Dispatcher import make_submission - +from dpgen.remote.decide_machine import convert_mdata lammps_task_type = ['deepmd', 'meam', 'eam_fs', 'eam_alloy'] @@ -133,9 +132,9 @@ def run_equi(confs, inter_type = inter_param['type'] # vasp if inter_type == "vasp": - mdata = decide_fp_machine(mdata) + mdata = convert_mdata(mdata, ["fp"]) elif inter_type in lammps_task_type: - mdata = decide_model_devi_machine(mdata) + mdata = convert_mdata(mdata, ["model_devi"]) else: raise RuntimeError("unknown task %s, something wrong" % inter_type) diff --git a/dpgen/auto_test/common_prop.py b/dpgen/auto_test/common_prop.py index bbd7203e2..00f439d37 100644 --- a/dpgen/auto_test/common_prop.py +++ b/dpgen/auto_test/common_prop.py @@ -13,9 +13,8 @@ from dpgen.auto_test.Vacancy import Vacancy from dpgen.auto_test.calculator import make_calculator from dpgen.dispatcher.Dispatcher import make_dispatcher -from dpgen.remote.decide_machine import decide_fp_machine, decide_model_devi_machine from dpgen.dispatcher.Dispatcher import make_submission - +from dpgen.remote.decide_machine import convert_mdata lammps_task_type = ['deepmd', 'meam', 'eam_fs', 'eam_alloy'] @@ -150,9 +149,9 @@ def run_property(confs, inter_type = inter_param_prop['type'] # vasp if inter_type == "vasp": - mdata = decide_fp_machine(mdata) + mdata = convert_mdata(mdata, ["fp"]) elif inter_type in lammps_task_type: - mdata = decide_model_devi_machine(mdata) + mdata = convert_mdata(mdata, ["model_devi"]) else: raise RuntimeError("unknown task %s, something wrong" % inter_type) diff --git a/dpgen/auto_test/lib/util.py b/dpgen/auto_test/lib/util.py index 0a86287fd..32709da28 100644 --- a/dpgen/auto_test/lib/util.py +++ b/dpgen/auto_test/lib/util.py @@ -77,11 +77,11 @@ def get_machine_info(mdata,task_type): command = vasp_exec command = cmd_append_log(command, "log") elif task_type in lammps_task_type: - lmp_exec = mdata['lmp_command'] + model_devi_exec = mdata['model_devi_command'] group_size = mdata['model_devi_group_size'] resources = mdata['model_devi_resources'] machine=mdata['model_devi_machine'] - command = lmp_exec + " -i in.lammps" + command = model_devi_exec + " -i in.lammps" command = cmd_append_log(command, 
"model_devi.log") return machine, resources, command, group_size diff --git a/dpgen/data/gen.py b/dpgen/data/gen.py index 10d220d61..25c610c61 100644 --- a/dpgen/data/gen.py +++ b/dpgen/data/gen.py @@ -22,14 +22,16 @@ import dpgen.data.tools.sc as sc from distutils.version import LooseVersion from dpgen.generator.lib.vasp import incar_upper +from dpgen.generator.lib.utils import symlink_user_forward_files from pymatgen.core import Structure from pymatgen.io.vasp import Incar -from dpgen.remote.decide_machine import decide_fp_machine +from dpgen.remote.decide_machine import convert_mdata from dpgen import ROOT_PATH from dpgen.dispatcher.Dispatcher import Dispatcher, make_dispatcher, make_submission + def create_path (path,back=False) : if path[-1] != "/": path += '/' @@ -311,12 +313,7 @@ def make_vasp_relax (jdata, mdata) : os.remove(os.path.join(work_dir, 'POTCAR')) shutil.copy2( jdata['relax_incar'], os.path.join(work_dir, 'INCAR')) - is_cvasp = False - if 'cvasp' in mdata['fp_resources'].keys(): - is_cvasp = mdata['fp_resources']['cvasp'] - if is_cvasp: - cvasp_file=os.path.join(ROOT_PATH,'generator/lib/cvasp.py') - shutil.copyfile(cvasp_file, os.path.join(work_dir, 'cvasp.py')) + out_potcar = os.path.join(work_dir, 'POTCAR') with open(out_potcar, 'w') as outfile: for fname in potcars: @@ -338,8 +335,17 @@ def make_vasp_relax (jdata, mdata) : os.symlink(ln_src, 'POTCAR') except FileExistsError: pass + is_cvasp = False + if 'cvasp' in mdata['fp_resources'].keys(): + is_cvasp = mdata['fp_resources']['cvasp'] + if is_cvasp: + cvasp_file = os.path.join(ROOT_PATH, 'generator/lib/cvasp.py') + shutil.copyfile(cvasp_file, 'cvasp.py') os.chdir(work_dir) os.chdir(cwd) + symlink_user_forward_files(mdata=mdata, task_type="fp", + work_path=os.path.join(os.path.basename(out_dir),global_dirname_02), + task_format= {"fp" : "sys-*"}) def make_scale(jdata): out_dir = jdata['out_dir'] @@ -373,6 +379,7 @@ def make_scale(jdata): os.chdir(scale_path) poscar_scale(pos_src, 'POSCAR', jj) os.chdir(cwd) + def pert_scaled(jdata) : out_dir = jdata['out_dir'] @@ -425,7 +432,7 @@ def pert_scaled(jdata) : shutil.copy2(pos_in, pos_out) os.chdir(cwd) -def make_vasp_md(jdata) : +def make_vasp_md(jdata, mdata) : out_dir = jdata['out_dir'] potcars = jdata['potcars'] scale = jdata['scale'] @@ -451,7 +458,9 @@ def make_vasp_md(jdata) : with open(fname) as infile: outfile.write(infile.read()) os.chdir(path_md) - os.chdir(cwd) + os.chdir(cwd) + + for ii in sys_ps : for jj in scale : @@ -478,8 +487,20 @@ def make_vasp_md(jdata) : os.symlink(os.path.relpath(file_potcar), 'POTCAR') except FileExistsError: pass + + is_cvasp = False + if 'cvasp' in mdata['fp_resources'].keys(): + is_cvasp = mdata['fp_resources']['cvasp'] + if is_cvasp: + cvasp_file = os.path.join(ROOT_PATH, 'generator/lib/cvasp.py') + shutil.copyfile(cvasp_file, 'cvasp.py') - os.chdir(cwd) + os.chdir(cwd) + + symlink_user_forward_files(mdata=mdata, task_type="fp", + work_path=os.path.join(os.path.basename(out_dir),global_dirname_04), + task_format= {"fp" :"sys-*/scale*/00*"}) + def coll_vasp_md(jdata) : out_dir = jdata['out_dir'] @@ -565,11 +586,14 @@ def run_vasp_relax(jdata, mdata): work_dir = os.path.join(jdata['out_dir'], global_dirname_02) forward_files = ["POSCAR", "INCAR", "POTCAR"] + user_forward_files = mdata.get("fp" + "_user_forward_files", []) + forward_files += [os.path.basename(file) for file in user_forward_files] backward_files = ["OUTCAR","CONTCAR"] + backward_files += mdata.get("fp" + "_user_backward_files", []) forward_common_files = [] if 
'cvasp' in mdata['fp_resources']: if mdata['fp_resources']['cvasp']: - forward_common_files=['cvasp.py'] + forward_files +=['cvasp.py'] relax_tasks = glob.glob(os.path.join(work_dir, "sys-*")) relax_tasks.sort() #dlog.info("work_dir",work_dir) @@ -624,11 +648,14 @@ def run_vasp_md(jdata, mdata): md_nstep = jdata['md_nstep'] forward_files = ["POSCAR", "INCAR", "POTCAR"] + user_forward_files = mdata.get("fp" + "_user_forward_files", []) + forward_files += [os.path.basename(file) for file in user_forward_files] backward_files = ["OUTCAR"] + backward_files += mdata.get("fp" + "_user_backward_files", []) forward_common_files = [] if 'cvasp' in mdata['fp_resources']: if mdata['fp_resources']['cvasp']: - forward_common_files=['cvasp.py'] + forward_files +=['cvasp.py'] path_md = work_dir path_md = os.path.abspath(path_md) @@ -694,7 +721,7 @@ def gen_init_bulk(args) : if args.MACHINE is not None: # Selecting a proper machine - mdata = decide_fp_machine(mdata) + mdata = convert_mdata(mdata, ["fp"]) #disp = make_dispatcher(mdata["fp_machine"]) # Decide work path @@ -757,9 +784,12 @@ def gen_init_bulk(args) : pert_scaled(jdata) elif stage == 3 : dlog.info("Current stage is 3, run a short md") - make_vasp_md(jdata) if args.MACHINE is not None: + make_vasp_md(jdata, mdata) run_vasp_md(jdata, mdata) + else: + make_vasp_md(jdata, {"fp_resources":{}}) + elif stage == 4 : dlog.info("Current stage is 4, collect data") coll_vasp_md(jdata) diff --git a/dpgen/data/surf.py b/dpgen/data/surf.py index 322d26ad4..13420e118 100644 --- a/dpgen/data/surf.py +++ b/dpgen/data/surf.py @@ -11,7 +11,7 @@ import dpgen.data.tools.bcc as bcc from dpgen import dlog from dpgen import ROOT_PATH -from dpgen.remote.decide_machine import decide_fp_machine +from dpgen.remote.decide_machine import convert_mdata from dpgen.dispatcher.Dispatcher import Dispatcher, make_dispatcher #-----PMG--------- from pymatgen.io.vasp import Poscar @@ -596,7 +596,7 @@ def gen_init_surf(args): if args.MACHINE is not None: # Decide a proper machine - mdata = decide_fp_machine(mdata) + mdata = convert_mdata(mdata, ["fp"]) # disp = make_dispatcher(mdata["fp_machine"]) #stage = args.STAGE diff --git a/dpgen/generator/ch4/machine.json b/dpgen/generator/ch4/machine.json index bff646bcd..653f613d6 100644 --- a/dpgen/generator/ch4/machine.json +++ b/dpgen/generator/ch4/machine.json @@ -21,7 +21,7 @@ "_comment": "that's all" }, - "lmp_command": "/sharedext4/softwares/lammps/bin/lmp_serial", + "model_devi_command": "/sharedext4/softwares/lammps/bin/lmp_serial", "model_devi_group_size": 1, "_comment": "model_devi on localhost", "model_devi_machine": { diff --git a/dpgen/generator/lib/utils.py b/dpgen/generator/lib/utils.py index af7a71bf6..772d379ce 100644 --- a/dpgen/generator/lib/utils.py +++ b/dpgen/generator/lib/utils.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import os, re, shutil, logging +import glob iter_format = "%06d" task_format = "%02d" @@ -60,4 +61,37 @@ def log_task (message) : def record_iter (record, ii, jj) : with open (record, "a") as frec : - frec.write ("%d %d\n" % (ii, jj)) + frec.write ("%d %d\n" % (ii, jj)) + +def symlink_user_forward_files(mdata, task_type, work_path, task_format = None): + ''' + Symlink user-defined forward_common_files + Current path should be work_path, such as 00.train + + Parameters + --------- + mdata : dict + machine parameters + task_type: str + task_type, such as "train" + work_path : str + work_path, such as "iter.000001/00.train" + Returns + ------- + None + ''' + user_forward_files = mdata.get(task_type + "_" + 
"user_forward_files", []) + #Angus: In the future, we may unify the task format. + if task_format is None: + task_format = {"train" : "0*", "model_devi" : "task.*", "fp": "task.*"} + #"init_relax" : "sys-*", "init_md" : "sys-*/scale*/00*" + for file in user_forward_files: + assert os.path.isfile(file) ,\ + "user_forward_file %s of %s stage doesn't exist. " % (file, task_type) + tasks = glob.glob(os.path.join(work_path, task_format[task_type])) + for task in tasks: + if os.path.isfile(os.path.join(task, os.path.basename(file))): + os.remove(os.path.join(task, os.path.basename(file))) + os.symlink(file, os.path.join(task, os.path.basename(file))) + return + \ No newline at end of file diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index dafc99826..7958e492e 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -38,6 +38,7 @@ from dpgen.generator.lib.utils import log_iter from dpgen.generator.lib.utils import record_iter from dpgen.generator.lib.utils import log_task +from dpgen.generator.lib.utils import symlink_user_forward_files from dpgen.generator.lib.lammps import make_lammps_input from dpgen.generator.lib.vasp import write_incar_dict from dpgen.generator.lib.vasp import make_vasp_incar_user_dict @@ -53,11 +54,7 @@ from dpgen.generator.lib.gaussian import make_gaussian_input, take_cluster from dpgen.generator.lib.cp2k import make_cp2k_input, make_cp2k_input_from_external, make_cp2k_xyz from dpgen.generator.lib.ele_temp import NBandsEsti -from dpgen.remote.RemoteJob import SSHSession, JobStatus, SlurmJob, PBSJob, LSFJob, CloudMachineJob, awsMachineJob -from dpgen.remote.group_jobs import ucloud_submit_jobs, aws_submit_jobs -from dpgen.remote.group_jobs import group_slurm_jobs -from dpgen.remote.group_jobs import group_local_jobs -from dpgen.remote.decide_machine import decide_train_machine, decide_fp_machine, decide_model_devi_machine +from dpgen.remote.decide_machine import convert_mdata from dpgen.dispatcher.Dispatcher import Dispatcher, _split_tasks, make_dispatcher, make_submission from dpgen.util import sepline from dpgen import ROOT_PATH @@ -345,7 +342,7 @@ def make_train (iter_index, else: raise RuntimeError('invalid setting for use_ele_temp ' + str(use_ele_temp)) else: - raise RuntimeError("DP-GEN currently only supports for DeePMD-kit 1.x version!" ) + raise RuntimeError("DP-GEN currently only supports for DeePMD-kit 1.x or 2.x version!" ) # set training reuse model if training_reuse_iter is not None and iter_index >= training_reuse_iter: if LooseVersion('1') <= LooseVersion(mdata["deepmd_version"]) < LooseVersion('2'): @@ -384,7 +381,7 @@ def make_train (iter_index, jinput['model']['fitting_net']['seed'] = random.randrange(sys.maxsize) % (2**32) jinput['training']['seed'] = random.randrange(sys.maxsize) % (2**32) else: - raise RuntimeError("DP-GEN currently only supports for DeePMD-kit 1.x version!" ) + raise RuntimeError("DP-GEN currently only supports for DeePMD-kit 1.x or 2.x version!" 
) # set model activation function if model_devi_activation_func is not None: if LooseVersion(mdata["deepmd_version"]) < LooseVersion('1'): @@ -422,6 +419,9 @@ def make_train (iter_index, for ii in range(len(iter0_models)): old_model_files = glob.glob(os.path.join(iter0_models[ii], 'model.ckpt*')) _link_old_models(work_path, old_model_files, ii) + # Copy user defined forward files + symlink_user_forward_files(mdata=mdata, task_type="train", work_path=work_path) + def _link_old_models(work_path, old_model_files, ii): @@ -502,7 +502,7 @@ def run_train (iter_index, command = '%s freeze' % train_command commands.append(command) else: - raise RuntimeError("DP-GEN currently only supports for DeePMD-kit 1.x version!" ) + raise RuntimeError("DP-GEN currently only supports for DeePMD-kit 1.x or 2.x version!" ) #_tasks = [os.path.basename(ii) for ii in all_task] # run_tasks = [] @@ -559,8 +559,10 @@ def run_train (iter_index, train_group_size = 1 api_version = mdata.get('api_version', '0.9') - # print('debug:commands', commands) - + + user_forward_files = mdata.get("train" + "_user_forward_files", []) + forward_files += [os.path.basename(file) for file in user_forward_files] + backward_files += mdata.get("train" + "_user_backward_files", []) if LooseVersion(api_version) < LooseVersion('1.0'): warnings.warn(f"the dpdispatcher will be updated to new version." f"And the interface may be changed. Please check the documents for more details") @@ -836,7 +838,8 @@ def make_model_devi (iter_index, _make_model_devi_revmat(iter_index, jdata, mdata, conf_systems) else: raise RuntimeError('unknown model_devi input mode', input_mode) - + #Copy user defined forward_files + symlink_user_forward_files(mdata=mdata, task_type="model_devi", work_path=work_path) return True @@ -1159,10 +1162,7 @@ def run_model_devi (iter_index, jdata, mdata) : #rmdlog.info("This module has been run !") - lmp_exec = mdata['lmp_command'] - # Angus: lmp_exec name should be changed to model_devi_exec. - # We should also change make_dispatcher - # For now, I will use this name for gromacs command + model_devi_exec = mdata['model_devi_command'] model_devi_group_size = mdata['model_devi_group_size'] model_devi_resources = mdata['model_devi_resources'] @@ -1196,7 +1196,7 @@ def run_model_devi (iter_index, model_devi_engine = jdata.get("model_devi_engine", "lammps") if model_devi_engine == "lammps": - command = "{ if [ ! -f dpgen.restart.10000 ]; then %s -i input.lammps -v restart 0; else %s -i input.lammps -v restart 1; fi }" % (lmp_exec, lmp_exec) + command = "{ if [ ! 
-f dpgen.restart.10000 ]; then %s -i input.lammps -v restart 0; else %s -i input.lammps -v restart 1; fi }" % (model_devi_exec, model_devi_exec) command = "/bin/sh -c '%s'" % command commands = [command] forward_files = ['conf.lmp', 'input.lammps', 'traj'] @@ -1217,8 +1217,8 @@ def run_model_devi (iter_index, maxwarn = gromacs_settings.get("maxwarn", 1) nsteps = cur_job["nsteps"] - command = "%s grompp -f %s -p %s -c %s -o %s -maxwarn %d" % (lmp_exec, mdp_filename, topol_filename, conf_filename, deffnm, maxwarn) - command += "&& %s mdrun -deffnm %s -nsteps %d" %(lmp_exec, deffnm, nsteps) + command = "%s grompp -f %s -p %s -c %s -o %s -maxwarn %d" % (model_devi_exec, mdp_filename, topol_filename, conf_filename, deffnm, maxwarn) + command += "&& %s mdrun -deffnm %s -nsteps %d" %(model_devi_exec, deffnm, nsteps) commands = [command] forward_files = [mdp_filename, topol_filename, conf_filename, index_filename, "input.json" ] @@ -1227,6 +1227,9 @@ def run_model_devi (iter_index, cwd = os.getcwd() + user_forward_files = mdata.get("model_devi" + "_user_forward_files", []) + forward_files += [os.path.basename(file) for file in user_forward_files] + backward_files += mdata.get("model_devi" + "_user_backward_files", []) api_version = mdata.get('api_version', '0.9') if LooseVersion(api_version) < LooseVersion('1.0'): warnings.warn(f"the dpdispatcher will be updated to new version." @@ -2015,6 +2018,10 @@ def make_fp (iter_index, make_fp_pwmat(iter_index, jdata) else : raise RuntimeError ("unsupported fp style") + # Copy user defined forward_files + iter_name = make_iter_name(iter_index) + work_path = os.path.join(iter_name, fp_name) + symlink_user_forward_files(mdata=mdata, task_type="fp", work_path=work_path) def _vasp_check_fin (ii) : if os.path.isfile(os.path.join(ii, 'OUTCAR')) : @@ -2120,6 +2127,10 @@ def run_fp_inner (iter_index, # fp_run_tasks.append(ii) run_tasks = [os.path.basename(ii) for ii in fp_run_tasks] + user_forward_files = mdata.get("fp" + "_user_forward_files", []) + forward_files += [os.path.basename(file) for file in user_forward_files] + backward_files += mdata.get("fp" + "_user_backward_files", []) + api_version = mdata.get('api_version', '0.9') if LooseVersion(api_version) < LooseVersion('1.0'): warnings.warn(f"the dpdispatcher will be updated to new version." 
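Between these hunks it is worth spelling out how the two halves of the user-file feature fit together: `symlink_user_forward_files` (added to `lib/utils.py` earlier in this patch) links each absolute path into every task directory at make-time, which is why the run-time hunks forward only `os.path.basename(file)`. A condensed sketch of both halves under that reading; the paths and task names are illustrative, not from the patch:

```python
import os

user_file = "/path_to/vdw_kernel.bindat"         # illustrative absolute path
task_dir = "iter.000000/02.fp/task.000.000000"   # illustrative task directory
os.makedirs(task_dir, exist_ok=True)

# Make-time half: symlink the user file into the task directory,
# replacing any stale link left by a previous run.
link = os.path.join(task_dir, os.path.basename(user_file))
if os.path.lexists(link):
    os.remove(link)
os.symlink(user_file, link)

# Run-time half: forward only the basename, since the link already
# sits inside the task directory; backward files pass through as-is.
mdata = {"fp_user_forward_files": [user_file],
         "fp_user_backward_files": ["HILLS"]}
forward_files = ["POSCAR", "INCAR", "POTCAR", "KPOINTS"]
forward_files += [os.path.basename(f) for f in mdata.get("fp_user_forward_files", [])]
backward_files = ["fp.log", "OUTCAR", "vasprun.xml"]
backward_files += mdata.get("fp_user_backward_files", [])
```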
@@ -2158,10 +2169,9 @@ def run_fp (iter_index, mdata) : fp_style = jdata['fp_style'] fp_pp_files = jdata['fp_pp_files'] - if fp_style == "vasp" : forward_files = ['POSCAR', 'INCAR', 'POTCAR','KPOINTS'] - backward_files = ['OUTCAR','vasprun.xml'] + backward_files = ['fp.log','OUTCAR','vasprun.xml'] # Move cvasp interface to jdata if ('cvasp' in jdata) and (jdata['cvasp'] == True): mdata['fp_resources']['cvasp'] = True @@ -2646,7 +2656,8 @@ def run_iter (param_file, machine_file) : listener = logging.handlers.QueueListener(que, smtp_handler) dlog.addHandler(queue_handler) listener.start() - + # Convert mdata + mdata = convert_mdata(mdata) max_tasks = 10000 numb_task = 9 record = "record.dpgen" @@ -2673,7 +2684,6 @@ def run_iter (param_file, machine_file) : make_train (ii, jdata, mdata) elif jj == 1 : log_iter ("run_train", ii, jj) - mdata = decide_train_machine(mdata) run_train (ii, jdata, mdata) elif jj == 2 : log_iter ("post_train", ii, jj) @@ -2685,7 +2695,6 @@ def run_iter (param_file, machine_file) : break elif jj == 4 : log_iter ("run_model_devi", ii, jj) - mdata = decide_model_devi_machine(mdata) run_model_devi (ii, jdata, mdata) elif jj == 5 : @@ -2696,7 +2705,6 @@ def run_iter (param_file, machine_file) : make_fp (ii, jdata, mdata) elif jj == 7 : log_iter ("run_fp", ii, jj) - mdata = decide_fp_machine(mdata) run_fp (ii, jdata, mdata) elif jj == 8 : log_iter ("post_fp", ii, jj) diff --git a/dpgen/remote/decide_machine.py b/dpgen/remote/decide_machine.py index cda17853e..5996b45b2 100644 --- a/dpgen/remote/decide_machine.py +++ b/dpgen/remote/decide_machine.py @@ -11,278 +11,312 @@ import numpy as np from distutils.version import LooseVersion -def decide_train_machine(mdata): - if LooseVersion(mdata.get('api_version', '0.9')) >= LooseVersion('1.0'): - mdata['train_group_size'] = mdata['train'][0]['resources']['group_size'] - if 'train' in mdata: - continue_flag = False - if 'record.machine' in os.listdir(): - try: - with open('record.machine', 'r') as _infile: - profile = json.load(_infile) - if profile['purpose'] == 'train': - mdata['train_machine'] = profile['machine'] - mdata['train_resources'] = profile['resources'] - - if 'python_path' in profile: - mdata['python_path'] = profile['python_path'] - if "group_size" in profile: - mdata["train_group_size"] = profile["group_size"] - if 'deepmd_version' in profile: - mdata["deepmd_version"] = profile['deepmd_version'] - if 'command' in profile: - mdata['train_command'] = profile["command"] - continue_flag = True - except: - pass - if ("hostname" not in mdata["train"][0]["machine"]) or (len(mdata["train"]) == 1): - mdata["train_machine"] = mdata["train"][0]["machine"] - mdata["train_resources"] = mdata["train"][0]["resources"] - - if 'python_path' in mdata["train"][0]: - mdata["python_path"] = mdata["train"][0]["python_path"] - if "group_size" in mdata["train"][0]: - mdata["train_group_size"] = mdata["train"][0]["group_size"] - if 'deepmd_version' in mdata["train"][0]: - mdata["deepmd_version"] = mdata["train"][0]["deepmd_version"] - if 'command' in mdata["train"][0]: - mdata["train_command"] = mdata["train"][0]["command"] - continue_flag = True - - pd_flag = False - pd_count_list =[] - # pd for pending job in slurm - # if we need to launch new machine_idxines - if not continue_flag: - #assert isinstance(mdata['train']['machine'], list) - #assert isinstance(mdata['train']['resources'], list) - #assert len(mdata['train']['machine']) == len(mdata['train']['resources']) - # mdata['train'] is a list - for machine_idx in 
range(len(mdata['train'])):
-                temp_machine = mdata['train'][machine_idx]['machine']
-                temp_resources = mdata['train'][machine_idx]['resources']
-                temp_ssh_sess = SSHSession(temp_machine)
-                cwd = os.getcwd()
-                temp_context = SSHContext(cwd, temp_ssh_sess)
-                if temp_machine['machine_type'] == 'lsf':
-                    temp_batch = LSF(temp_context)
-                else:
-                    temp_batch = Slurm(temp_context)
-                # For other type of machines, please add them using 'elif'.
-                # Here slurm is selected as the final choice in convinience.
-                command = temp_batch._make_squeue(temp_machine, temp_resources)
-                ret, stdin, stdout, stderr = temp_batch.context.block_call(command)
-                pd_response = stdout.read().decode('utf-8').split("\n")
-                pd_count = len(pd_response)
-                temp_context.clean()
-                ## If there is no need to waiting for allocation
-                if pd_count ==1:
-                    mdata['train_machine'] = temp_machine
-                    mdata['train_resources'] = temp_resources
+def convert_mdata(mdata, task_types=["train", "model_devi", "fp"]):
+    '''
+    Convert mdata for DP-GEN main process.
+    New convention is like mdata["fp"]["machine"],
+    DP-GEN needs mdata["fp_machine"]
+
-                    if 'python_path' in mdata['train'][machine_idx]:
-                        mdata['python_path'] = mdata['train'][machine_idx]['python_path']
-                    if 'group_size' in mdata['train'][machine_idx]:
-                        mdata['train_group_size'] = mdata['train'][machine_idx]['group_size']
-                    if 'deepmd_version' in mdata['train'][machine_idx]:
-                        mdata['deepmd_version'] = mdata['train'][machine_idx]['deepmd_version']
-                    if 'command' in mdata['train'][machine_idx]:
-                        mdata['train_command'] = mdata['train'][machine_idx]['command']
+    Notice that we deprecate the function which can automatically select one most available machine,
+    since this function was only used by Angus, and only supported Slurm.
+    In the future this can be implemented.
+
-                    ## No need to wait
-                    pd_flag = True
-                    break
-                else:
-                    pd_count_list.append(pd_count)
-            if not pd_flag:
-                ## All machines need waiting, then compare waiting jobs
-                ## Select a machine which has fewest waiting jobs
-                min_machine_idx = np.argsort(pd_count_list)[0]
-                mdata['train_machine'] = mdata['train'][min_machine_idx]['machine']
-                mdata['train_resources'] = mdata['train'][min_machine_idx]['resources']
-
-                if 'python_path' in mdata['train'][min_machine_idx]:
-                    mdata['python_path'] = mdata['train'][min_machine_idx]['python_path']
-                if "group_size" in mdata['train'][min_machine_idx]:
-                    mdata["train_group_size"] = mdata['train'][min_machine_idx]["group_size"]
-                if 'deepmd_version' in mdata['train'][min_machine_idx]:
-                    mdata['deepmd_version'] = mdata['train'][min_machine_idx]["deepmd_version"]
-                if 'command' in mdata['train'][min_machine_idx]:
-                    mdata['train_command'] = mdata['train'][min_machine_idx]['command']
+    Parameters
+    ----------
+    mdata : dict
+        Machine parameters to be converted.
+ task_types : list of string + Type of tasks, default is ["train", "model_devi", "fp"] - ## Record which machine is selected - with open("record.machine","w") as _outfile: - profile = {} - profile['purpose'] = 'train' - profile['machine'] = mdata['train_machine'] - profile['resources'] = mdata['train_resources'] - - if 'python_path' in mdata: - profile['python_path'] = mdata['python_path'] - if "train_group_size" in mdata: - profile["group_size"] = mdata["train_group_size"] - if 'deepmd_version' in mdata: - profile['deepmd_version'] = mdata['deepmd_version'] - if 'train_command' in mdata: - profile['command'] = mdata['train_command'] + Returns + ------- + dict + mdata converted + ''' + for task_type in task_types: + if task_type in mdata: + for key, item in mdata[task_type][0].items(): + if "comments" not in key: + mdata[task_type + "_" + key] = item + group_size = mdata[task_type][0]["resources"].get("group_size", 1) + mdata[task_type + "_" + "group_size"] = group_size + return mdata - json.dump(profile, _outfile, indent = 4) - return mdata -def decide_model_devi_machine(mdata): - if LooseVersion(mdata.get('api_version', '0.9')) >= LooseVersion('1.0'): - mdata['model_devi_group_size'] = mdata['model_devi'][0]['resources']['group_size'] - if 'model_devi' in mdata: - continue_flag = False - if 'record.machine' in os.listdir(): - try: - with open('record.machine', 'r') as _infile: - profile = json.load(_infile) - if profile['purpose'] == 'model_devi': - mdata['model_devi_machine'] = profile['machine'] - mdata['model_devi_resources'] = profile['resources'] - mdata['lmp_command'] = profile['command'] - mdata['model_devi_group_size'] = profile['group_size'] - continue_flag = True - except: - pass - if ("hostname" not in mdata["model_devi"][0]["machine"]) or (len(mdata["model_devi"]) == 1): - mdata["model_devi_machine"] = mdata["model_devi"][0]["machine"] - mdata["model_devi_resources"] = mdata["model_devi"][0]["resources"] - mdata["lmp_command"] = mdata["model_devi"][0]["command"] - #if "group_size" in mdata["train"][0]: - mdata["model_devi_group_size"] = mdata["model_devi"][0].get("group_size", 1) - continue_flag = True - pd_count_list =[] - pd_flag = False - if not continue_flag: - - #assert isinstance(mdata['model_devi']['machine'], list) - #ssert isinstance(mdata['model_devi']['resources'], list) - #assert len(mdata['model_devi']['machine']) == len(mdata['model_devi']['resources']) - - for machine_idx in range(len(mdata['model_devi'])): - temp_machine = mdata['model_devi'][machine_idx]['machine'] - temp_resources = mdata['model_devi'][machine_idx]['resources'] - #assert isinstance(temp_machine, dict), "unsupported type of model_devi machine [%d]!" %machine_idx - #assert isinstance(temp_resources, dict), "unsupported type of model_devi resources [%d]!"%machine_idx - #assert temp_machine['machine_type'] == 'slurm', "Currently only support for Slurm!" - temp_ssh_sess = SSHSession(temp_machine) - cwd = os.getcwd() - temp_context = SSHContext(cwd, temp_ssh_sess) - if temp_machine['machine_type'] == 'lsf': - temp_batch = LSF(temp_context) - else: - temp_batch = Slurm(temp_context) - # For other type of machines, please add them using 'elif'. - # Here slurm is selected as the final choice in convinience. 
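To illustrate the net effect of `convert_mdata` defined above: for a minimal machine dict whose shape follows the docstring, the per-stage keys are flattened into `<stage>_<key>` entries. A sketch, assuming the `convert_mdata` from this patch is importable; the `"fp"` entry below is illustrative:

```python
mdata = {
    "fp": [{
        "command": "vasp_std",               # example value
        "machine": {"batch_type": "Slurm"},  # example value
        "resources": {"group_size": 5},
    }]
}
mdata = convert_mdata(mdata, ["fp"])
assert mdata["fp_command"] == "vasp_std"
assert mdata["fp_machine"] == {"batch_type": "Slurm"}
assert mdata["fp_group_size"] == 5  # taken from resources, default 1
```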
-                command = temp_batch._make_squeue(temp_machine, temp_resources)
-                ret, stdin, stdout, stderr = temp_batch.context.block_call(command)
-                pd_response = stdout.read().decode('utf-8').split("\n")
-                pd_count = len(pd_response)
-                temp_context.clean()
-                if pd_count ==0:
-                    mdata['model_devi_machine'] = temp_machine
-                    mdata['model_devi_resources'] = temp_resources
-                    mdata['lmp_command'] = mdata['model_devi'][machine_idx]['command']
-                    mdata['model_devi_group_size'] = mdata['model_devi'][machine_idx].get('group_size', 1)
-                    pd_flag = True
-                    break
-                else:
-                    pd_count_list.append(pd_count)
-            if not pd_flag:
-                min_machine_idx = np.argsort(pd_count_list)[0]
-                mdata['model_devi_machine'] = mdata['model_devi'][min_machine_idx]['machine']
-                mdata['model_devi_resources'] = mdata['model_devi'][min_machine_idx]['resources']
-                mdata['lmp_command'] = mdata['model_devi'][min_machine_idx]['command']
-                mdata['model_devi_group_size'] = mdata['model_devi'][min_machine_idx].get('group_size', 1)
-            with open("record.machine","w") as _outfile:
-                profile = {}
-                profile['purpose'] = 'model_devi'
-                profile['machine'] = mdata['model_devi_machine']
-                profile['resources'] = mdata['model_devi_resources']
-                profile['group_size'] = mdata['model_devi_group_size']
-                profile['command'] = mdata['lmp_command']
-
-                json.dump(profile, _outfile, indent = 4)
-    return mdata
-def decide_fp_machine(mdata):
-    if LooseVersion(mdata.get('api_version', '0.9')) >= LooseVersion('1.0'):
-        mdata['fp_group_size'] = mdata['fp'][0]['resources']['group_size']
-    if 'fp' in mdata:
-        #ssert isinstance(mdata['fp']['machine'], list)
-        #assert isinstance(mdata['fp']['resources'], list)
-        #assert len(mdata['fp']['machine']) == len(mdata['fp']['resources'])
-        continue_flag = False
-        ## decide whether to use an existing machine
-        if 'record.machine' in os.listdir():
-            try:
-                with open('record.machine', 'r') as _infile:
-                    profile = json.load(_infile)
-                if profile['purpose'] == 'fp':
-                    mdata['fp_machine'] = profile['machine']
-                    mdata['fp_resources'] = profile['resources']
-                    mdata['fp_command'] = profile['command']
-                    mdata['fp_group_size'] = profile['group_size']
-
-                    continue_flag = True
-            except:
-                pass
-        if ("hostname" not in mdata["fp"][0]["machine"]) or (len(mdata["fp"]) == 1):
-            mdata["fp_machine"] = mdata["fp"][0]["machine"]
-            mdata["fp_resources"] = mdata["fp"][0]["resources"]
-            mdata["fp_command"] = mdata["fp"][0]["command"]
-            #if "group_size" in mdata["train"][0]:
-            mdata["fp_group_size"] = mdata["fp"][0].get("group_size", 1)
-            continue_flag = True
-
-
-        pd_count_list =[]
-        pd_flag = False
-        if not continue_flag:
-            for machine_idx in range(len(mdata['fp'])):
-                temp_machine = mdata['fp'][machine_idx]['machine']
-                temp_resources = mdata['fp'][machine_idx]['resources']
-                temp_ssh_sess = SSHSession(temp_machine)
-                cwd = os.getcwd()
-                temp_context = SSHContext(cwd, temp_ssh_sess)
-                if temp_machine['machine_type'] == 'lsf':
-                    temp_batch = LSF(temp_context)
-                else:
-                    temp_batch = Slurm(temp_context)
-                # For other type of machines, please add them using 'elif'.
-                # Here slurm is selected as the final choice in convinience.
-                command = temp_batch._make_squeue(temp_machine, temp_resources)
-                ret, stdin, stdout, stderr = temp_batch.context.block_call(command)
-                pd_response = stdout.read().decode('utf-8').split("\n")
-                pd_count = len(pd_response)
-                temp_context.clean()
-                #dlog.info(temp_machine["username"] + " " + temp_machine["hostname"] + " " + str(pd_count))
-                if pd_count ==0:
-                    mdata['fp_machine'] = temp_machine
-                    mdata['fp_resources'] = temp_resources
-                    mdata['fp_command'] = mdata['fp'][machine_idx]['command']
-                    mdata['fp_group_size'] = mdata['fp'][machine_idx].get('group_size', 1)
-                    pd_flag = True
-                    break
-                else:
-                    pd_count_list.append(pd_count)
-            if not pd_flag:
-                min_machine_idx = np.argsort(pd_count_list)[0]
-                mdata['fp_machine'] = mdata['fp'][min_machine_idx]['machine']
-                mdata['fp_resources'] = mdata['fp'][min_machine_idx]['resources']
-                mdata['fp_command'] = mdata['fp'][min_machine_idx]['command']
-                mdata['fp_group_size'] = mdata['fp'][min_machine_idx].get('group_size',1)
-
-            with open("record.machine","w") as _outfile:
-                profile = {}
-                profile['purpose'] = 'fp'
-                profile['machine'] = mdata['fp_machine']
-                profile['resources'] = mdata['fp_resources']
-                profile['group_size'] = mdata['fp_group_size']
-                profile['command'] = mdata['fp_command']
-                json.dump(profile, _outfile, indent = 4)
-    return mdata
+# def decide_train_machine(mdata):
+#     if LooseVersion(mdata.get('api_version', '0.9')) >= LooseVersion('1.0'):
+#         mdata['train_group_size'] = mdata['train'][0]['resources']['group_size']
+#     if 'train' in mdata:
+#         continue_flag = False
+#         if 'record.machine' in os.listdir():
+#             try:
+#                 with open('record.machine', 'r') as _infile:
+#                     profile = json.load(_infile)
+#                 if profile['purpose'] == 'train':
+#                     mdata['train_machine'] = profile['machine']
+#                     mdata['train_resources'] = profile['resources']
+#
+#                     if 'python_path' in profile:
+#                         mdata['python_path'] = profile['python_path']
+#                     if "group_size" in profile:
+#                         mdata["train_group_size"] = profile["group_size"]
+#                     if 'deepmd_version' in profile:
+#                         mdata["deepmd_version"] = profile['deepmd_version']
+#                     if 'command' in profile:
+#                         mdata['train_command'] = profile["command"]
+#                     continue_flag = True
+#             except:
+#                 pass
+#         if ("hostname" not in mdata["train"][0]["machine"]) or (len(mdata["train"]) == 1):
+#             mdata["train_machine"] = mdata["train"][0]["machine"]
+#             mdata["train_resources"] = mdata["train"][0]["resources"]
+#
+#             if 'python_path' in mdata["train"][0]:
+#                 mdata["python_path"] = mdata["train"][0]["python_path"]
+#             if "group_size" in mdata["train"][0]:
+#                 mdata["train_group_size"] = mdata["train"][0]["group_size"]
+#             if 'deepmd_version' in mdata["train"][0]:
+#                 mdata["deepmd_version"] = mdata["train"][0]["deepmd_version"]
+#             if 'command' in mdata["train"][0]:
+#                 mdata["train_command"] = mdata["train"][0]["command"]
+#             continue_flag = True
+#
+#         pd_flag = False
+#         pd_count_list =[]
+#         # pd for pending job in slurm
+#         # if we need to launch new machine_idxines
+#         if not continue_flag:
+#
+#             #assert isinstance(mdata['train']['machine'], list)
+#             #assert isinstance(mdata['train']['resources'], list)
+#             #assert len(mdata['train']['machine']) == len(mdata['train']['resources'])
+#             # mdata['train'] is a list
+#             for machine_idx in range(len(mdata['train'])):
+#                 temp_machine = mdata['train'][machine_idx]['machine']
+#                 temp_resources = mdata['train'][machine_idx]['resources']
+#                 temp_ssh_sess = SSHSession(temp_machine)
+#                 cwd = os.getcwd()
+#                 temp_context = SSHContext(cwd, temp_ssh_sess)
+#                 if temp_machine['machine_type'] == 'lsf':
+#                     temp_batch = LSF(temp_context)
+#                 else:
+#                     temp_batch = Slurm(temp_context)
+#                 # For other type of machines, please add them using 'elif'.
+#                 # Here slurm is selected as the final choice in convinience.
+#                 command = temp_batch._make_squeue(temp_machine, temp_resources)
+#                 ret, stdin, stdout, stderr = temp_batch.context.block_call(command)
+#                 pd_response = stdout.read().decode('utf-8').split("\n")
+#                 pd_count = len(pd_response)
+#                 temp_context.clean()
+#                 ## If there is no need to waiting for allocation
+#                 if pd_count ==1:
+#                     mdata['train_machine'] = temp_machine
+#                     mdata['train_resources'] = temp_resources
+#
+#                     if 'python_path' in mdata['train'][machine_idx]:
+#                         mdata['python_path'] = mdata['train'][machine_idx]['python_path']
+#                     if 'group_size' in mdata['train'][machine_idx]:
+#                         mdata['train_group_size'] = mdata['train'][machine_idx]['group_size']
+#                     if 'deepmd_version' in mdata['train'][machine_idx]:
+#                         mdata['deepmd_version'] = mdata['train'][machine_idx]['deepmd_version']
+#                     if 'command' in mdata['train'][machine_idx]:
+#                         mdata['train_command'] = mdata['train'][machine_idx]['command']
+#
+#                     ## No need to wait
+#                     pd_flag = True
+#                     break
+#                 else:
+#                     pd_count_list.append(pd_count)
+#             if not pd_flag:
+#                 ## All machines need waiting, then compare waiting jobs
+#                 ## Select a machine which has fewest waiting jobs
+#                 min_machine_idx = np.argsort(pd_count_list)[0]
+#                 mdata['train_machine'] = mdata['train'][min_machine_idx]['machine']
+#                 mdata['train_resources'] = mdata['train'][min_machine_idx]['resources']
+#
+#                 if 'python_path' in mdata['train'][min_machine_idx]:
+#                     mdata['python_path'] = mdata['train'][min_machine_idx]['python_path']
+#                 if "group_size" in mdata['train'][min_machine_idx]:
+#                     mdata["train_group_size"] = mdata['train'][min_machine_idx]["group_size"]
+#                 if 'deepmd_version' in mdata['train'][min_machine_idx]:
+#                     mdata['deepmd_version'] = mdata['train'][min_machine_idx]["deepmd_version"]
+#                 if 'command' in mdata['train'][min_machine_idx]:
+#                     mdata['train_command'] = mdata['train'][min_machine_idx]['command']
+#
+#                 ## Record which machine is selected
+#                 with open("record.machine","w") as _outfile:
+#                     profile = {}
+#                     profile['purpose'] = 'train'
+#                     profile['machine'] = mdata['train_machine']
+#                     profile['resources'] = mdata['train_resources']
+#
+#                     if 'python_path' in mdata:
+#                         profile['python_path'] = mdata['python_path']
+#                     if "train_group_size" in mdata:
+#                         profile["group_size"] = mdata["train_group_size"]
+#                     if 'deepmd_version' in mdata:
+#                         profile['deepmd_version'] = mdata['deepmd_version']
+#                     if 'train_command' in mdata:
+#                         profile['command'] = mdata['train_command']
+#
+#                     json.dump(profile, _outfile, indent = 4)
+#     return mdata
+#
+# def decide_model_devi_machine(mdata):
+#     if LooseVersion(mdata.get('api_version', '0.9')) >= LooseVersion('1.0'):
+#         mdata['model_devi_group_size'] = mdata['model_devi'][0]['resources']['group_size']
+#     if 'model_devi' in mdata:
+#         continue_flag = False
+#         if 'record.machine' in os.listdir():
+#             try:
+#                 with open('record.machine', 'r') as _infile:
+#                     profile = json.load(_infile)
+#                 if profile['purpose'] == 'model_devi':
+#                     mdata['model_devi_machine'] = profile['machine']
+#                     mdata['model_devi_resources'] = profile['resources']
+#                     mdata['model_devi_command'] = profile['command']
+#                     mdata['model_devi_group_size'] = profile['group_size']
+#                     continue_flag = True
+#             except:
+#                 pass
+#         if ("hostname" not in mdata["model_devi"][0]["machine"]) or (len(mdata["model_devi"]) == 1):
+#             mdata["model_devi_machine"] = mdata["model_devi"][0]["machine"]
mdata["model_devi_resources"] = mdata["model_devi"][0]["resources"] +# mdata["model_devi_command"] = mdata["model_devi"][0]["command"] +# #if "group_size" in mdata["train"][0]: +# mdata["model_devi_group_size"] = mdata["model_devi"][0].get("group_size", 1) +# continue_flag = True +# +# pd_count_list =[] +# pd_flag = False +# if not continue_flag: +# +# #assert isinstance(mdata['model_devi']['machine'], list) +# #ssert isinstance(mdata['model_devi']['resources'], list) +# #assert len(mdata['model_devi']['machine']) == len(mdata['model_devi']['resources']) +# +# for machine_idx in range(len(mdata['model_devi'])): +# temp_machine = mdata['model_devi'][machine_idx]['machine'] +# temp_resources = mdata['model_devi'][machine_idx]['resources'] +# #assert isinstance(temp_machine, dict), "unsupported type of model_devi machine [%d]!" %machine_idx +# #assert isinstance(temp_resources, dict), "unsupported type of model_devi resources [%d]!"%machine_idx +# #assert temp_machine['machine_type'] == 'slurm', "Currently only support for Slurm!" +# temp_ssh_sess = SSHSession(temp_machine) +# cwd = os.getcwd() +# temp_context = SSHContext(cwd, temp_ssh_sess) +# if temp_machine['machine_type'] == 'lsf': +# temp_batch = LSF(temp_context) +# else: +# temp_batch = Slurm(temp_context) +# # For other type of machines, please add them using 'elif'. +# # Here slurm is selected as the final choice in convinience. +# command = temp_batch._make_squeue(temp_machine, temp_resources) +# ret, stdin, stdout, stderr = temp_batch.context.block_call(command) +# pd_response = stdout.read().decode('utf-8').split("\n") +# pd_count = len(pd_response) +# temp_context.clean() +# if pd_count ==0: +# mdata['model_devi_machine'] = temp_machine +# mdata['model_devi_resources'] = temp_resources +# mdata['model_devi_command'] = mdata['model_devi'][machine_idx]['command'] +# mdata['model_devi_group_size'] = mdata['model_devi'][machine_idx].get('group_size', 1) +# pd_flag = True +# break +# else: +# pd_count_list.append(pd_count) +# if not pd_flag: +# min_machine_idx = np.argsort(pd_count_list)[0] +# mdata['model_devi_machine'] = mdata['model_devi'][min_machine_idx]['machine'] +# mdata['model_devi_resources'] = mdata['model_devi'][min_machine_idx]['resources'] +# mdata['model_devi_command'] = mdata['model_devi'][min_machine_idx]['command'] +# mdata['model_devi_group_size'] = mdata['model_devi'][min_machine_idx].get('group_size', 1) +# with open("record.machine","w") as _outfile: +# profile = {} +# profile['purpose'] = 'model_devi' +# profile['machine'] = mdata['model_devi_machine'] +# profile['resources'] = mdata['model_devi_resources'] +# profile['group_size'] = mdata['model_devi_group_size'] +# profile['command'] = mdata['model_devi_command'] +# +# json.dump(profile, _outfile, indent = 4) +# return mdata +# def decide_fp_machine(mdata): +# if LooseVersion(mdata.get('api_version', '0.9')) >= LooseVersion('1.0'): +# mdata['fp_group_size'] = mdata['fp'][0]['resources']['group_size'] +# if 'fp' in mdata: +# #ssert isinstance(mdata['fp']['machine'], list) +# #assert isinstance(mdata['fp']['resources'], list) +# #assert len(mdata['fp']['machine']) == len(mdata['fp']['resources']) +# continue_flag = False +# ## decide whether to use an existing machine +# if 'record.machine' in os.listdir(): +# try: +# with open('record.machine', 'r') as _infile: +# profile = json.load(_infile) +# if profile['purpose'] == 'fp': +# mdata['fp_machine'] = profile['machine'] +# mdata['fp_resources'] = profile['resources'] +# mdata['fp_command'] = profile['command'] 
+#                     mdata['fp_group_size'] = profile['group_size']
+#
+#                     continue_flag = True
+#             except:
+#                 pass
+#         if ("hostname" not in mdata["fp"][0]["machine"]) or (len(mdata["fp"]) == 1):
+#             mdata["fp_machine"] = mdata["fp"][0]["machine"]
+#             mdata["fp_resources"] = mdata["fp"][0]["resources"]
+#             mdata["fp_command"] = mdata["fp"][0]["command"]
+#             #if "group_size" in mdata["train"][0]:
+#             mdata["fp_group_size"] = mdata["fp"][0].get("group_size", 1)
+#             continue_flag = True
+#
+#
+#         pd_count_list =[]
+#         pd_flag = False
+#         if not continue_flag:
+#             for machine_idx in range(len(mdata['fp'])):
+#                 temp_machine = mdata['fp'][machine_idx]['machine']
+#                 temp_resources = mdata['fp'][machine_idx]['resources']
+#                 temp_ssh_sess = SSHSession(temp_machine)
+#                 cwd = os.getcwd()
+#                 temp_context = SSHContext(cwd, temp_ssh_sess)
+#                 if temp_machine['machine_type'] == 'lsf':
+#                     temp_batch = LSF(temp_context)
+#                 else:
+#                     temp_batch = Slurm(temp_context)
+#                 # For other type of machines, please add them using 'elif'.
+#                 # Here slurm is selected as the final choice in convinience.
+#                 command = temp_batch._make_squeue(temp_machine, temp_resources)
+#                 ret, stdin, stdout, stderr = temp_batch.context.block_call(command)
+#                 pd_response = stdout.read().decode('utf-8').split("\n")
+#                 pd_count = len(pd_response)
+#                 temp_context.clean()
+#                 #dlog.info(temp_machine["username"] + " " + temp_machine["hostname"] + " " + str(pd_count))
+#                 if pd_count ==0:
+#                     mdata['fp_machine'] = temp_machine
+#                     mdata['fp_resources'] = temp_resources
+#                     mdata['fp_command'] = mdata['fp'][machine_idx]['command']
+#                     mdata['fp_group_size'] = mdata['fp'][machine_idx].get('group_size', 1)
+#                     pd_flag = True
+#                     break
+#                 else:
+#                     pd_count_list.append(pd_count)
+#             if not pd_flag:
+#                 min_machine_idx = np.argsort(pd_count_list)[0]
+#                 mdata['fp_machine'] = mdata['fp'][min_machine_idx]['machine']
+#                 mdata['fp_resources'] = mdata['fp'][min_machine_idx]['resources']
+#                 mdata['fp_command'] = mdata['fp'][min_machine_idx]['command']
+#                 mdata['fp_group_size'] = mdata['fp'][min_machine_idx].get('group_size',1)
+#
+#             with open("record.machine","w") as _outfile:
+#                 profile = {}
+#                 profile['purpose'] = 'fp'
+#                 profile['machine'] = mdata['fp_machine']
+#                 profile['resources'] = mdata['fp_resources']
+#                 profile['group_size'] = mdata['fp_group_size']
+#                 profile['command'] = mdata['fp_command']
+#                 json.dump(profile, _outfile, indent = 4)
+#     return mdata
diff --git a/dpgen/simplify/simplify.py b/dpgen/simplify/simplify.py
index 9856dc58a..768d64835 100644
--- a/dpgen/simplify/simplify.py
+++ b/dpgen/simplify/simplify.py
@@ -22,12 +22,10 @@ from dpgen import dlog
 from dpgen import SHORT_CMD
 from dpgen.util import sepline
-from dpgen.remote.decide_machine import decide_train_machine
 from dpgen.dispatcher.Dispatcher import Dispatcher, make_dispatcher
 from dpgen.generator.run import make_train, run_train, post_train, run_fp, post_fp, fp_name, model_devi_name, train_name, train_task_fmt, sys_link_fp_vasp_pp, make_fp_vasp_incar, make_fp_vasp_kp, make_fp_vasp_cp_cvasp, data_system_fmt, model_devi_task_fmt, fp_task_fmt
 # TODO: maybe the following functions can be moved to dpgen.util
 from dpgen.generator.lib.utils import log_iter, make_iter_name, create_path, record_iter
-from dpgen.remote.decide_machine import decide_train_machine, decide_fp_machine, decide_model_devi_machine
 from dpgen.generator.lib.gaussian import make_gaussian_input
 
 
@@ -603,7 +601,8 @@ def run_iter(param_file, machine_file):
     listener = logging.handlers.QueueListener(que, smtp_handler)
     dlog.addHandler(queue_handler)
     listener.start()
-
+
+    mdata = convert_mdata(mdata)
     max_tasks = 10000
     numb_task = 9
     record = "record.dpgen"
@@ -638,7 +637,6 @@ def run_iter(param_file, machine_file):
             make_train(ii, jdata, mdata)
         elif jj == 1:
             log_iter("run_train", ii, jj)
-            mdata = decide_train_machine(mdata)
             #disp = make_dispatcher(mdata['train_machine'])
             run_train(ii, jdata, mdata)
         elif jj == 2:
@@ -651,7 +649,6 @@ def run_iter(param_file, machine_file):
                 break
         elif jj == 4:
             log_iter("run_model_devi", ii, jj)
-            mdata = decide_model_devi_machine(mdata)
             #disp = make_dispatcher(mdata['model_devi_machine'])
             run_model_devi(ii, jdata, mdata)
         elif jj == 5:
@@ -665,7 +662,6 @@ def run_iter(param_file, machine_file):
             if jdata.get("labeled", False):
                 dlog.info("already have labeled data, skip run_fp")
             else:
-                mdata = decide_fp_machine(mdata)
                 #disp = make_dispatcher(mdata['fp_machine'])
                 run_fp(ii, jdata, mdata)
         elif jj == 8:
diff --git a/examples/CH4-refact-dpdispatcher/machine-ali-ehpc.json b/examples/CH4-refact-dpdispatcher/machine-ali-ehpc.json
index 442ddb201..a90b04f35 100644
--- a/examples/CH4-refact-dpdispatcher/machine-ali-ehpc.json
+++ b/examples/CH4-refact-dpdispatcher/machine-ali-ehpc.json
@@ -46,7 +46,11 @@
             "queue_name": "T4_4_15",
             "group_size": 5,
             "source_list": ["/home/fengbo/deepmd.1.2.4.env"]
-        }
+        },
+        "_comments" : "In user_forward_files, define input files to be uploaded.",
+        "user_forward_files" : [],
+        "_comments" : "In user_backward_files, define output files to be collected.",
+        "user_backward_files" : ["HILLS"]
     }
  ],
 "fp":[
@@ -69,7 +73,11 @@
             "queue_name": "G_32_128",
             "group_size": 1,
             "source_list": ["~/vasp.env"]
-        }
+        },
+        "_comments" : "In user_forward_files, define input files to be uploaded.",
+        "user_forward_files" : ["vdw_kernel.bindat"],
+        "_comments" : "In user_backward_files, define output files to be collected.",
+        "user_backward_files" : []
     }
  ]
 }
diff --git a/examples/init/INCAR_methane.md b/examples/init/INCAR_methane.md
index a0e3ca29b..9831387aa 100644
--- a/examples/init/INCAR_methane.md
+++ b/examples/init/INCAR_methane.md
@@ -1,21 +1,33 @@
 PREC=A
-ENCUT=400
+ENCUT=400.000000
 ISYM=0
-ALGO=Fast
-EDIFF=1.000000e-06
-LREAL=False
+ALGO=fast
+EDIFF=1E-6
+LREAL=F
 NPAR=4
 KPAR=1
-NELM=120
-NELMIN=4
+
+NELM=200
+ISTART=0
+ICHARG=2
 ISIF=2
 ISMEAR=0
-SIGMA=0.20000
+SIGMA=0.200000
 IBRION=0
-POTIM=0.5
+MAXMIX=50
+NBLOCK=1
+KBLOCK=100
+
+SMASS=0
+POTIM=2
+TEBEG=50
+TEEND=50
+
 NSW=10
+
 LWAVE=F
 LCHARG=F
 PSTRESS=0
+
 KSPACING=0.500000
-KGAMMA=.FALSE.
+KGAMMA=F
diff --git a/examples/machine/DeePMD-kit-1.x/machine-local.json b/examples/machine/DeePMD-kit-1.x/machine-local.json
index 5c356baef..a266f712b 100644
--- a/examples/machine/DeePMD-kit-1.x/machine-local.json
+++ b/examples/machine/DeePMD-kit-1.x/machine-local.json
@@ -13,7 +13,7 @@
 
     "_comment": "model_devi on localhost ",
-    "lmp_command": "/home/wanghan/local/bin/lmp_mpi_010",
+    "model_devi_command": "/home/wanghan/local/bin/lmp_mpi_010",
     "model_devi_group_size": 5,
     "model_devi_machine": {
	"batch": "shell",
diff --git a/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json b/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json
index 25cb48349..6893471c5 100644
--- a/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json
+++ b/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json
@@ -27,7 +27,7 @@
 
     "_comment": "model_devi on localhost ",
-    "lmp_command": "/gpfs/home/tzhu/lammps-stable_5Jun2019/src/lmp_intel_cpu_intelmpi -pk intel 0 omp 2",
+    "model_devi_command": "/gpfs/home/tzhu/lammps-stable_5Jun2019/src/lmp_intel_cpu_intelmpi -pk intel 0 omp 2",
     "model_devi_group_size": 1,
     "model_devi_machine": {
	"machine_type": "lsf",
diff --git a/examples/machine/deprecated/DeePMD-kit-0.12/machine-aws.json b/examples/machine/deprecated/DeePMD-kit-0.12/machine-aws.json
index f4015b612..7d050b548 100644
--- a/examples/machine/deprecated/DeePMD-kit-0.12/machine-aws.json
+++ b/examples/machine/deprecated/DeePMD-kit-0.12/machine-aws.json
@@ -96,7 +96,7 @@
	"with_mpi":true
     },
     "deepmd_path": "/deepmd_root/",
-    "lmp_command":"/usr/bin/lmp_mpi",
+    "model_devi_command":"/usr/bin/lmp_mpi",
     "fp_command":"/usr/bin/vasp_std",
 
     "train_resources": {},
diff --git a/examples/machine/deprecated/DeePMD-kit-0.12/machine-local.json b/examples/machine/deprecated/DeePMD-kit-0.12/machine-local.json
index 057db2722..b8e15a625 100644
--- a/examples/machine/deprecated/DeePMD-kit-0.12/machine-local.json
+++ b/examples/machine/deprecated/DeePMD-kit-0.12/machine-local.json
@@ -14,7 +14,7 @@
 
     "_comment": "model_devi on localhost ",
-    "lmp_command": "/home/wanghan/local/bin/lmp_mpi_010",
+    "model_devi_command": "/home/wanghan/local/bin/lmp_mpi_010",
     "model_devi_group_size": 5,
     "model_devi_machine": {
	"batch": "shell",
diff --git a/examples/machine/deprecated/machine-hnu.json b/examples/machine/deprecated/machine-hnu.json
index 8b9ee8003..eb9cb91f2 100644
--- a/examples/machine/deprecated/machine-hnu.json
+++ b/examples/machine/deprecated/machine-hnu.json
@@ -21,7 +21,7 @@
	"_comment": "that's all"
     },
-    "lmp_command": "/home/llang/dp_v2/local/bin/lmp_mpi_0_12_0",
+    "model_devi_command": "/home/llang/dp_v2/local/bin/lmp_mpi_0_12_0",
     "model_devi_group_size": 10,
     "_comment": "model_devi on localhost ",
     "model_devi_machine": {
diff --git a/examples/machine/deprecated/machine-tiger-pwscf-della.json b/examples/machine/deprecated/machine-tiger-pwscf-della.json
index 7201947b1..44911f487 100644
--- a/examples/machine/deprecated/machine-tiger-pwscf-della.json
+++ b/examples/machine/deprecated/machine-tiger-pwscf-della.json
@@ -19,7 +19,7 @@
	"_comment": "that's all"
     },
-    "lmp_command": "/home/linfengz/SCR/wanghan/local/bin/lmp_serial_0110_gpu",
+    "model_devi_command": "/home/linfengz/SCR/wanghan/local/bin/lmp_serial_0110_gpu",
     "model_devi_group_size": 20,
     "_comment": "model_devi on localhost ",
     "model_devi_machine": {
diff --git a/examples/machine/deprecated/machine-tiger-vasp-della.json b/examples/machine/deprecated/machine-tiger-vasp-della.json
index 822788b8f..fa1fdf6e9 100644
--- a/examples/machine/deprecated/machine-tiger-vasp-della.json
+++ b/examples/machine/deprecated/machine-tiger-vasp-della.json
@@ -19,7 +19,7 @@
	"_comment": "that's all"
     },
-    "lmp_command": "/home/linfengz/SCR/wanghan/local/bin/lmp_serial_0110_gpu",
+    "model_devi_command": "/home/linfengz/SCR/wanghan/local/bin/lmp_serial_0110_gpu",
     "model_devi_group_size": 10,
     "_comment": "model_devi on localhost ",
     "model_devi_machine": {
diff --git a/examples/machine/deprecated/machine-tiger.json b/examples/machine/deprecated/machine-tiger.json
index b1400d76f..ccc1b573f 100644
--- a/examples/machine/deprecated/machine-tiger.json
+++ b/examples/machine/deprecated/machine-tiger.json
@@ -19,7 +19,7 @@
	"_comment": "that's all"
     },
-    "lmp_command": "/home/linfengz/SCR/wanghan/local/bin/lmp_serial_0110_gpu",
+    "model_devi_command": "/home/linfengz/SCR/wanghan/local/bin/lmp_serial_0110_gpu",
     "model_devi_group_size": 20,
     "_comment": "model_devi on localhost ",
     "model_devi_machine": {
diff --git a/examples/machine/deprecated/machine-ucloud.json b/examples/machine/deprecated/machine-ucloud.json
index 963c250e9..52e9040c1 100644
--- a/examples/machine/deprecated/machine-ucloud.json
+++ b/examples/machine/deprecated/machine-ucloud.json
@@ -30,7 +30,7 @@
     },
 
-    "lmp_command": "/usr/bin/lmp_mpi",
+    "model_devi_command": "/usr/bin/lmp_mpi",
     "model_devi_group_size": 20,
     "model_devi_machine": {
	"machine_type": "ucloud",
diff --git a/tests/generator/machine-local-v1.json b/tests/generator/machine-local-v1.json
index 7079678e8..2218884f2 100644
--- a/tests/generator/machine-local-v1.json
+++ b/tests/generator/machine-local-v1.json
@@ -28,7 +28,7 @@
	"source_list": [],
	"_comment": "that's All"
     },
-    "lmp_command": "/home/wanghan/local/bin/lmp_mpi_1_1_0",
+    "model_devi_command": "/home/wanghan/local/bin/lmp_mpi_1_1_0",
     "model_devi_group_size": 10,
 
     "fp_machine": {
diff --git a/tests/generator/machine-local.json b/tests/generator/machine-local.json
index 05a0f2811..a4743c964 100644
--- a/tests/generator/machine-local.json
+++ b/tests/generator/machine-local.json
@@ -18,7 +18,7 @@
 
     "_comment": "model_devi on localhost ",
-    "lmp_command": "/home/wanghan/local/bin/lmp_mpi_010",
+    "model_devi_command": "/home/wanghan/local/bin/lmp_mpi_010",
     "model_devi_group_size": 5,
     "model_devi_machine": {
	"machine_type": "local",
@@ -49,6 +49,6 @@
	"with_mpi": true,
	"_comment": "that's all"
     },
-
+    "fp_user_forward_files" : ["vdw_kernel.bindat"],
     "_comment": " that's all "
 }
diff --git a/tests/generator/test_make_fp.py b/tests/generator/test_make_fp.py
index 09ac5aede..914c9b149 100644
--- a/tests/generator/test_make_fp.py
+++ b/tests/generator/test_make_fp.py
@@ -481,6 +481,15 @@ def _check_pwmat_input(testCase, idx):
         testCase.assertEqual(lines.strip(), pwmat_input_ref.strip())
     os.chdir(cwd)
 
+def _check_symlink_user_forward_files(testCase, idx, file):
+    fp_path = os.path.join('iter.%06d' % idx, '02.fp')
+    tasks = glob.glob(os.path.join(fp_path, 'task.*'))
+    cwd = os.getcwd()
+    for ii in tasks:
+        os.chdir(ii)
+        testCase.assertEqual(os.path.isfile(file), True)
+        os.chdir(cwd)
+
 class TestMakeFPPwscf(unittest.TestCase):
     def test_make_fp_pwscf(self):
         setUpModule()
@@ -614,7 +623,7 @@ def test_make_fp_vasp(self):
         atom_types = [0, 1, 0, 1]
         type_map = jdata['type_map']
         _make_fake_md(0, md_descript, atom_types, type_map)
-        make_fp(0, jdata, {})
+        make_fp(0, jdata, {"fp_user_forward_files" : ["vdw_kernel.bindat"] })
         _check_sel(self, 0, jdata['fp_task_max'], jdata['model_devi_f_trust_lo'], jdata['model_devi_f_trust_hi'])
         _check_poscars(self, 0, jdata['fp_task_max'], jdata['type_map'])
         # _check_incar_exists(self, 0)
@@ -755,7 +764,7 @@ def test_make_fp_vasp_ele_temp(self):
         # checked elsewhere
         # _check_potcar(self, 0, jdata['fp_pp_path'], jdata['fp_pp_files'])
         shutil.rmtree('iter.000000')
-
+
 class TestMakeFPGaussian(unittest.TestCase):
     def make_fp_gaussian(self, multiplicity="auto"):
diff --git a/tests/generator/vdw_kernel.bindat b/tests/generator/vdw_kernel.bindat
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/tools/context.py b/tests/tools/context.py
index d4e70a8c5..1d3510786 100644
--- a/tests/tools/context.py
+++ b/tests/tools/context.py
@@ -8,3 +8,5 @@ def my_file_cmp(test, f0, f1):
         with open(f1) as fp1:
             test.assertTrue(fp0.read() == fp1.read())
 
+def setUpModule():
+    os.chdir(os.path.abspath(os.path.dirname(__file__)))
diff --git a/tests/tools/machine_fp_single.json b/tests/tools/machine_fp_single.json
new file mode 100644
index 000000000..f998388eb
--- /dev/null
+++ b/tests/tools/machine_fp_single.json
@@ -0,0 +1,15 @@
+{
+    "fp":[
+        {
+            "command": "vasp_std",
+            "machine":{
+                "batch_type": "PBS"
+            },
+            "resources": {
+                "group_size" : 8
+            },
+            "_comments" : "In user_forward_files, define input files to be uploaded.",
+            "user_forward_files" : ["vdw_kernel.bindat"]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/tests/tools/test_convert_mdata.py b/tests/tools/test_convert_mdata.py
new file mode 100644
index 000000000..5458b0faa
--- /dev/null
+++ b/tests/tools/test_convert_mdata.py
@@ -0,0 +1,17 @@
+import os,sys,json
+import unittest
+
+test_dir = os.path.abspath(os.path.join(os.path.dirname(__file__)))
+sys.path.insert(0, os.path.join(test_dir, '..'))
+__package__ = 'tools'
+from dpgen.remote.decide_machine import convert_mdata
+from .context import setUpModule
+machine_file = 'machine_fp_single.json'
+class TestConvertMdata(unittest.TestCase):
+    def test_convert_mdata (self):
+        mdata = json.load(open(machine_file))
+        mdata = convert_mdata(mdata, ["fp"])
+        self.assertEqual(mdata["fp_command"], "vasp_std")
+        self.assertEqual(mdata["fp_group_size"], 8)
+        self.assertEqual(mdata["fp_machine"]["batch_type"], "PBS")
+        self.assertEqual(mdata["fp_user_forward_files"], ["vdw_kernel.bindat"])

From 483932813e1712a70b873015079ea03e0d252a44 Mon Sep 17 00:00:00 2001
From: Ericwang6
Date: Mon, 9 Aug 2021 11:57:15 +0800
Subject: [PATCH 10/23] update ut for gmx engine

---
 tests/generator/test_gromacs_engine.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/generator/test_gromacs_engine.py b/tests/generator/test_gromacs_engine.py
index cfbeaa997..fe7da360e 100644
--- a/tests/generator/test_gromacs_engine.py
+++ b/tests/generator/test_gromacs_engine.py
@@ -91,7 +91,7 @@ def _copy_outputs(self, path_1, path_2):
     def test_make_model_devi_gromacs(self):
         flag = make_model_devi(iter_index=0,
                                jdata=self.jdata,
-                               mdata={})
+                               mdata={"deepmd_version": "2.0"})
         self.assertTrue(flag)
         self.assertTrue(os.path.exists(self.model_devi_path))
         self.assertTrue(os.path.exists(self.model_devi_task_path))
@@ -108,7 +108,7 @@ def test_make_fp_gaussian(self):
 
     def tearDown(self):
-        #pass
+        # pass
         shutil.rmtree(self.iter_path)
 
 if __name__ == '__main__':
     unittest.main()

From f1a815cd843d6cbfd47df8cace2f79d9e06cb7e8 Mon Sep 17 00:00:00 2001
From: Ericwang6
Date: Mon, 9 Aug 2021 11:58:45 +0800
Subject: [PATCH 11/23] BUG FIX: handle exception of UnboundLocalError when
 parsing gaussian log

---
 dpgen/generator/run.py | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py
index d3a508780..571ac290f 100644
--- a/dpgen/generator/run.py
+++ b/dpgen/generator/run.py
@@ -407,7 +407,7 @@ def make_train (iter_index,
                 jinput['model']['fitting_net']['activation_function'] = model_devi_activation_func[ii][1]
             if len(np.array(model_devi_activation_func).shape) == 1 : # for backward compatibility, 1-dim list, not net-resolved
                 jinput['model']['descriptor']['activation_function'] = model_devi_activation_func[ii]
-                jinput['model']['descriptor']['activation_function'] = model_devi_activation_func[ii]
+                jinput['model']['fitting_net']['activation_function'] = model_devi_activation_func[ii]
         # dump the input.json
         with open(os.path.join(task_path, train_input_file), 'w') as outfile:
             json.dump(jinput, outfile, indent = 4)
@@ -2524,18 +2524,22 @@ def post_fp_gaussian (iter_index,
         sys_output = glob.glob(os.path.join(work_path, "task.%s.*/output"%ss))
         sys_output.sort()
         for idx,oo in enumerate(sys_output) :
-            sys = dpdata.LabeledSystem(oo, fmt = 'gaussian/log')
-            if len(sys) > 0:
-                sys.check_type_map(type_map = jdata['type_map'])
-            if jdata.get('use_atom_pref', False):
-                sys.data['atom_pref'] = np.load(os.path.join(os.path.dirname(oo), "atom_pref.npy"))
-            if idx == 0:
-                if jdata.get('use_clusters', False):
-                    all_sys = dpdata.MultiSystems(sys, type_map = jdata['type_map'])
+            # TODO : UnboundLocalError sometimes occurs and I cannot figure it out.
+            try:
+                sys = dpdata.LabeledSystem(oo, fmt = 'gaussian/log')
+                if len(sys) > 0:
+                    sys.check_type_map(type_map = jdata['type_map'])
+                if jdata.get('use_atom_pref', False):
+                    sys.data['atom_pref'] = np.load(os.path.join(os.path.dirname(oo), "atom_pref.npy"))
+                if idx == 0:
+                    if jdata.get('use_clusters', False):
+                        all_sys = dpdata.MultiSystems(sys, type_map = jdata['type_map'])
+                    else:
+                        all_sys = sys
                 else:
-                    all_sys = sys
-            else:
-                all_sys.append(sys)
+                    all_sys.append(sys)
+            except UnboundLocalError as e:
+                pass
         sys_data_path = os.path.join(work_path, 'data.%s'%ss)
         all_sys.to_deepmd_raw(sys_data_path)
         all_sys.to_deepmd_npy(sys_data_path, set_size = len(sys_output))

From 06e59598f2fbdb3f073d8367ac7813544e3b7fba Mon Sep 17 00:00:00 2001
From: Yingze Wang
Date: Mon, 9 Aug 2021 16:04:00 +0800
Subject: [PATCH 12/23] Remove catching UnboundLocalError & refine code

---
 dpgen/generator/run.py | 46 ++++++++++++++++++------------------------
 1 file changed, 20 insertions(+), 26 deletions(-)

diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py
index 29d44425c..9c33882bd 100644
--- a/dpgen/generator/run.py
+++ b/dpgen/generator/run.py
@@ -187,7 +187,7 @@ def dump_to_deepmd_raw(dump, deepmd_raw, type_map, fmt='gromacs/gro', charge=None):
     system = dpdata.System(dump, fmt = fmt, type_map = type_map)
     system.to_deepmd_raw(deepmd_raw)
     if charge is not None:
-        with open(os.path.join(deepmd_raw, "charge"), 'w+') as f:
+        with open(os.path.join(deepmd_raw, "charge"), 'w') as f:
             f.write(str(charge))
 
 
@@ -1101,16 +1101,13 @@ def _make_model_devi_native_gromacs(iter_index, jdata, mdata, conf_systems):
         else:
             model_devi_dt = jdata['model_devi_dt']
         nsteps = cur_job.get("nsteps", None)
-        lambdas = cur_job.get("lambdas", [])
-        temps = cur_job.get("temps", [])
-        if not lambdas:
-            lambdas = [1.0]
-        else:
-            for ll in lambdas:
-                if ll > 1:
-                    raise RuntimeError("lambda is larger than 1.0")
-        if not temps:
-            temps = [298.0]
+        lambdas = cur_job.get("lambdas", [1.0])
+        temps = cur_job.get("temps", [298.0])
+
+        for ll in lambdas:
+            if ll > 1:
+                raise RuntimeError("lambda is larger than 1.0")
+
         if nsteps is None:
RuntimeError("nsteps is None, you should set nsteps in model_devi_jobs!") # Currently Gromacs engine is not supported for different temperatures! @@ -2534,22 +2531,19 @@ def post_fp_gaussian (iter_index, sys_output = glob.glob(os.path.join(work_path, "task.%s.*/output"%ss)) sys_output.sort() for idx,oo in enumerate(sys_output) : - # TODO : UnboundLocalError sometimes occurs and I cannot figure it out. - try: - sys = dpdata.LabeledSystem(oo, fmt = 'gaussian/log') - if len(sys) > 0: - sys.check_type_map(type_map = jdata['type_map']) - if jdata.get('use_atom_pref', False): - sys.data['atom_pref'] = np.load(os.path.join(os.path.dirname(oo), "atom_pref.npy")) - if idx == 0: - if jdata.get('use_clusters', False): - all_sys = dpdata.MultiSystems(sys, type_map = jdata['type_map']) - else: - all_sys = sys + # TODO : UnboundLocalError sometimes occurs when parsing gaussian log + sys = dpdata.LabeledSystem(oo, fmt = 'gaussian/log') + if len(sys) > 0: + sys.check_type_map(type_map = jdata['type_map']) + if jdata.get('use_atom_pref', False): + sys.data['atom_pref'] = np.load(os.path.join(os.path.dirname(oo), "atom_pref.npy")) + if idx == 0: + if jdata.get('use_clusters', False): + all_sys = dpdata.MultiSystems(sys, type_map = jdata['type_map']) else: - all_sys.append(sys) - except UnboundLocalError as e: - pass + all_sys = sys + else: + all_sys.append(sys) sys_data_path = os.path.join(work_path, 'data.%s'%ss) all_sys.to_deepmd_raw(sys_data_path) all_sys.to_deepmd_npy(sys_data_path, set_size = len(sys_output)) From 94fdf09772aee6b97a4647aa2a657ec024223ed8 Mon Sep 17 00:00:00 2001 From: Yongbin Zhuang <38876805+robinzyb@users.noreply.github.com> Date: Tue, 10 Aug 2021 09:20:24 +0800 Subject: [PATCH 13/23] exclude no converged task for cp2k fp (#486) * Update CP2K new example and slurm-lsf-mix machine * exclude the unconverge fp task in data set * modify the way to count failed fp * Update run.py * Update run.py --- dpgen/generator/run.py | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index 7958e492e..f8134c0bd 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -2521,20 +2521,31 @@ def post_fp_cp2k (iter_index, system_index.sort() cwd = os.getcwd() + # tcount: num of all fp tasks + tcount = 0 + # icount: num of converged fp tasks + icount = 0 for ss in system_index : sys_output = glob.glob(os.path.join(work_path, "task.%s.*/output"%ss)) sys_output.sort() - for idx,oo in enumerate(sys_output) : - sys = dpdata.LabeledSystem(oo, fmt = 'cp2k/output') - if len(sys) > 0: - sys.check_type_map(type_map = jdata['type_map']) - if idx == 0: - all_sys = sys + tcount += len(sys_output) + all_sys = None + for oo in sys_output : + _sys = dpdata.LabeledSystem(oo, fmt = 'cp2k/output') + _sys.check_type_map(type_map = jdata['type_map']) + if all_sys is None: + all_sys = _sys else: - all_sys.append(sys) - sys_data_path = os.path.join(work_path, 'data.%s'%ss) - all_sys.to_deepmd_raw(sys_data_path) - all_sys.to_deepmd_npy(sys_data_path, set_size = len(sys_output)) + all_sys.append(_sys) + + + icount += len(all_sys) + if all_sys is not None: + sys_data_path = os.path.join(work_path, 'data.%s'%ss) + all_sys.to_deepmd_raw(sys_data_path) + all_sys.to_deepmd_npy(sys_data_path, set_size = len(sys_output)) + dlog.info("failed frame number: %s "%(tcount-icount)) + dlog.info("total frame number: %s "%tcount) def post_fp_pwmat (iter_index, From a36abfd4679e3c9adca7b3ad49d8fcaa9f3ed795 Mon Sep 17 00:00:00 2001 From: 
From: =?UTF-8?q?Yuan=20Fengbo=20=28=E8=A2=81=E5=A5=89=E5=8D=9A=29?= <757627927@qq.com>
Date: Tue, 10 Aug 2021 09:21:20 +0800
Subject: [PATCH 14/23] add conda publish github action (#488)

* update conda release CI
* fix publish conda workflow
* add conda publish github action; it will release dpgen to the conda
  deepmodeling channel automatically when a new tag is added
* update conda build actions
* upload conda .sh installer to github when a new tag is added
* update conda build CICD
* use local source code in conda build CICD

Co-authored-by: Jinzhe Zeng
Co-authored-by: felix5572
Co-authored-by: Jinzhe Zeng

---
 .github/workflows/release.yml | 34 +++++++++++++++++++++++
 conda/conda_build_config.yaml |  5 ++++
 conda/construct.yaml          | 18 ++++++++++++
 conda/meta.yaml               | 52 +++++++++++++++++++++++++++++++++++
 4 files changed, 109 insertions(+)
 create mode 100644 .github/workflows/release.yml
 create mode 100644 conda/conda_build_config.yaml
 create mode 100644 conda/construct.yaml
 create mode 100644 conda/meta.yaml

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 000000000..939a76699
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,34 @@
+name: release_conda_and_github
+
+on:
+  push:
+    tags:
+      - 'v*'
+
+jobs:
+  publish-conda:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: publish-to-conda
+        uses: felix5572/conda-publish-action@v1.9
+        with:
+          subdir: 'conda'
+          anacondatoken: ${{ secrets.ANACONDA_TOKEN }}
+          platforms: 'noarch'
+  construct-and-publish:
+    runs-on: ubuntu-latest
+    needs: publish-conda
+    steps:
+      - uses: actions/checkout@master
+      - uses: s-weigand/setup-conda@v1
+      - run: conda install constructor jinja2 -y
+      - run: constructor ./conda
+      - name: Upload to release
+        uses: softprops/action-gh-release@master
+        if: startsWith(github.ref, 'refs/tags/')
+        with:
+          files: dpgen-*.sh
+          repository: ${{ env.GITHUB_REPOSITORY }}
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/conda/conda_build_config.yaml b/conda/conda_build_config.yaml
new file mode 100644
index 000000000..2cb7c1896
--- /dev/null
+++ b/conda/conda_build_config.yaml
@@ -0,0 +1,5 @@
+channel_sources:
+  - defaults
+  - conda-forge
+channel_targets:
+  - deepmodeling
diff --git a/conda/construct.yaml b/conda/construct.yaml
new file mode 100644
index 000000000..c3c6005de
--- /dev/null
+++ b/conda/construct.yaml
@@ -0,0 +1,18 @@
+{% set version = os.popen('git describe --tags --abbrev=0').read().strip('\n').lstrip('v').replace('-', '_') %}
+
+name: dpgen
+version: {{ version }}
+
+channels:
+  - defaults
+  - conda-forge
+  - deepmodeling
+
+specs:
+  - python 3.8
+  - pip
+  - dpgen {{ version }}
+
+ignore_duplicate_files: True
+
+license_file: ../LICENSE
diff --git a/conda/meta.yaml b/conda/meta.yaml
new file mode 100644
index 000000000..c599c642f
--- /dev/null
+++ b/conda/meta.yaml
@@ -0,0 +1,52 @@
+{% set name = "dpgen" %}
+{% set version = os.popen('git describe --tags --abbrev=0').read().strip('\n').lstrip('v').replace('-', '_') %}
+
+package:
+  name: {{ name|lower }}
+  version: {{ version }}
+
+source:
+  path: ..
+
+build:
+  number: 0
+  noarch: python
+  script: python -m pip install --no-deps --ignore-installed .
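+  # NOTE: `version` above is derived from the latest git tag via `git describe`,
+  # which is why `git` is listed under the build requirements below.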
+
+requirements:
+  build:
+    - git
+  host:
+    - python >=3.6
+    - pip
+    - setuptools_scm
+    - dargs
+    - paramiko
+    - requests
+    - dpdata
+    - dpdispatcher
+
+  run:
+    - python >=3.6
+    - dargs
+    - paramiko
+    - requests
+    - dpdata
+    - dpdispatcher
+    - pymatgen
+
+test:
+  imports:
+    - dpgen
+
+about:
+  home: https://github.com/deepmodeling/dpgen
+  license: LGPL-3.0
+  license_family: LGPL
+  license_file: LICENSE
+  doc_url: https://github.com/deepmodeling/dpgen
+  dev_url: https://github.com/deepmodeling/dpgen
+
+extra:
+  recipe-maintainers:
+    - felix5572

From 9108224b459e9d00585d203e9db649c485de9d0b Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng
Date: Mon, 9 Aug 2021 21:21:46 -0400
Subject: [PATCH 15/23] rename `n_neuron` to `neuron` (#492)

A bug in DeePMD-kit v1.3.0 ~ v1.3.3 will ignore `n_neuron`
(deepmodeling/deepmd-kit#846). Rename it to avoid breakage for anyone
using these versions.

---
 examples/run/dp1.x-lammps-cp2k/methane/param-ch4.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/run/dp1.x-lammps-cp2k/methane/param-ch4.json b/examples/run/dp1.x-lammps-cp2k/methane/param-ch4.json
index 273854d93..fec7fcf92 100644
--- a/examples/run/dp1.x-lammps-cp2k/methane/param-ch4.json
+++ b/examples/run/dp1.x-lammps-cp2k/methane/param-ch4.json
@@ -48,7 +48,7 @@
             "seed": 1
         },
         "fitting_net": {
-            "n_neuron": [
+            "neuron": [
                 120,
                 120,
                 120

From 11ce22817b6017107b008d3d365f498b869eedc1 Mon Sep 17 00:00:00 2001
From: Han Wang
Date: Wed, 11 Aug 2021 10:41:05 +0800
Subject: [PATCH 16/23] Adaptive trust levels (#495)

* support adaptive trust level
* update README
* fix bugs in readme
* adaptive lower trust level support percentage of total number of frames

Co-authored-by: Han Wang

---
 README.md              |   9 +-
 dpgen/generator/run.py | 241 ++++++++++++++++++++++++++++++++---------
 2 files changed, 196 insertions(+), 54 deletions(-)

diff --git a/README.md b/README.md
index a7aa26bac..93a624f87 100644
--- a/README.md
+++ b/README.md
@@ -549,8 +549,13 @@ The bold notation of key (such as **type_map**) means that it's a necessary key
 | **model_devi_skip** | Integer | 0 | Number of structures skipped for fp in each MD |
 | **model_devi_f_trust_lo** | Float | 0.05 | Lower bound of forces for the selection. |
 | **model_devi_f_trust_hi** | Float | 0.15 | Upper bound of forces for the selection |
-| **model_devi_e_trust_lo** | Float | 1e10 | Lower bound of energies for the selection. Recommend to set them a high number, since forces provide more precise information. Special cases such as energy minimization may need this. |
-| **model_devi_e_trust_hi** | Float | 1e10 | Upper bound of energies for the selection. |
+| **model_devi_v_trust_lo** | Float | 1e10 | Lower bound of virial for the selection. Should be used with DeePMD-kit v2.x |
+| **model_devi_v_trust_hi** | Float | 1e10 | Upper bound of virial for the selection. Should be used with DeePMD-kit v2.x |
+| model_devi_adapt_trust_lo | Boolean | False | Adaptively determines the lower trust levels of force and virial. This option should be used together with `model_devi_numb_candi_f`, `model_devi_numb_candi_v` and optionally with `model_devi_perc_candi_f` and `model_devi_perc_candi_v`. `dpgen` will make two sets: 1. From the frames with force model deviation lower than `model_devi_f_trust_hi`, select `max(model_devi_numb_candi_f, model_devi_perc_candi_f*n_frames)` frames with the largest force model deviation. 2. From the frames with virial model deviation lower than `model_devi_v_trust_hi`, select `max(model_devi_numb_candi_v, model_devi_perc_candi_v*n_frames)` frames with the largest virial model deviation. The union of the two sets is taken as the candidate dataset.|
+| model_devi_numb_candi_f | Int | 10 | See `model_devi_adapt_trust_lo`.|
+| model_devi_numb_candi_v | Int | 0 | See `model_devi_adapt_trust_lo`.|
+| model_devi_perc_candi_f | Float | 0.0 | See `model_devi_adapt_trust_lo`.|
+| model_devi_perc_candi_v | Float | 0.0 | See `model_devi_adapt_trust_lo`.|
 | **model_devi_clean_traj** | Boolean | true | Deciding whether to clean traj folders in MD since they are too large. |
 | **model_devi_nopbc** | Boolean | False | Assume open boundary condition in MD simulations. |
 | model_devi_activation_func | List of list of string | [["tanh","tanh"],["tanh","gelu"],["gelu","tanh"],["gelu","gelu"]] | Set activation functions for models, length of the List should be the same as `numb_models`, and two elements in the list of string respectively assign activation functions to the embedding and fitting nets within each model. *Backward compatibility*: the original "List of String" format is still supported, where embedding and fitting nets of one model use the same activation function, and the length of the List should be the same as `numb_models`|
diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py
index f8134c0bd..696fcc5f2 100644
--- a/dpgen/generator/run.py
+++ b/dpgen/generator/run.py
@@ -28,6 +28,7 @@
 import scipy.constants as pc
 from collections import Counter
 from distutils.version import LooseVersion
+from typing import List
 from numpy.linalg import norm
 from dpgen import dlog
 from dpgen import SHORT_CMD
@@ -1321,11 +1322,169 @@ def check_bad_box(conf_name,
             raise RuntimeError('unknow key', key)
     return is_bad
 
+
+def _select_by_model_devi_standard(
+        modd_system_task: List[str],
+        f_trust_lo : float,
+        f_trust_hi : float,
+        v_trust_lo : float,
+        v_trust_hi : float,
+        cluster_cutoff : float,
+        model_devi_skip : int = 0,
+        detailed_report_make_fp : bool = True,
+):
+    fp_candidate = []
+    fp_rest_accurate = []
+    fp_rest_failed = []
+    cc = 0
+    counter = Counter()
+    counter['candidate'] = 0
+    counter['failed'] = 0
+    counter['accurate'] = 0
+    for tt in modd_system_task :
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            all_conf = np.loadtxt(os.path.join(tt, 'model_devi.out'))
+            for ii in range(all_conf.shape[0]) :
+                if all_conf[ii][0] < model_devi_skip :
+                    continue
+                cc = int(all_conf[ii][0])
+                if cluster_cutoff is None:
+                    if (all_conf[ii][1] < v_trust_hi and all_conf[ii][1] >= v_trust_lo) or \
+                       (all_conf[ii][4] < f_trust_hi and all_conf[ii][4] >= f_trust_lo) :
+                        fp_candidate.append([tt, cc])
+                        counter['candidate'] += 1
+                    elif (all_conf[ii][1] >= v_trust_hi ) or (all_conf[ii][4] >= f_trust_hi ):
+                        if detailed_report_make_fp:
+                            fp_rest_failed.append([tt, cc])
+                        counter['failed'] += 1
+                    elif (all_conf[ii][1] < v_trust_lo and all_conf[ii][4] < f_trust_lo ):
+                        if detailed_report_make_fp:
+                            fp_rest_accurate.append([tt, cc])
+                        counter['accurate'] += 1
+                    else :
+                        raise RuntimeError('md traj %s frame %d with f devi %f does not belong to either accurate, candidate or failed, it should not happen' % (tt, ii, all_conf[ii][4]))
+                else:
+                    idx_candidate = np.where(np.logical_and(all_conf[ii][7:] < f_trust_hi, all_conf[ii][7:] >= f_trust_lo))[0]
+                    for jj in idx_candidate:
+                        fp_candidate.append([tt, cc, jj])
+                    counter['candidate'] += len(idx_candidate)
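+                    # in cluster mode every frame is classified atom by atom:
+                    # per-atom deviations below f_trust_lo count as accurate,
+                    # those at or above f_trust_hi count as failed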
+                    idx_rest_accurate = np.where(all_conf[ii][7:] < f_trust_lo)[0]
+                    if detailed_report_make_fp:
+                        for jj in idx_rest_accurate:
+                            fp_rest_accurate.append([tt, cc, jj])
+                    counter['accurate'] += len(idx_rest_accurate)
+                    idx_rest_failed = np.where(all_conf[ii][7:] >= f_trust_hi)[0]
+                    if detailed_report_make_fp:
+                        for jj in idx_rest_failed:
+                            fp_rest_failed.append([tt, cc, jj])
+                    counter['failed'] += len(idx_rest_failed)
+
+    return fp_rest_accurate, fp_candidate, fp_rest_failed, counter
+
+
+
+def _select_by_model_devi_adaptive_trust_low(
+        modd_system_task: List[str],
+        f_trust_hi : float,
+        numb_candi_f : int,
+        perc_candi_f : float,
+        v_trust_hi : float,
+        numb_candi_v : int,
+        perc_candi_v : float,
+        model_devi_skip : int = 0
+):
+    """
+    modd_system_task    model deviation tasks belonging to one system
+    f_trust_hi
+    numb_candi_f        number of candidates due to the f model deviation
+    perc_candi_f        percentage of candidates due to the f model deviation
+    v_trust_hi
+    numb_candi_v        number of candidates due to the v model deviation
+    perc_candi_v        percentage of candidates due to the v model deviation
+    model_devi_skip
+
+    returns
+    accur               the accurate set
+    candi               the candidate set
+    failed              the failed set
+    counter             counters, number of elements in the sets
+    f_trust_lo          adapted trust level of f
+    v_trust_lo          adapted trust level of v
+    """
+    idx_v = 1
+    idx_f = 4
+    accur = set()
+    candi = set()
+    failed = []
+    coll_v = []
+    coll_f = []
+    for tt in modd_system_task:
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            model_devi = np.loadtxt(os.path.join(tt, 'model_devi.out'))
+            for ii in range(model_devi.shape[0]) :
+                if model_devi[ii][0] < model_devi_skip :
+                    continue
+                cc = int(model_devi[ii][0])
+                # tt: name of task folder
+                # cc: time step of the frame
+                md_v = model_devi[ii][idx_v]
+                md_f = model_devi[ii][idx_f]
+                if md_f > f_trust_hi or md_v > v_trust_hi:
+                    failed.append([tt, cc])
+                else:
+                    coll_v.append([model_devi[ii][idx_v], tt, cc])
+                    coll_f.append([model_devi[ii][idx_f], tt, cc])
+                    # now accur takes all non-failed frames,
+                    # and will be subtracted by the candidate set later
+                    accur.add((tt, cc))
+    # sort
+    coll_v.sort()
+    coll_f.sort()
+    assert(len(coll_v) == len(coll_f))
+    # calculate numbers
+    numb_candi_v = max(numb_candi_v, int(perc_candi_v * 0.01 * len(coll_v)))
+    numb_candi_f = max(numb_candi_f, int(perc_candi_f * 0.01 * len(coll_f)))
+    # adjust the number of candidates
+    if len(coll_v) < numb_candi_v:
+        numb_candi_v = len(coll_v)
+    if len(coll_f) < numb_candi_f:
+        numb_candi_f = len(coll_f)
+    # compute trust lo
+    if numb_candi_v == 0:
+        v_trust_lo = v_trust_hi
+    else:
+        v_trust_lo = coll_v[-numb_candi_v][0]
+    if numb_candi_f == 0:
+        f_trust_lo = f_trust_hi
+    else:
+        f_trust_lo = coll_f[-numb_candi_f][0]
+    # add to the candidate set
+    for ii in range(len(coll_v) - numb_candi_v, len(coll_v)):
+        candi.add(tuple(coll_v[ii][1:]))
+    for ii in range(len(coll_f) - numb_candi_f, len(coll_f)):
+        candi.add(tuple(coll_f[ii][1:]))
+    # the accurate set is obtained by subtracting the candidate set
+    accur = accur - candi
+    # convert to list
+    candi = [list(ii) for ii in candi]
+    accur = [list(ii) for ii in accur]
+    # counters
+    counter = Counter()
+    counter['candidate'] = len(candi)
+    counter['failed'] = len(failed)
+    counter['accurate'] = len(accur)
+
+    return accur, candi, failed, counter, f_trust_lo, v_trust_lo
+
+
 def _make_fp_vasp_inner (modd_path,
                         work_path,
                         model_devi_skip,
-                        e_trust_lo,
-                        e_trust_hi,
+                        v_trust_lo,
+                        v_trust_hi,
                         f_trust_lo,
                         f_trust_hi,
                         fp_task_min,
@@ -1352,6 +1511,7 @@ def _make_fp_vasp_inner (modd_path,
     fp_tasks = []
     cluster_cutoff = jdata['cluster_cutoff'] if jdata.get('use_clusters', False) else None
+    model_devi_adapt_trust_lo = jdata.get('model_devi_adapt_trust_lo', False)
     # skip save *.out if detailed_report_make_fp is False, default is True
     detailed_report_make_fp = jdata.get("detailed_report_make_fp", True)
     # skip bad box criteria
@@ -1359,56 +1519,33 @@ def _make_fp_vasp_inner (modd_path,
     # skip discrete structure in cluster
     fp_cluster_vacuum = jdata.get('fp_cluster_vacuum',None)
     for ss in system_index :
-        fp_candidate = []
-        if detailed_report_make_fp:
-            fp_rest_accurate = []
-            fp_rest_failed = []
         modd_system_glob = os.path.join(modd_path, 'task.' + ss + '.*')
         modd_system_task = glob.glob(modd_system_glob)
        modd_system_task.sort()
-        cc = 0
-        counter = Counter()
-        counter['candidate'] = 0
-        counter['failed'] = 0
-        counter['accurate'] = 0
-        for tt in modd_system_task :
-            with warnings.catch_warnings():
-                warnings.simplefilter("ignore")
-                all_conf = np.loadtxt(os.path.join(tt, 'model_devi.out'))
-                for ii in range(all_conf.shape[0]) :
-                    if all_conf[ii][0] < model_devi_skip :
-                        continue
-                    cc = int(all_conf[ii][0])
-                    if cluster_cutoff is None:
-                        if (all_conf[ii][1] < e_trust_hi and all_conf[ii][1] >= e_trust_lo) or \
-                           (all_conf[ii][4] < f_trust_hi and all_conf[ii][4] >= f_trust_lo) :
-                            fp_candidate.append([tt, cc])
-                            counter['candidate'] += 1
-                        elif (all_conf[ii][1] >= e_trust_hi ) or (all_conf[ii][4] >= f_trust_hi ):
-                            if detailed_report_make_fp:
-                                fp_rest_failed.append([tt, cc])
-                            counter['failed'] += 1
-                        elif (all_conf[ii][1] < e_trust_lo and all_conf[ii][4] < f_trust_lo ):
-                            if detailed_report_make_fp:
-                                fp_rest_accurate.append([tt, cc])
-                            counter['accurate'] += 1
-                        else :
-                            raise RuntimeError('md traj %s frame %d with f devi %f does not belong to either accurate, candidiate and failed, it should not happen' % (tt, ii, all_conf[ii][4]))
-                    else:
-                        idx_candidate = np.where(np.logical_and(all_conf[ii][7:] < f_trust_hi, all_conf[ii][7:] >= f_trust_lo))[0]
-                        for jj in idx_candidate:
-                            fp_candidate.append([tt, cc, jj])
-                        counter['candidate'] += len(idx_candidate)
-                        idx_rest_accurate = np.where(all_conf[ii][7:] < f_trust_lo)[0]
-                        if detailed_report_make_fp:
-                            for jj in idx_rest_accurate:
-                                fp_rest_accurate.append([tt, cc, jj])
-                        counter['accurate'] += len(idx_rest_accurate)
-                        idx_rest_failed = np.where(all_conf[ii][7:] >= f_trust_hi)[0]
-                        if detailed_report_make_fp:
-                            for jj in idx_rest_failed:
-                                fp_rest_failed.append([tt, cc, jj])
-                        counter['failed'] += len(idx_rest_failed)
+
+        # assumed e -> v : the old energy trust levels now act on the virial
+        if not model_devi_adapt_trust_lo:
+            fp_rest_accurate, fp_candidate, fp_rest_failed, counter \
+                = _select_by_model_devi_standard(
+                    modd_system_task,
+                    f_trust_lo, f_trust_hi,
+                    v_trust_lo, v_trust_hi,
+                    cluster_cutoff,
+                    model_devi_skip,
+                    detailed_report_make_fp = detailed_report_make_fp)
+        else:
+            numb_candi_f = jdata.get('model_devi_numb_candi_f', 10)
+            numb_candi_v = jdata.get('model_devi_numb_candi_v', 0)
+            perc_candi_f = jdata.get('model_devi_perc_candi_f', 0.)
+            perc_candi_v = jdata.get('model_devi_perc_candi_v', 0.)
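+            # defaults mirror the README: at least 10 force candidates, no virial
+            # candidates, and the *_perc_* values are percentages of all frames
+            # (0.0 disables the percentage criterion)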
+            fp_rest_accurate, fp_candidate, fp_rest_failed, counter, f_trust_lo_ad, v_trust_lo_ad \
+                = _select_by_model_devi_adaptive_trust_low(
+                    modd_system_task,
+                    f_trust_hi, numb_candi_f, perc_candi_f,
+                    v_trust_hi, numb_candi_v, perc_candi_v,
+                    model_devi_skip = model_devi_skip)
+            dlog.info("system {0:s} {1:9s} : f_trust_lo {2:6.3f} v_trust_lo {3:6.3f}".format(ss, 'adapted', f_trust_lo_ad, v_trust_lo_ad))
+
         # print a report
         fp_sum = sum(counter.values())
         for cc_key, cc_value in counter.items():
@@ -1768,8 +1905,8 @@ def _make_fp_vasp_configs(iter_index,
                           jdata):
     fp_task_max = jdata['fp_task_max']
     model_devi_skip = jdata['model_devi_skip']
-    e_trust_lo = 1e+10
-    e_trust_hi = 1e+10
+    v_trust_lo = jdata.get('model_devi_v_trust_lo', 1e10)
+    v_trust_hi = jdata.get('model_devi_v_trust_hi', 1e10)
     f_trust_lo = jdata['model_devi_f_trust_lo']
     f_trust_hi = jdata['model_devi_f_trust_hi']
     type_map = jdata['type_map']
@@ -1787,7 +1924,7 @@ def _make_fp_vasp_configs(iter_index,
     # make configs
     fp_tasks = _make_fp_vasp_inner(modd_path, work_path,
                                    model_devi_skip,
-                                   e_trust_lo, e_trust_hi,
+                                   v_trust_lo, v_trust_hi,
                                    f_trust_lo, f_trust_hi,
                                    task_min, fp_task_max,
                                    [],

From 1c18b1cecf241e1d74702e5c116a2f7a82b89bb4 Mon Sep 17 00:00:00 2001
From: Han Wang
Date: Thu, 12 Aug 2021 08:32:40 +0800
Subject: [PATCH 17/23] Support relative force model deviation by normalizing
 the RMS force magnitude (#496)

* support relative force model deviation normalized by avg f of MD trajectories
* update readme

Co-authored-by: Han Wang

---
 README.md                      |  1 +
 dpgen/generator/lib/lammps.py  | 38 +++++++++++++++++++++++++++++++---
 dpgen/generator/run.py         | 36 +++++++++++++++++++++++++-----
 tests/generator/context.py     |  1 +
 tests/generator/test_lammps.py | 38 ++++++++++++++++++++++++++++++++++
 5 files changed, 106 insertions(+), 8 deletions(-)
 create mode 100644 tests/generator/test_lammps.py

diff --git a/README.md b/README.md
index 93a624f87..52d608313 100644
--- a/README.md
+++ b/README.md
@@ -556,6 +556,7 @@ The bold notation of key (such as **type_map**) means that it's a necessary key
 | model_devi_numb_candi_v | Int | 0 | See `model_devi_adapt_trust_lo`.|
 | model_devi_perc_candi_f | Float | 0.0 | See `model_devi_adapt_trust_lo`.|
*Backward compatibility*: the orginal "List of String" format is still supported, where embedding and fitting nets of one model use the same activation function, and the length of the List should be the same as `numb_models`| diff --git a/dpgen/generator/lib/lammps.py b/dpgen/generator/lib/lammps.py index 03dca2b4e..3190d9f01 100644 --- a/dpgen/generator/lib/lammps.py +++ b/dpgen/generator/lib/lammps.py @@ -89,7 +89,7 @@ def make_lammps_input(ensemble, ret+= "\n" ret+= "thermo_style custom step temp pe ke etotal press vol lx ly lz xy xz yz\n" ret+= "thermo ${THERMO_FREQ}\n" - ret+= "dump 1 all custom ${DUMP_FREQ} traj/*.lammpstrj id type x y z\n" + ret+= "dump 1 all custom ${DUMP_FREQ} traj/*.lammpstrj id type x y z fx fy fz\n" ret+= "restart 10000 dpgen.restart\n" ret+= "\n" if pka_e is None : @@ -137,5 +137,37 @@ def make_lammps_input(ensemble, # cvt_lammps_conf('POSCAR', 'tmp.lmp') - - +def get_dumped_forces( + file_name): + with open(file_name) as fp: + lines = fp.read().split('\n') + natoms = None + for idx,ii in enumerate(lines): + if 'ITEM: NUMBER OF ATOMS' in ii: + natoms = int(lines[idx+1]) + break + if natoms is None: + raise RuntimeError('wrong dump file format, cannot find number of atoms', file_name) + idfx = None + for idx,ii in enumerate(lines): + if 'ITEM: ATOMS' in ii: + keys = ii + keys = keys.replace('ITEM: ATOMS', '') + keys = keys.split() + idfx = keys.index('fx') + idfy = keys.index('fy') + idfz = keys.index('fz') + break + if idfx is None: + raise RuntimeError('wrong dump file format, cannot find dump keys', file_name) + ret = [] + for ii in range(idx+1, idx+natoms+1): + words = lines[ii].split() + ret.append([ float(words[ii]) for ii in [idfx, idfy, idfz] ]) + ret = np.array(ret) + return ret + + +if __name__ == '__main__': + ret = get_dumped_forces('40.lammpstrj') + print(ret) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index 696fcc5f2..a00bc7c9a 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -40,7 +40,7 @@ from dpgen.generator.lib.utils import record_iter from dpgen.generator.lib.utils import log_task from dpgen.generator.lib.utils import symlink_user_forward_files -from dpgen.generator.lib.lammps import make_lammps_input +from dpgen.generator.lib.lammps import make_lammps_input, get_dumped_forces from dpgen.generator.lib.vasp import write_incar_dict from dpgen.generator.lib.vasp import make_vasp_incar_user_dict from dpgen.generator.lib.vasp import incar_upper @@ -1323,6 +1323,24 @@ def check_bad_box(conf_name, return is_bad +def _read_model_devi_file( + task_path : str, + model_devi_f_avg_relative : bool = False +): + model_devi = np.loadtxt(os.path.join(task_path, 'model_devi.out')) + if model_devi_f_avg_relative : + trajs = glob.glob(os.path.join(task_path, 'traj', '*.lammpstrj')) + all_f = [] + for ii in trajs: + all_f.append(get_dumped_forces(ii)) + all_f = np.array(all_f) + all_f = all_f.reshape([-1,3]) + avg_f = np.sqrt(np.average(np.sum(np.square(all_f), axis = 1))) + model_devi[:,4:7] = model_devi[:,4:7] / avg_f + np.savetxt(os.path.join(task_path, 'model_devi_avgf.out'), model_devi, fmt='%16.6e') + return model_devi + + def _select_by_model_devi_standard( modd_system_task: List[str], f_trust_lo : float, @@ -1331,6 +1349,7 @@ def _select_by_model_devi_standard( v_trust_hi : float, cluster_cutoff : float, model_devi_skip : int = 0, + model_devi_f_avg_relative : bool = False, detailed_report_make_fp : bool = True, ): fp_candidate = [] @@ -1345,7 +1364,7 @@ def _select_by_model_devi_standard( for tt in modd_system_task : 
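With the dump line extended to write fx fy fz, the new helper can recover per-atom forces from any dumped frame. A minimal usage sketch (the dump path is illustrative):

from dpgen.generator.lib.lammps import get_dumped_forces

forces = get_dumped_forces('traj/40.lammpstrj')  # (natoms, 3) array of fx fy fz
print(forces.shape)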
diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py
index 696fcc5f2..a00bc7c9a 100644
--- a/dpgen/generator/run.py
+++ b/dpgen/generator/run.py
@@ -40,7 +40,7 @@
 from dpgen.generator.lib.utils import record_iter
 from dpgen.generator.lib.utils import log_task
 from dpgen.generator.lib.utils import symlink_user_forward_files
-from dpgen.generator.lib.lammps import make_lammps_input
+from dpgen.generator.lib.lammps import make_lammps_input, get_dumped_forces
 from dpgen.generator.lib.vasp import write_incar_dict
 from dpgen.generator.lib.vasp import make_vasp_incar_user_dict
 from dpgen.generator.lib.vasp import incar_upper
@@ -1323,6 +1323,24 @@ def check_bad_box(conf_name,
     return is_bad
 
+def _read_model_devi_file(
+        task_path : str,
+        model_devi_f_avg_relative : bool = False
+):
+    model_devi = np.loadtxt(os.path.join(task_path, 'model_devi.out'))
+    if model_devi_f_avg_relative :
+        trajs = glob.glob(os.path.join(task_path, 'traj', '*.lammpstrj'))
+        all_f = []
+        for ii in trajs:
+            all_f.append(get_dumped_forces(ii))
+        all_f = np.array(all_f)
+        all_f = all_f.reshape([-1,3])
+        avg_f = np.sqrt(np.average(np.sum(np.square(all_f), axis = 1)))
+        model_devi[:,4:7] = model_devi[:,4:7] / avg_f
+        np.savetxt(os.path.join(task_path, 'model_devi_avgf.out'), model_devi, fmt='%16.6e')
+    return model_devi
+
+
 def _select_by_model_devi_standard(
         modd_system_task: List[str],
         f_trust_lo : float,
@@ -1331,6 +1349,7 @@
         v_trust_hi : float,
         cluster_cutoff : float,
         model_devi_skip : int = 0,
+        model_devi_f_avg_relative : bool = False,
         detailed_report_make_fp : bool = True,
     ):
     fp_candidate = []
@@ -1345,7 +1364,7 @@
     for tt in modd_system_task :
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
-            all_conf = np.loadtxt(os.path.join(tt, 'model_devi.out'))
+            all_conf = _read_model_devi_file(tt, model_devi_f_avg_relative)
         for ii in range(all_conf.shape[0]) :
             if all_conf[ii][0] < model_devi_skip :
                 continue
@@ -1393,7 +1412,8 @@ def _select_by_model_devi_adaptive_trust_low(
         v_trust_hi : float,
         numb_candi_v : int,
         perc_candi_v : float,
-        model_devi_skip : int = 0
+        model_devi_skip : int = 0,
+        model_devi_f_avg_relative : bool = False,
 ):
     """
     modd_system_task    model deviation tasks belonging to one system
@@ -1424,6 +1444,6 @@
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
-            model_devi = np.loadtxt(os.path.join(tt, 'model_devi.out'))
+            model_devi = _read_model_devi_file(tt, model_devi_f_avg_relative)
         for ii in range(model_devi.shape[0]) :
             if model_devi[ii][0] < model_devi_skip :
                 continue
@@ -1512,6 +1533,7 @@ def _make_fp_vasp_inner (modd_path,
     fp_tasks = []
     cluster_cutoff = jdata['cluster_cutoff'] if jdata.get('use_clusters', False) else None
     model_devi_adapt_trust_lo = jdata.get('model_devi_adapt_trust_lo', False)
+    model_devi_f_avg_relative = jdata.get('model_devi_f_avg_relative', False)
     # skip save *.out if detailed_report_make_fp is False, default is True
     detailed_report_make_fp = jdata.get("detailed_report_make_fp", True)
     # skip bad box criteria
@@ -1532,7 +1554,9 @@
                     v_trust_lo, v_trust_hi,
                     cluster_cutoff,
                     model_devi_skip,
-                    detailed_report_make_fp = detailed_report_make_fp)
+                    model_devi_f_avg_relative = model_devi_f_avg_relative,
+                    detailed_report_make_fp = detailed_report_make_fp,
+                    )
         else:
             numb_candi_f = jdata.get('model_devi_numb_candi_f', 10)
             numb_candi_v = jdata.get('model_devi_numb_candi_v', 0)
@@ -1543,7 +1567,9 @@
                     modd_system_task,
                     f_trust_hi, numb_candi_f, perc_candi_f,
                     v_trust_hi, numb_candi_v, perc_candi_v,
-                    model_devi_skip = model_devi_skip)
+                    model_devi_skip = model_devi_skip,
+                    model_devi_f_avg_relative = model_devi_f_avg_relative,
+                    )
             dlog.info("system {0:s} {1:9s} : f_trust_lo {2:6.3f} v_trust_lo {3:6.3f}".format(ss, 'adapted', f_trust_lo_ad, v_trust_lo_ad))
 
     # print a report
diff --git a/tests/generator/context.py b/tests/generator/context.py
index c79920396..a943bc895 100644
--- a/tests/generator/context.py
+++ b/tests/generator/context.py
@@ -4,6 +4,7 @@
 from dpgen.generator.run import *
 from dpgen.generator.lib.gaussian import detect_multiplicity
 from dpgen.generator.lib.ele_temp import NBandsEsti
+from dpgen.generator.lib.lammps import get_dumped_forces
 
 param_file = 'param-mg-vasp.json'
 param_file_v1 = 'param-mg-vasp-v1.json'
diff --git a/tests/generator/test_lammps.py b/tests/generator/test_lammps.py
new file mode 100644
index 000000000..b1dcc55a4
--- /dev/null
+++ b/tests/generator/test_lammps.py
@@ -0,0 +1,38 @@
+import os,sys,json,glob,shutil,textwrap
+import dpdata
+import numpy as np
+import unittest
+
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+__package__ = 'generator'
+from .context import get_dumped_forces
+
+class TestGetDumpForce(unittest.TestCase):
+    def setUp(self):
+        file_content = textwrap.dedent("""\
+ITEM: TIMESTEP
+40
+ITEM: NUMBER OF ATOMS
+2
+ITEM: BOX BOUNDS xy xz yz pp pp pp
+-2.9180686220264818e-04 8.0855380329747089e+00 1.4011011277606830e-07
+-2.9198257591541018e-04 8.0855378881632269e+00 3.3202396460852749e-08
+-2.9180686326490957e-04 8.0855378891632768e+00 -1.7571268247505500e-07
+ITEM: ATOMS id type x y z fx fy fz
+1 1 2.09532 8.19528 2.00538 -0.00569269 -0.0200373 -0.0342394
+2 1 -0.0727384 4.01773 4.05582 -0.0297083 0.0817184 0.0722508
+""")
+        with open('tmp.dump', 'w') as fp:
+            fp.write(file_content)
+        self.expected_f = [ -0.00569269, -0.0200373, -0.0342394, -0.0297083, 0.0817184, 0.0722508]
+
+    def tearDown(self):
+        if os.path.isfile('tmp.dump'):
+            os.remove('tmp.dump')
+
+    def test_read_dump(self):
+        ff = get_dumped_forces('tmp.dump')
+        self.assertEqual(ff.shape, (2, 3))
+        ff = ff.reshape([-1])
+        for ii in range(6):
+            self.assertAlmostEqual(ff[ii], self.expected_f[ii])
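The normalization added by this patch divides the force model-deviation columns (max/min/avg, columns 4:7 of model_devi.out) by the RMS force magnitude accumulated over the whole trajectory. A self-contained sketch of that arithmetic with stand-in data (shapes are illustrative):

import numpy as np

forces = np.random.randn(100, 64, 3)      # stand-in: frames x atoms x xyz
all_f = forces.reshape(-1, 3)
# RMS force magnitude: sqrt of the mean squared force norm
avg_f = np.sqrt(np.average(np.sum(np.square(all_f), axis=1)))
model_devi_f = np.random.rand(100, 3)     # stand-in: max/min/avg f deviation per frame
relative_devi_f = model_devi_f / avg_f    # dimensionless, as written to model_devi_avgf.out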
From a136b0f7202d5ddea9e3cbc593565131cc3527ca Mon Sep 17 00:00:00 2001
From: Ericwang6
Date: Thu, 12 Aug 2021 14:01:48 +0800
Subject: [PATCH 18/23] Bug fix for traj frequency in gromacs engine

---
 dpgen/generator/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py
index 571ac290f..83fcf82ac 100644
--- a/dpgen/generator/run.py
+++ b/dpgen/generator/run.py
@@ -1256,7 +1256,7 @@ def run_model_devi (iter_index,
             command += "&& echo -e \"%s\n%s\n\" | %s trjconv -s %s -f %s.trr -o %s -pbc mol -ur compact -center" % (grp_name, grp_name, lmp_exec, ref_filename, deffnm, traj_filename)
             command += "&& if [ ! -d traj ]; then \n mkdir traj; fi\n"
             command += f"python -c \"import dpdata;system = dpdata.System('{traj_filename}', fmt='gromacs/gro'); [system.to_gromacs_gro('traj/%d.gromacstrj' % (i * {trj_freq}), frame_idx=i) for i in range(system.get_nframes())]; system.to_deepmd_npy('traj_deepmd')\""
-            command += "&& dp model-devi -m ../graph.000.pb ../graph.001.pb ../graph.002.pb ../graph.003.pb -s traj_deepmd -o model_devi.out"
+            command += f"&& dp model-devi -m ../graph.000.pb ../graph.001.pb ../graph.002.pb ../graph.003.pb -s traj_deepmd -o model_devi.out -f {trj_freq}"
             commands = [command]
             forward_files = [mdp_filename, topol_filename, conf_filename, index_filename, ref_filename, "input.json", "job.json" ]
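The -f {trj_freq} fix matters because candidate frames are matched against the dumped trajectory by MD step: the first column of model_devi.out must line up with the frames written as traj/%d.gromacstrj above. A small sanity-check sketch, assuming the file layout produced by the command in this patch:

import numpy as np

trj_freq = 10  # illustrative; taken from model_devi_jobs in practice
model_devi = np.loadtxt('model_devi.out')
steps = model_devi[:, 0].astype(int)
assert (steps % trj_freq == 0).all(), 'model_devi steps must align with dumped frames'
frame_files = [f'traj/{s}.gromacstrj' for s in steps]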
"/work/path" + }, + "resources": { + "numb_node": 1, + "numb_gpu": 1, + "partition": "all", + "time_limit": "120:0:0", + "task_per_node": 8, + "source_list": [ + "/path/to/gromacs-dp/env" + ], + "module_list": [], + "exclude_list": [], + "envs": { + "GMX_DEEPMD_INPUT_JSON": "input.json" + } + }, + "command": "gmx_mpi", + "group_size": 1 + } + ], + "fp": [ + { + "machine": { + "batch": "slurm", + "work_path": "/work/path" + }, + "resources": { + "numb_node": 1, + "numb_gpu": 0, + "time_limit": "120:0:0", + "task_per_node": 28, + "partition": "cpu", + "exclude_list": [], + "source_list": [ + "/path/to/gaussian/bashrc" + ], + "module_list": [] + }, + "command": "g16 < input", + "group_size": 20 + } + ] +} diff --git a/examples/run/dp2.x-gromacs-gaussian/param.json b/examples/run/dp2.x-gromacs-gaussian/param.json new file mode 100644 index 000000000..30557d307 --- /dev/null +++ b/examples/run/dp2.x-gromacs-gaussian/param.json @@ -0,0 +1,160 @@ +{ + "type_map": ["H", "C", "N", "O", "F", "S", "Cl"], + "mass_map": [2, 12, 14, 16, 17, 32, 35], + "init_data_prefix": "/path/to/init/data", + "init_data_sys": ["data.init"], + "init_multi_systems": true, + "sys_configs_prefix": "/path/to/model/devi/data", + "sys_configs": [ + ["model_devi/CHEMBL3402749_500"], + ["model_devi/CHEMBL3402741_400"], + ["model_devi/CHEMBL3402748_5300"], + ["model_devi/CHEMBL3402743_42"], + ["model_devi/CHEMBL3402761_1"], + ["model_devi/CHEMBL3402756_2.7"], + ["model_devi/CHEMBL3402750_400"], + ["model_devi/CHEMBL3402764_90"], + ["model_devi/CHEMBL3402758_10"], + ["model_devi/CHEMBL3402754_40"], + ["model_devi/CHEMBL3402747_3400"], + ["model_devi/CHEMBL3402762_1"], + ["model_devi/CHEMBL3402744_300"], + ["model_devi/CHEMBL3402752_30000"], + ["model_devi/CHEMBL3402742_23"], + ["model_devi/CHEMBL3402759_5.7"], + ["model_devi/CHEMBL3402745_200"], + ["model_devi/CHEMBL3402757_6.5"], + ["model_devi/CHEMBL3402755_4200"], + ["model_devi/CHEMBL3402751_2100"], + ["model_devi/CHEMBL3402753_200"], + ["model_devi/CHEMBL3402763_90"], + ["model_devi/CHEMBL3402765_11-charged-pKa-8.1"], + ["model_devi/CHEMBL3402760_1"] + ], + "sys_charges": [ + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 1, + 0, + 0, + 1, + 0, + 0, + 1, + 1, + 0, + 1, + 0, + 0, + 0, + 1, + 1, + 1 + ], + "sys_format": "gromacs/gro", + "numb_models": 4, + "training_reuse_iter": 1, + "training_reuse_old_ratio": 0.3, + "training_reuse_start_lr": 0.0001, + "training_reuse_decay_steps": 500, + "training_reuse_numb_steps": 200000, + "training_reuse_start_pref_e": 1, + "training_reuse_start_pref_f": 100, + "train_param": "input.json", + "default_training_param": { + "model": { + "type_map": ["H", "C", "N", "O", "F", "S", "Cl"], + "descriptor": { + "type": "se_e2_a", + "sel": [40, 48, 48, 48, 48, 48, 48], + "rcut_smth": 0.50, + "rcut": 6.00, + "neuron": [25, 50, 100], + "resnet_dt": false, + "axis_neuron": 16, + "type_one_side": true + }, + "fitting_net": { + "neuron": [240, 240, 240], + "resnet_dt": true, + "type": "ener" + } + }, + "learning_rate": { + "type": "exp", + "start_lr": 0.001, + "decay_steps": 5000 + }, + "loss": { + "start_pref_e": 0.02, + "limit_pref_e": 8, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0 + }, + "training": { + "numb_steps": 1000000, + "disp_file": "lcurve.out", + "disp_freq": 1000, + "numb_test": 1, + "save_freq": 1000, + "save_ckpt": "model.ckpt" + } + }, + "model_devi_engine": "gromacs", + "gromacs_settings": { + "mdp_filename": "md.mdp", + "topol_filename": "processed.top", + "conf_filename": "npt.gro", + 
"index_filename": "index.raw", + "ref_filename": "em.tpr", + "model_devi_script": "model_devi.py", + "traj_filename": "deepmd_traj.gro", + "group_name": "Other" + }, + "model_devi_dt": 0.001, + "model_devi_f_trust_lo": 0.20, + "model_devi_f_trust_hi": 0.60, + "model_devi_e_trust_lo": 1e10, + "model_devi_e_trust_hi": 1e10, + "model_devi_clean_traj": false, + "model_devi_skip": 0, + "model_devi_nopbc": true, + "model_devi_activation_func": [ + ["tanh", "tanh"], + ["tanh", "tanh"], + ["tanh", "tanh"], + ["tanh", "tanh"] + ], + "model_devi_jobs": [ + { + "_idx": 0, + "ensemble": "nvt", + "nsteps": 100, + "press": [], + "sys_idx": [ 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23], + "temps": [], + "trj_freq": 1 + } + ], + "fp_style": "gaussian", + "shuffle_poscar": false, + "fp_task_max": 50, + "fp_task_min": 1, + "fp_pp_path": "./", + "fp_pp_files": [], + "fp_params": { + "keywords": "force m062x/6-31** nosymm", + "nproc": 28, + "multiplicity": 1 + } +} From 722944ed063fe683fa2fabb89d68cc40de70fcca Mon Sep 17 00:00:00 2001 From: fqgong Date: Thu, 12 Aug 2021 17:44:42 +0800 Subject: [PATCH 20/23] delete dump.0.xyz in back_forward file --- dpgen/generator/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index a00bc7c9a..a16a2f864 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -1205,7 +1205,7 @@ def run_model_devi (iter_index, if use_plm: forward_files += ['input.plumed'] # backward_files += ['output.plumed'] - backward_files += ['output.plumed','COLVAR','dump.0.xyz'] + backward_files += ['output.plumed','COLVAR'] if use_plm_path: forward_files += ['plmpath.pdb'] elif model_devi_engine == "gromacs": From f2397f3f2cd22cf1c87f44d3de3efbb6fce32e99 Mon Sep 17 00:00:00 2001 From: Ericwang6 Date: Fri, 13 Aug 2021 16:26:07 +0800 Subject: [PATCH 21/23] Change traning_reuse_stop_batch to 400000 & add lambda assert --- dpgen/generator/run.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index b693e83d4..ac006615a 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -214,7 +214,7 @@ def make_train (iter_index, elif 'training_reuse_numb_steps' in jdata.keys(): training_reuse_stop_batch = jdata['training_reuse_numb_steps'] else: - training_reuse_stop_batch = 40000 + training_reuse_stop_batch = 400000 training_reuse_start_lr = jdata.get('training_reuse_start_lr', 1e-4) training_reuse_start_pref_e = jdata.get('training_reuse_start_pref_e', 0.1) @@ -1105,8 +1105,7 @@ def _make_model_devi_native_gromacs(iter_index, jdata, mdata, conf_systems): temps = cur_job.get("temps", [298.0]) for ll in lambdas: - if ll > 1: - raise RuntimeError("lambda is larger than 1.0") + assert (ll >= 0.0 and ll <= 1.0), "Lambda should be in [0,1]" if nsteps is None: raise RuntimeError("nsteps is None, you should set nsteps in model_devi_jobs!") From da84c6949f1e4d76742b6fa9feb355fcfa4e40c5 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 24 Aug 2021 03:17:48 -0400 Subject: [PATCH 22/23] add CITATION.cff to show citation in GitHub (#517) --- CITATION.cff | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 CITATION.cff diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 000000000..5aed6c23a --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,25 @@ +preferred-citation: + type: article + authors: + - family-names: "Zhang" + given-names: "Yuzhi" + - family-names: "Wang" + 
given-names: "Haidi" + - family-names: "Chen" + given-names: "Weijie" + - family-names: "Zeng" + given-names: "Jinzhe" + - family-names: "Zhang" + given-names: "Linfeng" + - family-names: "Wang" + given-names: "Han" + - family-names: "E" + given-names: "Weinan" + doi: "10.1016/j.cpc.2020.107206" + journal: "Computer Physics Communications" + month: 8 + start: 107206 # First page number + end: 107206 # Last page number + title: "DP-GEN: A concurrent learning platform for the generation of reliable deep learning based potential energy models" + volume: 253 + year: 2020 From 9d4396d0735bcf65756d52ec054ff745bb96684e Mon Sep 17 00:00:00 2001 From: zhaohan <32747623+dingzhaohan@users.noreply.github.com> Date: Wed, 25 Aug 2021 11:35:00 +0800 Subject: [PATCH 23/23] modify decide_machine for older ALI-Dispatcher --- dpgen/remote/decide_machine.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dpgen/remote/decide_machine.py b/dpgen/remote/decide_machine.py index 5996b45b2..2688d3ca3 100644 --- a/dpgen/remote/decide_machine.py +++ b/dpgen/remote/decide_machine.py @@ -40,6 +40,7 @@ def convert_mdata(mdata, task_types=["train", "model_devi", "fp"]): if "comments" not in key: mdata[task_type + "_" + key] = item group_size = mdata[task_type][0]["resources"].get("group_size", 1) + if group_size == 1: group_size = mdata[task_type][0].get("group_size", 1) mdata[task_type + "_" + "group_size"] = group_size return mdata