From a7880b7826c33c1e0339fbe2dec550f5a71a89eb Mon Sep 17 00:00:00 2001
From: felix5572
Date: Mon, 28 Oct 2019 03:50:37 +0000
Subject: [PATCH 001/109] modify AWS.py

---
 dpgen/dispatcher/AWS.py | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/dpgen/dispatcher/AWS.py b/dpgen/dispatcher/AWS.py
index ddb91dad6..9bbd3ca85 100644
--- a/dpgen/dispatcher/AWS.py
+++ b/dpgen/dispatcher/AWS.py
@@ -5,15 +5,14 @@
 from dpgen.dispatcher.JobStatus import JobStatus
 from dpgen import dlog

+try:
+    import boto3
+except ModuleNotFoundError:
+    pass
+else:
+    batch_client = boto3.client('batch')

 class AWS(Batch):
-    try:
-        import boto3
-    except ModuleNotFoundError:
-        pass
-    else:
-        batch_client = boto3.client('batch')
-
     _query_max_results = 1000
     _query_time_interval = 30
     _job_id_map_status = {}
     _jobQueue = ""
@@ -44,19 +43,19 @@ def AWS_check_status(cls, job_id=""):
         """
         query_dict ={}
         if datetime.now().timestamp() > cls._query_next_allow_time:
-            cls.batch_client = boto3.client('batch')
             cls._query_next_allow_time=datetime.now().timestamp()+cls._query_time_interval
             for status in ['SUBMITTED', 'PENDING', 'RUNNABLE', 'STARTING', 'RUNNING','SUCCEEDED', 'FAILED']:
-                status_response = cls.batch_client.list_jobs(jobQueue=cls._jobQueue, jobStatus=status, maxResults=cls._query_max_results)
-                status_list=status_response.get('jobSummaryList', [])
-                for job_dict in status_list:
-                    cls._job_id_map_status.update({job_dict['jobId']: cls.map_aws_status_to_dpgen_status(job_dict['status'])})
-        # for job in cls._job_id_map_status:
-        #     cls._job_id_map_status[job]=query_dict.get(job, JobStatus.unknown)
+                nextToken = ''
+                while nextToken is not None:
+                    status_response = batch_client.list_jobs(jobQueue=cls._jobQueue, jobStatus=status, maxResults=100, nextToken=nextToken)
+                    status_list=status_response.get('jobSummaryList')
+                    nextToken = status_response.get('nextToken', None)
+                    for job_dict in status_list:
+                        cls._job_id_map_status.update({job_dict['jobId']: cls.map_aws_status_to_dpgen_status(job_dict['status'])})
         dlog.debug('20000:_map: %s' %(cls._job_id_map_status))
         dlog.debug('62000:job_id:%s, _query: %s, _map: %s' %(job_id, query_dict, cls._job_id_map_status))
         if job_id:
-            return cls._job_id_map_status.get(job_id, JobStatus.unknown)
+            return cls._job_id_map_status.get(job_id)
         return cls._job_id_map_status

@@ -67,7 +66,7 @@ def job_id(self):
         except AttributeError:
             if self.context.check_file_exists(self.job_id_name):
                 self._job_id = self.context.read_file(self.job_id_name)
-                response_list = self.__class__.batch_client.describe_jobs(jobs=[self._job_id]).get('jobs')
+                response_list = batch_client.describe_jobs(jobs=[self._job_id]).get('jobs')
                 try:
                     response = response_list[0]
                     jobQueue = response['jobQueue']
@@ -135,7 +134,7 @@ def do_submit(self,
         """
         jobName = os.path.join(self.context.remote_root,job_dirs.pop())[1:].replace('/','-').replace('.','_')
         jobName += ("_" + str(self.context.job_uuid))
-        response = self.__class__.batch_client.submit_job(jobName=jobName,
+        response = batch_client.submit_job(jobName=jobName,
                         jobQueue=res['jobQueue'],
                         jobDefinition=res['jobDefinition'],
                         parameters={'task_command':script_str},
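The substance of this patch is the paginated status query: AWS Batch's `list_jobs` returns at most one page of job summaries together with a `nextToken`, so the poller has to loop until no token comes back. Below is a self-contained sketch of the same pattern, assuming boto3 is installed and AWS credentials are configured; `list_all_jobs` is a name introduced here purely for illustration:

```python
# Sketch of the pagination pattern used in the patch above. list_jobs
# returns up to maxResults job summaries per call plus a nextToken;
# passing the token back retrieves the next page, and the token is
# absent (None) once the last page has been fetched.
import boto3

batch_client = boto3.client('batch')

def list_all_jobs(job_queue, status):
    jobs = []
    next_token = ''  # the patch likewise seeds the loop with an empty token
    while next_token is not None:
        response = batch_client.list_jobs(jobQueue=job_queue,
                                          jobStatus=status,
                                          maxResults=100,
                                          nextToken=next_token)
        jobs.extend(response.get('jobSummaryList', []))
        next_token = response.get('nextToken', None)
    return jobs
```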
From f31cf7b974c95877d3ca1edaf3f374d1cd094ad8 Mon Sep 17 00:00:00 2001
From: robinzhuang <38876805+robinzyb@users.noreply.github.com>
Date: Mon, 4 Nov 2019 17:41:11 +0100
Subject: [PATCH 002/109] make choice for vdw

---
 README.md | 17 +++
 dpgen/generator/lib/cp2k.py | 29 +++--
 tests/generator/context.py | 1 +
 .../param-pyridine-cp2k-choose-vdw.json | 115 ++++++++++++++++++
 tests/generator/param-pyridine-cp2k.json | 2 +
 tests/generator/test_make_fp.py | 107 +++++++++++++++-
 6 files changed, 257 insertions(+), 14 deletions(-)
 create mode 100644 tests/generator/param-pyridine-cp2k-choose-vdw.json

diff --git a/README.md b/README.md
index 3ebd81725..a1d708b60 100644
--- a/README.md
+++ b/README.md
@@ -506,6 +506,23 @@ The bold notation of key (such as **type_map**) means that it's a necessary key
 |**fp_params["kspacing"]** | Float| 0.4 | Sample factor in Brillouin zones.
 |**fp_params["mixingweight"]** | Float| 0.05 | Proportion of the output Density Matrix to be used for the input Density Matrix of the next SCF cycle (linear mixing).
 |**fp_params["NumberPulay"]** | Integer| 5 | Controls the Pulay convergence accelerator.
+| *fp_style == cp2k*
+| **fp_params** | Dict | | Parameters for the CP2K calculation; see manual.cp2k.org for details. Unless a key is marked "optional", it must be set. Basic knowledge of CP2K input is assumed.
+|**fp_params["cutoff"]**| String | 400 |
+|**fp_params["rel_cutoff"]**| String | 50 |
+|**fp_params["functional"]**| String | PBE |
+|**fp_params["max_scf"]**| String | 50 |
+|**fp_params["pair_potential_type"]**| String | DFTD3 | This is optional.
+|**fp_params["pair_potential_path"]**| String | "./cp2k_basis_pp_file/dftd3.dat" | Must be set if "pair_potential_type" is set.
+|**fp_params["pair_ref_functional"]**| String | PBE | Must be set if "pair_potential_type" is set.
+|**fp_params["basis_path"]**| String | "./cp2k_basis_pp_file/BASIS_MOLOPT" |
+|**fp_params["pp_path"]**| String | "./cp2k_basis_pp_file/GTH_POTENTIALS" |
+|**fp_params["element_list"]**| List | ["H","C","N"] |
+|**fp_params["basis_list"]**| List | ["DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH"] | Must be in the same order as element_list.
+|**fp_params["pp_list"]**| List | ["GTH-PBE-q1","GTH-PBE-q4","GTH-PBE-q5"] | Must be in the same order as element_list.
+
+
+

 ## Test: Auto-test for Deep Generator
 At this step, we assume that you have prepared some graph files like `graph.*.pb` and the particular pseudopotential `POTCAR`.
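For reference, the keys from the CP2K table above assemble into a single dict; a minimal sketch mirroring the test fixture added in this patch (this is the `user_fp_params`/`fp_params` object that `make_cp2k_input` consumes):

```python
# Minimal fp_params sketch for a CP2K task with the optional vdW correction.
# Omitting "pair_potential_type" (and the two keys tied to it) makes
# make_cp2k_input skip the whole VDW_POTENTIAL section.
fp_params = {
    "cutoff": "400",
    "rel_cutoff": "50",
    "functional": "PBE",
    "max_scf": "50",
    "pair_potential_type": "DFTD3",                        # optional
    "pair_potential_path": "./cp2k_basis_pp_file/dftd3.dat",
    "pair_ref_functional": "PBE",
    "basis_path": "./cp2k_basis_pp_file/BASIS_MOLOPT",
    "pp_path": "./cp2k_basis_pp_file/GTH_POTENTIALS",
    "element_list": ["H", "C", "N"],
    "basis_list": ["DZVP-MOLOPT-GTH"] * 3,                 # same order as element_list
    "pp_list": ["GTH-PBE-q1", "GTH-PBE-q4", "GTH-PBE-q5"],
}
```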
diff --git a/dpgen/generator/lib/cp2k.py b/dpgen/generator/lib/cp2k.py index 93476d864..ec3edf963 100644 --- a/dpgen/generator/lib/cp2k.py +++ b/dpgen/generator/lib/cp2k.py @@ -64,29 +64,34 @@ def make_cp2k_input(sys_data, fp_params): ot_section = section_add_keyword_and_value(ot_section, 'MINIMIZER', 'DIIS') ot_section = section_add_keyword_and_value(ot_section, 'PRECONDITIONER', 'FULL_SINGLE_INVERSE') +# outer_scf_section = make_section('OUTER_SCF') +# outer_scf_section = section_add_keyword_and_value(outer_scf_section, 'MAX_SCF', None) +# outer_scf_section = section_add_keyword_and_value(outer_scf_section, 'EPS_SCF', None) + scf_section = make_section('SCF') scf_section = section_add_keyword_and_value(scf_section, 'SCF_GUESS', 'ATOMIC') scf_section = section_add_keyword_and_value(scf_section, 'EPS_SCF', '1.0E-6') - scf_section = section_add_keyword_and_value(scf_section, 'MAX_SCF', '50') + scf_section = section_add_keyword_and_value(scf_section, 'MAX_SCF', fp_params['max_scf']) scf_section = section_add_subsection(scf_section, ot_section) - +# scf_section = section_add_subsection(scf_section, outer_scf_section) xc_functional_section = make_section('XC_FUNCTIONAL', fp_params['functional']) + if 'pair_potential_type' in fp_params : + pair_potential_section = make_section('PAIR_POTENTIAL') + pair_potential_section = section_add_keyword_and_value(pair_potential_section, 'TYPE', fp_params['pair_potential_type']) + pair_potential_section = section_add_keyword_and_value(pair_potential_section, 'PARAMETER_FILE_NAME', fp_params['pair_potential_path']) + pair_potential_section = section_add_keyword_and_value(pair_potential_section, 'REFERENCE_FUNCTIONAL',fp_params['pair_ref_functional']) - pair_potential_section = make_section('PAIR_POTENTIAL') - pair_potential_section = section_add_keyword_and_value(pair_potential_section, 'TYPE', 'DFTD3') - pair_potential_section = section_add_keyword_and_value(pair_potential_section, 'PARAMETER_FILE_NAME', fp_params['pair_potential_path']) - pair_potential_section = section_add_keyword_and_value(pair_potential_section, 'REFERENCE_FUNCTIONAL', fp_params['pair_ref_functional']) - - - vdw_potential_section = make_section('VDW_POTENTIAL') - vdw_potential_section = section_add_keyword_and_value(vdw_potential_section, 'DISPERSION_FUNCTIONAL', 'PAIR_POTENTIAL') - vdw_potential_section = section_add_subsection(vdw_potential_section, pair_potential_section) + if 'pair_potential_type' in fp_params : + vdw_potential_section = make_section('VDW_POTENTIAL') + vdw_potential_section = section_add_keyword_and_value(vdw_potential_section, 'DISPERSION_FUNCTIONAL', 'PAIR_POTENTIAL') + vdw_potential_section = section_add_subsection(vdw_potential_section, pair_potential_section) xc_section = make_section('XC') xc_section = section_add_subsection(xc_section, xc_functional_section) - xc_section = section_add_subsection(xc_section, vdw_potential_section) + if 'pair_potential_type' in fp_params : + xc_section = section_add_subsection(xc_section, vdw_potential_section) dft_section = make_section('DFT') diff --git a/tests/generator/context.py b/tests/generator/context.py index 75baecb0d..0668e7fe2 100644 --- a/tests/generator/context.py +++ b/tests/generator/context.py @@ -14,6 +14,7 @@ param_gaussian_file = 'param-pyridine-gaussian.json' param_siesta_file = 'param-pyridine-siesta.json' param_cp2k_file = 'param-pyridine-cp2k.json' +param_cp2k_file_v1 = 'param-pyridine-cp2k-choose-vdw.json' machine_file = 'machine-local.json' machine_file_v1 = 'machine-local-v1.json' param_diy_file = 
'param-mg-vasp-diy.json' diff --git a/tests/generator/param-pyridine-cp2k-choose-vdw.json b/tests/generator/param-pyridine-cp2k-choose-vdw.json new file mode 100644 index 000000000..824613714 --- /dev/null +++ b/tests/generator/param-pyridine-cp2k-choose-vdw.json @@ -0,0 +1,115 @@ +{ + "type_map": ["C", "H", "N"], + "mass_map": [16, 2, 14], + + "init_data_prefix": "/home/linfengz/SCR/wanghan/deepgen.pyridine/init", + "init_data_sys": ["Pyridine-I", + "Pyridine-II" + ], + "init_batch_size": [1, 1], + "sys_configs": [ + ["/home/linfengz/SCR/wanghan/data/pyridine/pyI.POSCAR.01x01x01/01.scale_pert/sys-0080-0080-0016/scale-1.000/00009?/POSCAR"], + ["/home/linfengz/SCR/wanghan/data/pyridine/pyI.POSCAR.01x01x01/01.scale_pert/sys-0080-0080-0016/scale-1.000/0000[7-8]?/POSCAR"], + ["/home/linfengz/SCR/wanghan/data/pyridine/pyI.POSCAR.01x01x01/01.scale_pert/sys-0080-0080-0016/scale-1.000/0000[5-6]?/POSCAR"], + ["/home/linfengz/SCR/wanghan/data/pyridine/pyI.POSCAR.01x01x01/01.scale_pert/sys-0080-0080-0016/scale-1.000/0000[0-4]?/POSCAR"], + ["/home/linfengz/SCR/wanghan/data/pyridine/pyII.POSCAR.01x01x01/01.scale_pert/sys-0080-0080-0016/scale-1.000/00009?/POSCAR"], + ["/home/linfengz/SCR/wanghan/data/pyridine/pyII.POSCAR.01x01x01/01.scale_pert/sys-0080-0080-0016/scale-1.000/0000[7-8]?/POSCAR"], + ["/home/linfengz/SCR/wanghan/data/pyridine/pyII.POSCAR.01x01x01/01.scale_pert/sys-0080-0080-0016/scale-1.000/0000[5-6]?/POSCAR"], + ["/home/linfengz/SCR/wanghan/data/pyridine/pyII.POSCAR.01x01x01/01.scale_pert/sys-0080-0080-0016/scale-1.000/0000[0-4]?/POSCAR"] + ], + "_comment": "0 1 2 3", + "_comment": "4 5 6 7", + "sys_batch_size": [1, 1, 1, 1, + 1, 1, 1, 1 + ], + + "_comment": " 00.train ", + "numb_models": 4, + "train_param": "input.json", + "default_training_param" : { + "_comment": " model parameters", + "use_smooth": true, + "sel_a": [81, 81, 20], + "rcut_smth": 0.50, + "rcut": 6.50, + "filter_neuron": [25, 50, 100], + "filter_resnet_dt": false, + "n_axis_neuron": 12, + "n_neuron": [240, 240, 240], + "resnet_dt": true, + "coord_norm": true, + "type_fitting_net": false, + + "_comment": " traing controls", + "systems": [], + "set_prefix": "set", + "stop_batch": 400000, + "batch_size": 1, + "start_lr": 0.002, + "decay_steps": 2000, + "decay_rate": 0.95, + "seed": 0, + + "start_pref_e": 0.02, + "limit_pref_e": 2, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0.0, + "limit_pref_v": 0.0, + + "_comment": " display and restart", + "_comment": " frequencies counted in batch", + "disp_file": "lcurve.out", + "disp_freq": 2000, + "numb_test": 10, + "save_freq": 20000, + "save_ckpt": "model.ckpt", + "load_ckpt": "model.ckpt", + "disp_training": true, + "time_training": true, + "profiling": false, + "profiling_file": "timeline.json", + + "_comment": "that's all" + }, + + "_comment": " 01.model_devi ", + "_comment": "model_devi_skip: the first x of the recorded frames", + "model_devi_dt": 0.001, + "model_devi_skip": 0, + "model_devi_f_trust_lo": 0.050, + "model_devi_f_trust_hi": 0.150, + "model_devi_e_trust_lo": 1e10, + "model_devi_e_trust_hi": 1e10, + "model_devi_clean_traj": false, + "model_devi_jobs": [ + {"sys_idx": [0,4], "temps": [ 50], "press": [1e0,1e1,1e2,1e3,1e4,2e4,4e4], "trj_freq": 10, "nsteps": 1000, "ensemble": "npt", "_idx": "00"}, + {"sys_idx": [1,5], "temps": [ 50], "press": [1e0,1e1,1e2,1e3,1e4,2e4,4e4], "trj_freq": 10, "nsteps": 1000, "ensemble": "npt", "_idx": "01"}, + {"sys_idx": [0,4], "temps": [ 50], "press": [1e0,1e1,1e2,1e3,1e4,2e4,4e4], "trj_freq": 10, "nsteps": 1000, 
"ensemble": "npt", "_idx": "02"}, + {"sys_idx": [1,5], "temps": [ 50], "press": [1e0,1e1,1e2,1e3,1e4,2e4,4e4], "trj_freq": 10, "nsteps": 1000, "ensemble": "npt", "_idx": "03"}, + {"sys_idx": [0,4], "temps": [ 100], "press": [1e0,1e1,1e2,1e3,1e4,2e4,4e4], "trj_freq": 10, "nsteps": 1000, "ensemble": "npt", "_idx": "04"}, + {"sys_idx": [1,5], "temps": [ 100], "press": [1e0,1e1,1e2,1e3,1e4,2e4,4e4], "trj_freq": 10, "nsteps": 1000, "ensemble": "npt", "_idx": "05"}, + {"sys_idx": [0,4], "temps": [ 100], "press": [1e0,1e1,1e2,1e3,1e4,2e4,4e4], "trj_freq": 10, "nsteps": 1000, "ensemble": "npt", "_idx": "06"}, + {"sys_idx": [1,5], "temps": [ 100], "press": [1e0,1e1,1e2,1e3,1e4,2e4,4e4], "trj_freq": 10, "nsteps": 1000, "ensemble": "npt", "_idx": "07"} + ], + + "_comment": " 02.fp ", + "fp_style": "cp2k", + "shuffle_poscar": false, + "fp_task_max": 100, + "fp_task_min": 10, + "fp_pp_path": ".", + "fp_pp_files": [], + "user_fp_params": { + "cutoff": "400", + "rel_cutoff": "50", + "functional": "PBE", + "max_scf": "320", + "basis_path": "./cp2k_basis_pp_file/BASIS_MOLOPT", + "pp_path": "./cp2k_basis_pp_file/GTH_POTENTIALS", + "element_list": ["H", "C", "N"], + "basis_list": ["DZVP-MOLOPT-GTH", "DZVP-MOLOPT-GTH", "DZVP-MOLOPT-GTH"], + "pp_list": ["GTH-PBE-q1", "GTH-PBE-q4", "GTH-PBE-q5"] + }, + "_comment": " that's all " +} diff --git a/tests/generator/param-pyridine-cp2k.json b/tests/generator/param-pyridine-cp2k.json index 09ccc4d3a..77796bb17 100644 --- a/tests/generator/param-pyridine-cp2k.json +++ b/tests/generator/param-pyridine-cp2k.json @@ -104,6 +104,8 @@ "cutoff": "400", "rel_cutoff": "50", "functional": "PBE", + "max_scf": "50", + "pair_potential_type": "DFTD3", "pair_potential_path": "./cp2k_basis_pp_file/dftd3.dat", "pair_ref_functional": "PBE", "basis_path": "./cp2k_basis_pp_file/BASIS_MOLOPT", diff --git a/tests/generator/test_make_fp.py b/tests/generator/test_make_fp.py index 20fdc51dc..963002974 100644 --- a/tests/generator/test_make_fp.py +++ b/tests/generator/test_make_fp.py @@ -15,6 +15,7 @@ from .context import param_siesta_file from .context import param_gaussian_file from .context import param_cp2k_file +from .context import param_cp2k_file_v1 from .context import machine_file from .context import param_diy_file from .context import make_kspacing_kpoints @@ -195,6 +196,67 @@ &END FORCE_EVAL\n" +cp2k_input_ref_v1="\ +&GLOBAL\n\ +PROJECT DPGEN\n\ +&END GLOBAL\n\ +&FORCE_EVAL\n\ +METHOD QS\n\ +STRESS_TENSOR ANALYTICAL\n\ +&DFT\n\ +BASIS_SET_FILE_NAME ./cp2k_basis_pp_file/BASIS_MOLOPT\n\ +POTENTIAL_FILE_NAME ./cp2k_basis_pp_file/GTH_POTENTIALS\n\ +CHARGE 0\n\ +UKS F\n\ +MULTIPLICITY 1\n\ +&MGRID\n\ +CUTOFF 400\n\ +REL_CUTOFF 50\n\ +NGRIDS 4\n\ +&END MGRID\n\ +&QS\n\ +EPS_DEFAULT 1.0E-12\n\ +&END QS\n\ +&SCF\n\ +SCF_GUESS ATOMIC\n\ +EPS_SCF 1.0E-6\n\ +MAX_SCF 320\n\ +&OT\n\ +MINIMIZER DIIS\n\ +PRECONDITIONER FULL_SINGLE_INVERSE\n\ +&END OT\n\ +&END SCF\n\ +&XC\n\ +&XC_FUNCTIONAL PBE\n\ +&END XC_FUNCTIONAL\n\ +&END XC\n\ +&END DFT\n\ +&SUBSYS\n\ +&CELL\n\ +&END CELL\n\ +&COORD\n\ +@include coord.xyz\n\ +&END COORD\n\ +&KIND H\n\ +BASIS_SET DZVP-MOLOPT-GTH\n\ +POTENTIAL GTH-PBE-q1\n\ +&END KIND\n\ +&KIND C\n\ +BASIS_SET DZVP-MOLOPT-GTH\n\ +POTENTIAL GTH-PBE-q4\n\ +&END KIND\n\ +&KIND N\n\ +BASIS_SET DZVP-MOLOPT-GTH\n\ +POTENTIAL GTH-PBE-q5\n\ +&END KIND\n\ +&END SUBSYS\n\ +&PRINT\n\ +&FORCES ON\n\ +&END FORCES\n\ +&END PRINT\n\ +&END FORCE_EVAL\n" + + def _box2lmpbox(orig, box) : lohi = np.zeros([3,2]) for dd in range(3) : @@ -384,7 +446,7 @@ def _check_incar(testCase, idx): fp_path = 
os.path.join('iter.%06d' % idx, '02.fp') tasks = glob.glob(os.path.join(fp_path, 'task.*')) cwd = os.getcwd() - for ii in tasks : + for ii in tasks : os.chdir(ii) with open('INCAR') as fp: incar = fp.read() @@ -395,7 +457,7 @@ def _check_incar_ele_temp(testCase, idx, ele_temp): fp_path = os.path.join('iter.%06d' % idx, '02.fp') tasks = glob.glob(os.path.join(fp_path, 'task.*')) cwd = os.getcwd() - for ii in tasks : + for ii in tasks : os.chdir(ii) bname = os.path.basename(ii) sidx = int(bname.split('.')[1]) @@ -472,6 +534,22 @@ def _check_cp2k_input_head(testCase, idx) : lines_check = lines[:cell_start_idx+1] + lines[cell_end_idx:] testCase.assertEqual(('\n'.join(lines_check)).strip(), cp2k_input_ref.strip()) +def _check_cp2k_input_head_v1(testCase, idx) : + fp_path = os.path.join('iter.%06d' % idx, '02.fp') + tasks = glob.glob(os.path.join(fp_path, 'task.*')) + for ii in tasks : + ifile = os.path.join(ii, 'input.inp') + testCase.assertTrue(os.path.isfile(ifile)) + with open(ifile) as fp: + lines = fp.read().split('\n') + for idx, jj in enumerate(lines) : + if '&CELL' in jj : + cell_start_idx = idx + if '&END CELL' in jj : + cell_end_idx = idx + lines_check = lines[:cell_start_idx+1] + lines[cell_end_idx:] + testCase.assertEqual(('\n'.join(lines_check)).strip(), cp2k_input_ref_v1.strip()) + class TestMakeFPPwscf(unittest.TestCase): def test_make_fp_pwscf(self): @@ -781,6 +859,31 @@ def test_make_fp_cp2k(self): _check_cp2k_input_head(self, 0) _check_potcar(self, 0, jdata['fp_pp_path'], jdata['fp_pp_files']) shutil.rmtree('iter.000000') + def test_make_fp_cp2k_choose_vdw(self): + if os.path.isdir('iter.000000') : + shutil.rmtree('iter.000000') + with open (param_cp2k_file_v1, 'r') as fp : + jdata = json.load (fp) + with open (machine_file, 'r') as fp: + mdata = json.load (fp) + md_descript = [] + nsys = 2 + nmd = 3 + n_frame = 10 + for ii in range(nsys) : + tmp = [] + for jj in range(nmd) : + tmp.append(np.arange(0, 0.29, 0.29/10)) + md_descript.append(tmp) + atom_types = [0, 1, 2, 2, 0, 1] + type_map = jdata['type_map'] + _make_fake_md(0, md_descript, atom_types, type_map) + make_fp(0, jdata, {}) + _check_sel(self, 0, jdata['fp_task_max'], jdata['model_devi_f_trust_lo'], jdata['model_devi_f_trust_hi']) + _check_poscars(self, 0, jdata['fp_task_max'], jdata['type_map']) + _check_cp2k_input_head_v1(self, 0) + _check_potcar(self, 0, jdata['fp_pp_path'], jdata['fp_pp_files']) + shutil.rmtree('iter.000000') if __name__ == '__main__': From 4502ba27870d242b78bee0f4def33adc3a788a11 Mon Sep 17 00:00:00 2001 From: felix5572 Date: Tue, 5 Nov 2019 09:04:19 +0000 Subject: [PATCH 003/109] auto gen --- dpgen/main.py | 8 ++++++++ dpgen/tools/auto_gen_param.py | 14 ++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/dpgen/main.py b/dpgen/main.py index c3a37260f..5d2c165aa 100644 --- a/dpgen/main.py +++ b/dpgen/main.py @@ -12,6 +12,7 @@ from dpgen.auto_test.run import gen_test from dpgen.database.run import db_run from dpgen.tools.run_report import run_report +from dpgen.tools.auto_gen_param import auto_gen_param from dpgen import info, __version__, __date__ @@ -54,6 +55,13 @@ def main(): parser_init_bulk.add_argument('MACHINE', type=str,default=None,nargs="?", help="machine file, json/yaml format") parser_init_bulk.set_defaults(func=gen_init_bulk) + + parser_auto_gen_param = subparsers.add_parser( + "auto_gen_param", help="auto gen param.json") + # parser_auto_gen_param.add_argument('meltpoint', type=float, help="melt point") + parser_auto_gen_param.add_argument('PARAM', type=str, + 
help="parameter file, json/yaml format") + parser_auto_gen_param.set_defaults(func=auto_gen_param) # parser_init.add_argument("-p",'--parameter', type=str, dest='param', # help="parameter file, json/yaml format") # parser_init.add_argument("-s","--stage", type=int, dest='stage', diff --git a/dpgen/tools/auto_gen_param.py b/dpgen/tools/auto_gen_param.py index 75faa7337..cd6252d3d 100755 --- a/dpgen/tools/auto_gen_param.py +++ b/dpgen/tools/auto_gen_param.py @@ -224,7 +224,7 @@ def get_init_data_sys(scan_dir='./', init_file_name='type.raw'): def get_basic_param_json(melt_point, - out_param_filename='param.json', + out_param_filename='param_basic.json', scan_dir="./", file_name='POSCAR', init_file_name='type.raw', @@ -273,6 +273,7 @@ def get_basic_param_json(melt_point, with open(out_param_filename, 'w') as p: json.dump(param_dict, p, indent=4) + return param_dict def _main(): parser = argparse.ArgumentParser(description='Collect data from inputs and generate basic param.json') parser.add_argument("melt_point", type=float, help="melt_point") @@ -282,5 +283,14 @@ def _main(): if __name__=='__main__': _main() - + +def auto_gen_param(args): + if args.PARAM: + with open(args.PARAM) as p: + j = json.load(p) + melt_point = j['melt_point'] + print('param_basic.json', get_basic_param_json(melt_point=melt_point)) + else: + raise RuntimeError('must provide melt point or PARAM') + #%% From a4893008c31f37e437713ee5c2da98736ece9619 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 5 Nov 2019 18:33:55 -0500 Subject: [PATCH 004/109] add node_cpu key document --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 3ebd81725..8727fb824 100644 --- a/README.md +++ b/README.md @@ -741,7 +741,8 @@ The following table gives explicit descriptions on keys in param.json. | # Followings are keys in resources | numb_node | Integer | 1 | Node count required for the job | task_per_node | Integer | 4 | Number of CPU cores required -| `numb_gpu` | Integer | 4 | Number of GPUs required +| numb_gpu | Integer | 4 | Number of GPUs required +| node_cpu | Integer | 4 | Only for LSF. The number of CPU cores on each node that should be allocated to the job. | source_list | List of string | "....../vasp.env" | Environment needed for certain job. For example, if "env" is in the list, 'source env' will be written in the script. | module_list | List of string | [ "Intel/2018", "Anaconda3"] | For example, If "Intel/2018" is in the list, "module load Intel/2018" will be written in the script. | partition | String | "AdminGPU" | Partition / queue in which to run the job. | From a93fa30520287223f355e563aba6124be0964b68 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 6 Nov 2019 17:57:55 -0500 Subject: [PATCH 005/109] fix allow_failure in the README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3ebd81725..0d9efc88e 100644 --- a/README.md +++ b/README.md @@ -749,10 +749,10 @@ The following table gives explicit descriptions on keys in param.json. mem_limit | Interger | 16 | Maximal memory permitted to apply for the job. | with_mpi | Boolean | true | Deciding whether to use mpi for calculation. If it's true and machine type is Slurm, "srun" will be prefixed to `command` in the script. | qos | "string"| "bigdata" | Deciding priority, dependent on particular settings of your HPC. +| allow_failure | Boolean | false | Allow the command to return a non-zero exit code. 
From c4d4f6528eb88dfebbd51d3b9b2a18d6347bdd01 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng
Date: Wed, 6 Nov 2019 18:05:43 -0500
Subject: [PATCH 006/109] add a machine file example for gaussian

---
 examples/machine/machine-pbs-gaussian.json | 79 ++++++++++++++++++++++
 1 file changed, 79 insertions(+)
 create mode 100644 examples/machine/machine-pbs-gaussian.json

diff --git a/examples/machine/machine-pbs-gaussian.json b/examples/machine/machine-pbs-gaussian.json
new file mode 100644
index 000000000..c08363148
--- /dev/null
+++ b/examples/machine/machine-pbs-gaussian.json
@@ -0,0 +1,79 @@
+{
+    "_comment": "training on localhost ",
+    "python_path": "/gpfs/home/tzhu/anaconda3/envs/python3.6/bin/python",
+    "train_machine": {
+        "machine_type": "lsf",
+        "hostname" : "59.78.197.77",
+        "port" : 22,
+        "username": "tzhu",
+        "work_path" : "/gpfs/home/tzhu/jzzeng/dpgen_workdir",
+        "_comment" : "that's all"
+    },
+    "train_resources": {
+        "source_list": [ "activate deepmd" ],
+        "envs": {
+            "KMP_BLOCKTIME": 0,
+            "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0"
+        },
+        "numb_gpu": 1,
+        "numb_node": 1,
+        "node_cpu": 0,
+        "partition": "newgpu",
+        "job_name": "dpgen_jzzeng",
+        "with_mpi": false,
+        "time_limit": false,
+        "_comment": "that's all"
+    },
+
+
+    "_comment": "model_devi on localhost ",
+    "lmp_command": "/gpfs/home/tzhu/lammps-stable_5Jun2019/src/lmp_intel_cpu_intelmpi -pk intel 0 omp 2",
+    "model_devi_group_size": 1,
+    "model_devi_machine": {
+        "machine_type": "lsf",
+        "hostname" : "59.78.197.77",
+        "port" : 22,
+        "username": "tzhu",
+        "work_path" : "/gpfs/home/tzhu/jzzeng/dpgen_workdir",
+        "_comment" : "that's all"
+    },
+    "model_devi_resources": {
+        "envs": {
+            "KMP_BLOCKTIME": 0
+        },
+        "source_list": [ "activate deepmd" ],
+        "numb_gpu": 1,
+        "numb_node": 1,
+        "node_cpu": 0,
+        "time_limit": false,
+        "partition": "newgpu",
+        "job_name": "dpgen_jzzeng",
+        "with_mpi": true,
+        "task_per_node": 1,
+        "_comment": "that's all"
+    },
+
+    "_comment": "fp on lsf //localhost ",
+    "fp_command": "/public/home/tzhu/g16/g16 < input",
+    "fp_group_size": 1,
+    "fp_machine": {
+        "machine_type": "pbs",
+        "hostname" : "59.78.189.132",
+        "port" : 2323,
+        "username": "tzhu",
+        "work_path" : "/public/home/tzhu/jzzeng/dpgen_workdir",
+        "_comment" : "that's all"
+    },
+    "fp_resources": {
+        "node_cpu":28,
+        "numb_node": 1,
+        "job_name": "dpgen_jzzeng",
+        "task_per_node": 28,
+        "with_mpi": false,
+        "time_limit": "10:00:00",
+        "allow_failure": true,
+        "partition": "small",
+        "_comment": "that's all"
+    },
+    "_comment": " that's all "
+}

From 974dd77f08960951922d793e45cfe2a95aead076 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng
Date: Wed, 6 Nov 2019 18:44:49 -0500
Subject: [PATCH 007/109] PATH does not need to contain "dpgen"

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3ebd81725..254586500 100644
--- a/README.md
+++ b/README.md
@@ -65,7 +65,7 @@ pip install --user .
 ```
 With this command, the dpgen executable is installed to `$HOME/.local/bin/dpgen`.
You may want to export the `PATH` by ```bash -export PATH=$HOME/.local/bin/dpgen:$PATH +export PATH=$HOME/.local/bin:$PATH ``` To test if the installation is successful, you may execute ```bash From 265dbfed42c9cd301d15c81303a21a0d02ebefc1 Mon Sep 17 00:00:00 2001 From: AnguseZhang <529133328@qq.con> Date: Fri, 8 Nov 2019 17:11:33 +0800 Subject: [PATCH 008/109] Add examples for DeepMD-kit-1.0 --- .../{ => DeePMD-kit-0.12}/machine-aws.json | 0 .../{ => DeePMD-kit-0.12}/machine-local.json | 29 ++++--------- .../{ => DeePMD-kit-0.12}/machine-lsf.json | 6 +-- .../machine-slurm-vasp-multi.json | 18 ++++---- .../machine-slurm-vasp-multi.yaml | 18 ++++---- .../machine-slurm-vasp-single.json | 6 +-- .../machine-slurm-vasp-single.yaml | 6 +-- .../machine/DeePMD-kit-1.0/machine-local.json | 42 +++++++++++++++++++ .../machine-pbs-gaussian.json | 0 examples/machine/{ => bk}/machine-hnu.json | 0 .../{ => bk}/machine-tiger-pwscf-della.json | 0 .../{ => bk}/machine-tiger-vasp-della.json | 0 examples/machine/{ => bk}/machine-tiger.json | 0 examples/machine/{ => bk}/machine-ucloud.json | 0 14 files changed, 78 insertions(+), 47 deletions(-) rename examples/machine/{ => DeePMD-kit-0.12}/machine-aws.json (100%) rename examples/machine/{ => DeePMD-kit-0.12}/machine-local.json (60%) rename examples/machine/{ => DeePMD-kit-0.12}/machine-lsf.json (96%) rename examples/machine/{ => DeePMD-kit-0.12}/machine-slurm-vasp-multi.json (95%) rename examples/machine/{ => DeePMD-kit-0.12}/machine-slurm-vasp-multi.yaml (95%) rename examples/machine/{ => DeePMD-kit-0.12}/machine-slurm-vasp-single.json (95%) rename examples/machine/{ => DeePMD-kit-0.12}/machine-slurm-vasp-single.yaml (95%) create mode 100644 examples/machine/DeePMD-kit-1.0/machine-local.json rename examples/machine/{ => DeePMD-kit-1.0}/machine-pbs-gaussian.json (100%) rename examples/machine/{ => bk}/machine-hnu.json (100%) rename examples/machine/{ => bk}/machine-tiger-pwscf-della.json (100%) rename examples/machine/{ => bk}/machine-tiger-vasp-della.json (100%) rename examples/machine/{ => bk}/machine-tiger.json (100%) rename examples/machine/{ => bk}/machine-ucloud.json (100%) diff --git a/examples/machine/machine-aws.json b/examples/machine/DeePMD-kit-0.12/machine-aws.json similarity index 100% rename from examples/machine/machine-aws.json rename to examples/machine/DeePMD-kit-0.12/machine-aws.json diff --git a/examples/machine/machine-local.json b/examples/machine/DeePMD-kit-0.12/machine-local.json similarity index 60% rename from examples/machine/machine-local.json rename to examples/machine/DeePMD-kit-0.12/machine-local.json index 05a0f2811..057db2722 100644 --- a/examples/machine/machine-local.json +++ b/examples/machine/DeePMD-kit-0.12/machine-local.json @@ -1,19 +1,15 @@ { "_comment": "training on localhost ", - "deepmd_path": "/home/wanghan/local/deepmd/0.10.1/", + "_comment" : "This is for DeePMD-kit 0.12.4", + "deepmd_path": "/home/wanghan/local/deepmd/0.12.4/", "train_machine": { - "machine_type": "local", - "hostname" : "127.0.0.1", - "port" : 22, - "username": "wanghan", - "work_path" : "/home/wanghan/tmp/subs/", - "_comment" : "that's all" + "batch": "shell", + "work_path" : "/home/wanghan/tmp/subs/" }, "train_resources": { "envs": { "PYTHONPATH" : "/home/wanghan/local/tensorflow/1.8.py/lib/python3.6/site-packages/" - }, - "_comment": "that's all" + } }, @@ -21,25 +17,18 @@ "lmp_command": "/home/wanghan/local/bin/lmp_mpi_010", "model_devi_group_size": 5, "model_devi_machine": { - "machine_type": "local", - "hostname" : "127.0.0.1", - "port" : 
22, - "username": "wanghan", - "work_path" : "/home/wanghan/tmp/subs/", - "_comment" : "that's all" + "batch": "shell", + "_comment" : "If lazy_local is true, calculations are done directly in current folders.", + "lazy_local" : true }, "model_devi_resources": { - "_comment": "that's all" }, "_comment": "fp on localhost ", "fp_command": "/home/wanghan/local/bin/vasp_std", "fp_group_size": 2, "fp_machine": { - "machine_type": "local", - "hostname" : "127.0.0.1", - "port" : 22, - "username": "wanghan", + "batch": "local", "work_path" : "/home/wanghan/tmp/subs/", "_comment" : "that's all" }, diff --git a/examples/machine/machine-lsf.json b/examples/machine/DeePMD-kit-0.12/machine-lsf.json similarity index 96% rename from examples/machine/machine-lsf.json rename to examples/machine/DeePMD-kit-0.12/machine-lsf.json index f10ed82fb..d8ebd61ed 100644 --- a/examples/machine/machine-lsf.json +++ b/examples/machine/DeePMD-kit-0.12/machine-lsf.json @@ -2,7 +2,7 @@ "train": [ { "machine": { - "machine_type": "lsf", + "batch": "lsf", "hostname": "localhost", "port": 22, "username": "ypliu", @@ -32,7 +32,7 @@ "model_devi": [ { "machine": { - "machine_type": "lsf", + "batch": "lsf", "hostname": "localhost", "port": 22, "username": "ypliu", @@ -64,7 +64,7 @@ "fp": [ { "machine": { - "machine_type": "lsf", + "batch": "lsf", "hostname": "localhost", "port": 22, "username": "ypliu", diff --git a/examples/machine/machine-slurm-vasp-multi.json b/examples/machine/DeePMD-kit-0.12/machine-slurm-vasp-multi.json similarity index 95% rename from examples/machine/machine-slurm-vasp-multi.json rename to examples/machine/DeePMD-kit-0.12/machine-slurm-vasp-multi.json index 17f5bad48..e24838077 100644 --- a/examples/machine/machine-slurm-vasp-multi.json +++ b/examples/machine/DeePMD-kit-0.12/machine-slurm-vasp-multi.json @@ -2,7 +2,7 @@ "train": [ { "machine": { - "machine_type": "slurm", + "batch": "slurm", "hostname": "localhost", "port": 22, "username": "1600017784", @@ -25,7 +25,7 @@ }, { "machine": { - "machine_type": "slurm", + "batch": "slurm", "hostname": "localhost", "port": 22, "username": "1600017784", @@ -49,7 +49,7 @@ { "deepmd_path": "/data2/publicsoft/deepmd-kit/0.12.4-s/", "machine": { - "machine_type": "slurm", + "batch": "slurm", "hostname": "115.27.161.2", "port": 22, "username": "anguse", @@ -78,7 +78,7 @@ "model_devi": [ { "machine": { - "machine_type": "slurm", + "batch": "slurm", "hostname": "localhost", "port": 22, "username": "1600017784", @@ -102,7 +102,7 @@ }, { "machine": { - "machine_type": "slurm", + "batch": "slurm", "hostname": "localhost", "port": 22, "username": "1600017784", @@ -126,7 +126,7 @@ }, { "machine": { - "machine_type": "slurm", + "batch": "slurm", "hostname": "115.27.161.2", "port": 22, "username": "anguse", @@ -156,7 +156,7 @@ "fp": [ { "machine": { - "machine_type": "slurm", + "batch": "slurm", "hostname": "localhost", "port": 22, "username": "1600017784", @@ -183,7 +183,7 @@ }, { "machine": { - "machine_type": "slurm", + "batch": "slurm", "hostname": "162.105.133.134", "port": 22, "username": "1600017784", @@ -211,7 +211,7 @@ }, { "machine": { - "machine_type": "slurm", + "batch": "slurm", "hostname": "162.105.133.134", "port": 22, "username": "1600017784", diff --git a/examples/machine/machine-slurm-vasp-multi.yaml b/examples/machine/DeePMD-kit-0.12/machine-slurm-vasp-multi.yaml similarity index 95% rename from examples/machine/machine-slurm-vasp-multi.yaml rename to examples/machine/DeePMD-kit-0.12/machine-slurm-vasp-multi.yaml index c90df9a09..5bd30d186 100644 --- 
a/examples/machine/machine-slurm-vasp-multi.yaml +++ b/examples/machine/DeePMD-kit-0.12/machine-slurm-vasp-multi.yaml @@ -1,7 +1,7 @@ --- train: - machine: - machine_type: slurm + batch: slurm hostname: localhost port: 22 username: '1600017784' @@ -19,7 +19,7 @@ train: qos: bigdata deepmd_path: "/gpfs/share/software/deepmd-kit/0.12.4/gpu/gcc/4.9.0/tf1120-lowprec" - machine: - machine_type: slurm + batch: slurm hostname: localhost port: 22 username: '1600017784' @@ -38,7 +38,7 @@ train: deepmd_path: "/gpfs/share/software/deepmd-kit/0.12.4/gpu/gcc/4.9.0/tf1120-lowprec" - deepmd_path: "/data2/publicsoft/deepmd-kit/0.12.4-s/" machine: - machine_type: slurm + batch: slurm hostname: 115.27.161.2 port: 22 username: anguse @@ -60,7 +60,7 @@ train: _comment: that's all model_devi: - machine: - machine_type: slurm + batch: slurm hostname: localhost port: 22 username: '1600017784' @@ -79,7 +79,7 @@ model_devi: command: lmp_serial group_size: 10 - machine: - machine_type: slurm + batch: slurm hostname: localhost port: 22 username: '1600017784' @@ -98,7 +98,7 @@ model_devi: command: lmp_serial group_size: 10 - machine: - machine_type: slurm + batch: slurm hostname: 115.27.161.2 port: 22 username: anguse @@ -121,7 +121,7 @@ model_devi: group_size: 20 fp: - machine: - machine_type: slurm + batch: slurm hostname: localhost port: 22 username: '1600017784' @@ -143,7 +143,7 @@ fp: command: vasp_std group_size: 5 - machine: - machine_type: slurm + batch: slurm hostname: 162.105.133.134 port: 22 username: '1600017784' @@ -165,7 +165,7 @@ fp: command: mpirun -n 16 vasp_std group_size: 5 - machine: - machine_type: slurm + batch: slurm hostname: 162.105.133.134 port: 22 username: '1600017784' diff --git a/examples/machine/machine-slurm-vasp-single.json b/examples/machine/DeePMD-kit-0.12/machine-slurm-vasp-single.json similarity index 95% rename from examples/machine/machine-slurm-vasp-single.json rename to examples/machine/DeePMD-kit-0.12/machine-slurm-vasp-single.json index 199736d66..2dbdafd5e 100644 --- a/examples/machine/machine-slurm-vasp-single.json +++ b/examples/machine/DeePMD-kit-0.12/machine-slurm-vasp-single.json @@ -2,7 +2,7 @@ "train": [ { "machine": { - "machine_type": "slurm", + "batch": "slurm", "hostname": "localhost", "port": 22, "username": "1600017784", @@ -27,7 +27,7 @@ "model_devi": [ { "machine": { - "machine_type": "slurm", + "batch": "slurm", "hostname": "localhost", "port": 22, "username": "1600017784", @@ -53,7 +53,7 @@ "fp": [ { "machine": { - "machine_type": "slurm", + "batch": "slurm", "hostname": "localhost", "port": 22, "username": "1600017784", diff --git a/examples/machine/machine-slurm-vasp-single.yaml b/examples/machine/DeePMD-kit-0.12/machine-slurm-vasp-single.yaml similarity index 95% rename from examples/machine/machine-slurm-vasp-single.yaml rename to examples/machine/DeePMD-kit-0.12/machine-slurm-vasp-single.yaml index 4162f1055..3b52e52ce 100644 --- a/examples/machine/machine-slurm-vasp-single.yaml +++ b/examples/machine/DeePMD-kit-0.12/machine-slurm-vasp-single.yaml @@ -1,7 +1,7 @@ --- train: - machine: - machine_type: slurm + batch: slurm hostname: localhost port: 22 username: '1600017784' @@ -20,7 +20,7 @@ train: deepmd_path: "/gpfs/share/software/deepmd-kit/0.12.4/gpu/gcc/4.9.0/tf1120-lowprec" model_devi: - machine: - machine_type: slurm + batch: slurm hostname: localhost port: 22 username: '1600017784' @@ -40,7 +40,7 @@ model_devi: group_size: 10 fp: - machine: - machine_type: slurm + batch: slurm hostname: localhost port: 22 username: '1600017784' diff --git 
a/examples/machine/DeePMD-kit-1.0/machine-local.json b/examples/machine/DeePMD-kit-1.0/machine-local.json new file mode 100644 index 000000000..d418a783e --- /dev/null +++ b/examples/machine/DeePMD-kit-1.0/machine-local.json @@ -0,0 +1,42 @@ +{ + "_comment": "training on localhost ", + "_comment" : "This is for DeePMD-kit 1.*", + "python_path": "/home/wanghan/local/deepmd/1.*/python", + "train_machine": { + "batch": "shell", + "work_path" : "/home/wanghan/tmp/subs/" + }, + "train_resources": { + "envs": { + } + }, + + + "_comment": "model_devi on localhost ", + "lmp_command": "/home/wanghan/local/bin/lmp_mpi_010", + "model_devi_group_size": 5, + "model_devi_machine": { + "batch": "shell", + "_comment" : "If lazy_local is true, calculations are done directly in current folders.", + "lazy_local" : true + }, + "model_devi_resources": { + }, + + "_comment": "fp on localhost ", + "fp_command": "/home/wanghan/local/bin/vasp_std", + "fp_group_size": 2, + "fp_machine": { + "batch": "local", + "work_path" : "/home/wanghan/tmp/subs/", + "_comment" : "that's all" + }, + "fp_resources": { + "module_list": ["mpi"], + "task_per_node":4, + "with_mpi": true, + "_comment": "that's all" + }, + + "_comment": " that's all " +} diff --git a/examples/machine/machine-pbs-gaussian.json b/examples/machine/DeePMD-kit-1.0/machine-pbs-gaussian.json similarity index 100% rename from examples/machine/machine-pbs-gaussian.json rename to examples/machine/DeePMD-kit-1.0/machine-pbs-gaussian.json diff --git a/examples/machine/machine-hnu.json b/examples/machine/bk/machine-hnu.json similarity index 100% rename from examples/machine/machine-hnu.json rename to examples/machine/bk/machine-hnu.json diff --git a/examples/machine/machine-tiger-pwscf-della.json b/examples/machine/bk/machine-tiger-pwscf-della.json similarity index 100% rename from examples/machine/machine-tiger-pwscf-della.json rename to examples/machine/bk/machine-tiger-pwscf-della.json diff --git a/examples/machine/machine-tiger-vasp-della.json b/examples/machine/bk/machine-tiger-vasp-della.json similarity index 100% rename from examples/machine/machine-tiger-vasp-della.json rename to examples/machine/bk/machine-tiger-vasp-della.json diff --git a/examples/machine/machine-tiger.json b/examples/machine/bk/machine-tiger.json similarity index 100% rename from examples/machine/machine-tiger.json rename to examples/machine/bk/machine-tiger.json diff --git a/examples/machine/machine-ucloud.json b/examples/machine/bk/machine-ucloud.json similarity index 100% rename from examples/machine/machine-ucloud.json rename to examples/machine/bk/machine-ucloud.json From a61a6b3c67f03be8253984a1eecc9fda11e7d086 Mon Sep 17 00:00:00 2001 From: AnguseZhang <529133328@qq.con> Date: Fri, 8 Nov 2019 23:51:01 +0800 Subject: [PATCH 009/109] Fix bugs in run_relax --- dpgen/data/gen.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dpgen/data/gen.py b/dpgen/data/gen.py index c8e98ff54..73bb8e25f 100644 --- a/dpgen/data/gen.py +++ b/dpgen/data/gen.py @@ -553,9 +553,9 @@ def run_vasp_relax(jdata, mdata, dispatcher): return relax_run_tasks = relax_tasks - for ii in relax_tasks : - if not _vasp_check_fin(ii): - relax_run_tasks.append(ii) + #for ii in relax_tasks : + # if not _vasp_check_fin(ii): + # relax_run_tasks.append(ii) run_tasks = [os.path.basename(ii) for ii in relax_run_tasks] #dlog.info(run_tasks) From de7e1294f93c40512c68e50b9f4e4bcaad19c11d Mon Sep 17 00:00:00 2001 From: AnguseZhang <529133328@qq.con> Date: Tue, 12 Nov 2019 11:45:19 +0800 Subject: [PATCH 
010/109] Allow check_fin --- dpgen/data/gen.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/dpgen/data/gen.py b/dpgen/data/gen.py index 73bb8e25f..1ae838fbc 100644 --- a/dpgen/data/gen.py +++ b/dpgen/data/gen.py @@ -63,14 +63,12 @@ def replace (file_name, pattern, subst) : global_dirname_04 = '02.md' def out_dir_name(jdata) : - cell_type = jdata['cell_type'] elements = jdata['elements'] super_cell = jdata['super_cell'] from_poscar = False if 'from_poscar' in jdata : from_poscar = jdata['from_poscar'] from_poscar_path = jdata['from_poscar_path'] - if from_poscar: poscar_name = os.path.basename(from_poscar_path) cell_str = "%02d" % (super_cell[0]) @@ -78,6 +76,7 @@ def out_dir_name(jdata) : cell_str = cell_str + ("x%02d" % super_cell[ii]) return poscar_name + '.' + cell_str else : + cell_type = jdata['cell_type'] ele_str = "" for ii in elements: ele_str = ele_str + ii.lower() @@ -408,6 +407,12 @@ def pert_scaled(jdata) : os.chdir(cwd) def make_vasp_md(jdata) : + ## If restart_md is true, md folders won't be created again. + restart_md = False + if "restart_md" in jdata and jdata["restart_md"]: + restart_md = True + if restart_md: + return out_dir = jdata['out_dir'] potcars = jdata['potcars'] scale = jdata['scale'] @@ -552,10 +557,10 @@ def run_vasp_relax(jdata, mdata, dispatcher): if len(relax_tasks) == 0: return - relax_run_tasks = relax_tasks - #for ii in relax_tasks : - # if not _vasp_check_fin(ii): - # relax_run_tasks.append(ii) + relax_run_tasks = [t for t in relax_tasks] + for ii in relax_tasks : + if not _vasp_check_fin(ii): + relax_run_tasks.append(ii) run_tasks = [os.path.basename(ii) for ii in relax_run_tasks] #dlog.info(run_tasks) @@ -595,11 +600,10 @@ def run_vasp_md(jdata, mdata, dispatcher): if len(md_tasks) == 0: return - md_run_tasks = md_tasks - #for ii in md_tasks : - # if not _vasp_check_fin(ii): - # md_run_tasks.append(ii) - + md_run_tasks = [t for t in md_tasks] + for ii in md_tasks : + if not _vasp_check_fin(ii): + md_run_tasks.append(ii) run_tasks = [ii.replace(work_dir+"/", "") for ii in md_run_tasks] #dlog.info("md_work_dir", work_dir) #dlog.info("run_tasks",run_tasks) From 6e846709429ff558ebaab69e537ef623cc1498c9 Mon Sep 17 00:00:00 2001 From: Yuan Fengbo Date: Tue, 12 Nov 2019 12:42:57 +0800 Subject: [PATCH 011/109] stat time --- dpgen/tools/run_report.py | 4 +- dpgen/tools/stat_iter.py | 18 ++----- dpgen/tools/stat_time.py | 105 +++++++++++++++++++++++++++++++++++++ dpgen/tools/update_time.sh | 88 ------------------------------- setup.py | 2 +- 5 files changed, 113 insertions(+), 104 deletions(-) create mode 100755 dpgen/tools/stat_time.py delete mode 100755 dpgen/tools/update_time.sh diff --git a/dpgen/tools/run_report.py b/dpgen/tools/run_report.py index ec0ef6dbd..17751c9c7 100755 --- a/dpgen/tools/run_report.py +++ b/dpgen/tools/run_report.py @@ -4,8 +4,8 @@ import numpy as np import subprocess as sp from dpgen.tools.stat_sys import stat_sys -from dpgen.tools.stat_iter import stat_iter, stat_time - +from dpgen.tools.stat_iter import stat_iter +from dpgen.tools.stat_time import stat_time def run_report(args): report_count = 0 diff --git a/dpgen/tools/stat_iter.py b/dpgen/tools/stat_iter.py index 228b051d4..519486727 100644 --- a/dpgen/tools/stat_iter.py +++ b/dpgen/tools/stat_iter.py @@ -22,10 +22,10 @@ def stat_iter(target_folder, num, relative_path_doc = line.strip().split(' ') path_doc = os.path.abspath(relative_path_doc) num = int(num) - prefix, iter_dirname, stage, out_filename = path_doc.rsplit('/',3) + 
prefix, iter_dirname, stage, out_filename = path_doc.rsplit('/',3) # pylint: disable=unused-variable pk_id, out_filename = path_doc.rsplit('/', 1) - iter = int(iter_dirname.split('.')[-1]) - out_id = int(out_filename.strip().split('.')[-2]) + iter = int(iter_dirname.split('.')[-1]) # pylint: disable=unused-variable + out_id = int(out_filename.strip().split('.')[-2]) # pylint: disable=unused-variable out_type = out_filename.strip().split('.')[0] iter_dict[pk_id][out_type] += num # for ii in @@ -39,7 +39,7 @@ def stat_iter(target_folder, if line: # [/home/felix/workplace/SiC/iter.000002/02.fp/task.018.000040/OUTCAR] path_doc = os.path.abspath(line) - pk_id, task_dirname, OUTCAR_filename=path_doc.rsplit('/', 2) + pk_id, task_dirname, OUTCAR_filename=path_doc.rsplit('/', 2) # pylint: disable=unused-variable try: _sys = dpdata.LabeledSystem(path_doc, type_map = jdata['type_map'] ) except: @@ -65,13 +65,5 @@ def stat_iter(target_folder, f":OUTCAR_not_convergence:{value['OUTCAR_not_convergence']}" f":reff:{value['reff']}") -def stat_time(target_folder, - param_file = 'param.json', - verbose = True, - mute = False): - script = os.path.join(os.path.dirname(__file__), 'update_time.sh') - output = subprocess.run([f'bash {script} {target_folder}'], - shell=True,stdout=subprocess.PIPE).stdout - data = output.decode() - print(data) + diff --git a/dpgen/tools/stat_time.py b/dpgen/tools/stat_time.py new file mode 100755 index 000000000..8e3a286fc --- /dev/null +++ b/dpgen/tools/stat_time.py @@ -0,0 +1,105 @@ +import subprocess +import os +def stat_time(target_folder, + param_file = 'param.json', + verbose = True, + mute = False): + train_dirs = subprocess.run([f"ls -d -1 {target_folder}/iter.??????/00.train/", ], + shell=True,stdout=subprocess.PIPE).stdout.decode().strip().split('\n') + for dir in train_dirs: + abs_dir = os.path.abspath(dir) + stage = os.path.basename(os.path.dirname(dir)) + train_time_logs = subprocess.run([f"grep -H --text 'wall time' {dir}/???/train.log", ], + shell=True,stdout=subprocess.PIPE).stdout.decode().strip().split('\n') + upload_task_dir_num = subprocess.run([f"ls -1 -d {dir}/??? 
|wc -l", ], + shell=True, stdout=subprocess.PIPE).stdout.decode().strip('\n') + total_core_sec = float(0) + + # assume training on single GPU + paral_cores = 1 + finished_task_file_num = len(train_time_logs) + # gpu_type_set = set([]) + for log in train_time_logs: + # log example : + # .//iter.000000/00.train//003/train.log:# DEEPMD: wall time: 7960.265 s + # print(log.split(':')) + file_path, text1, text2, wall_time = log.split(':') # pylint: disable=unused-variable + abs_file_path = os.path.abspath(file_path) + # stage=='00.train' + + wall_time_sec = float(wall_time.strip('s').strip(' ')) + total_core_sec += wall_time_sec * paral_cores + + # r'd\nja\1lgje' leading 'r' means dont treat '\' as Escape character + # gpu_type = subprocess.run([fr"grep -e 'physical GPU' {abs_file_path} |sed -n -E -e 's|^.*name: (.*), pci.*|\1|p'", ], + # shell=True,stdout=subprocess.PIPE).stdout.decode().strip().split('\n').pop() + # gpu_type_set.add(gpu_type) + + total_core_hour = total_core_sec * paral_cores / 3600 + print(f"{stage}:{abs_dir}" + f"paral_cores:{paral_cores}" + f":upload_task_dir_num:{upload_task_dir_num}" + f":finished_task_file_num:{finished_task_file_num}" + f":total_core_hour:{total_core_hour:.3f}") + + model_devi_dirs = subprocess.run([f"ls -d -1 {target_folder}/iter.??????/01.model_devi/", ], + shell=True,stdout=subprocess.PIPE).stdout.decode().strip().split('\n') + # print(model_devi_dirs) + for dir in model_devi_dirs: + abs_dir = os.path.abspath(dir) + stage = os.path.basename(os.path.dirname(dir)) + # print(dir) + model_devi_time_logs = subprocess.run([f"grep -H --text 'wall time' {dir}/task.*/log.lammps", ], + shell=True,stdout=subprocess.PIPE).stdout.decode().strip().split('\n') + upload_task_dir_num = subprocess.run([f"ls -1 -d {dir}/task.* |wc -l", ], + shell=True, stdout=subprocess.PIPE).stdout.decode().strip('\n') + total_core_sec = float(0) + finished_task_file_num = len(model_devi_time_logs) + # assume model_devi lammps job running on GPUs , set paral_cores==1 + paral_cores = 1 + for log in model_devi_time_logs: + # log example: + # .//iter.000002/01.model_devi//task.018.000075/log.lammps:Total wall time: 0:00:39 + # print(log) + file_path, text1, hour, min, sec = log.split(':') # pylint: disable=unused-variable + abs_file_path = os.path.abspath(file_path) + wall_time_sec = 3600*int(hour) + 60*int(min) + 1*int(sec) + total_core_sec += wall_time_sec * paral_cores + total_core_hour = total_core_sec / 3600 + + print(f"{stage}:{abs_dir}" + f":paral_cores:{paral_cores}" + f":upload_task_dir_num:{upload_task_dir_num}" + f":finished_task_file_num:{finished_task_file_num}" + f":total_core_hour:{total_core_hour:.3f}") + + fp_dirs = subprocess.run([f"ls -d -1 {target_folder}/iter.??????/02.fp/", ], + shell=True,stdout=subprocess.PIPE).stdout.decode().strip().split('\n') + for dir in fp_dirs: + abs_dir = os.path.abspath(dir) + stage = os.path.basename(os.path.dirname(dir)) + fp_time_logs = subprocess.run([f"grep -H --text 'CPU time' {dir}/task.*/OUTCAR", ], + shell=True,stdout=subprocess.PIPE).stdout.decode().strip().split('\n') + upload_task_dir_num = subprocess.run([f"ls -1 -d {dir}/task.* |wc -l", ], + shell=True, stdout=subprocess.PIPE).stdout.decode().strip('\n') + total_core_sec = float(0) + finished_task_file_num = len(fp_time_logs) + for log in fp_time_logs: + # log example: + # .//iter.000002/02.fp//task.018.000048/OUTCAR: Total CPU time used (sec): 288.395 + file_path, text1, sec = log.split(':') + abs_file_path = os.path.abspath(file_path) + wall_time_sec = float(sec) + 
paral_cores = subprocess.run([fr"head -n 1000 {abs_file_path} | grep 'running on' | sed -n -E -e 's|running on\s+([0-9]+)+\s.*|\1|p' ", ], + shell=True,stdout=subprocess.PIPE).stdout.decode().strip() + total_core_sec += wall_time_sec * int(paral_cores) + total_core_hour = total_core_sec /3600 + + print(f"{stage}:{abs_dir}" + f":paral_cores:{paral_cores}" + f":upload_task_dir_num:{upload_task_dir_num}" + f":finished_task_file_num:{finished_task_file_num}" + f":total_core_hour:{total_core_hour:.3f}") + +if __name__=='__main__': + stat_time(target_folder="./") \ No newline at end of file diff --git a/dpgen/tools/update_time.sh b/dpgen/tools/update_time.sh deleted file mode 100755 index 29a08e311..000000000 --- a/dpgen/tools/update_time.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash - -model_devi_paral_cores=1 - -if [[ -n $1 ]] -then - target_dir=$1 -else - target_dir="./" -fi - -if [[ -a time.log ]] -then - rm time.log -fi -for train_dir in `ls -d -1 $target_dir/iter.??????/00.train/`;do -sec=0 -tothour=0 -upload_task_dir_num=0 -recycle_task_file_num=0 -# echo $train_dir -upload_task_dir_num=$(ls -1 -d $train_dir/??? |wc -l) -if [[ -a train_time.log ]] -then - rm train_time.log -fi -grep -H --text 'wall time' $train_dir/???/train.log > train_time.log -recycle_task_file_num=$(wc -l < train_time.log) - while read line; do -mysec=$(echo "$line" |cut -d: -f4 |sed 's/s\| //g') -sec=$(echo "$mysec + $sec" | bc) - done < train_time.log -# echo $hour:$min:$sec -tothour=$(echo "scale=3; $sec/3600"|bc) -echo "00.train:$(realpath $train_dir):paral_cores:GPUV100:upload_task_dir_num:$upload_task_dir_num:recycle_task_file_num:$recycle_task_file_num:total core hour:$tothour" | tee -a time.log -done - -for model_devi_dir in `ls -d -1 $target_dir/iter.??????/01.model_devi/`;do -sec=0 -min=0 -hour=0 -tothour=0 -upload_task_dir_num=0 -recycle_task_file_num=0 -# echo $model_devi_dir -upload_task_dir_num=$(ls -1 -d $model_devi_dir/task.* |wc -l) -if [[ -a model_devi_time.log ]] -then - rm model_devi_time.log -fi -grep -H --text 'wall' $model_devi_dir/task.*/log.lammps > model_devi_time.log -recycle_task_file_num=$(wc -l < model_devi_time.log) - while read line; do -mysec=$(echo "$line" |cut -d: -f5) -sec=$(echo "$mysec + $sec" | bc) -mymin=$(echo "$line" |cut -d: -f4) -min=$(echo "$mymin + $min" | bc) -myhour=$(echo "$line" |cut -d: -f3) -hour=$(echo "$myhour + $hour" | bc) - done < model_devi_time.log -# echo $hour:$min:$sec -tothour=$(echo "scale=3; ($hour*3600+$min*60+$sec)*$model_devi_paral_cores/3600"|bc) -echo "01.model_devi:$(realpath $model_devi_dir):paral_cores:$model_devi_paral_cores:upload_task_dir_num:$upload_task_dir_num:recycle_task_file_num:$recycle_task_file_num:total core hour:$tothour" | tee -a time.log -done - -for fp_dir in `ls -d -1 $target_dir/iter.??????/02.fp/`;do -core_sec=0 -tothour=0 -upload_task_dir_num=0 -recycle_task_file_num=0 -# echo $fp_dir -upload_task_dir_num=$(ls -1 -d $fp_dir/task.* |wc -l) -if [[ -a fp_time.log ]] -then - rm fp_time.log -fi -grep -H --text 'CPU time' $fp_dir/task.*/OUTCAR > fp_time.log -recycle_task_file_num=$(wc -l < fp_time.log) - while read line;do -mysec=$(echo "$line" |cut -d: -f3 |sed 's| ||g') -file_name=$(echo "$line" | cut -d: -f1) -fp_paral_cores=$(grep 'total cores' $file_name |grep -o '[0-9]*') -core_sec=$(echo "$mysec * $fp_paral_cores + $core_sec" | bc) - done < fp_time.log -tothour=$(echo "scale=3; $core_sec/3600"|bc) -echo "02.fp:$(realpath 
$fp_dir):paral_cores:$fp_paral_cores:upload_task_dir_num:$upload_task_dir_num:recycle_task_file_num:$recycle_task_file_num:total core hour:$tothour" | tee -a time.log -done -wc -l $target_dir/iter.??????/02.fp/*out> candi_fail_accu.log diff --git a/setup.py b/setup.py index c2451552d..e2fa458bf 100755 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ 'dpgen/database', 'dpgen/tools' ], - data_files = [('dpgen/tools/', ['dpgen/tools/update_time.sh', ])], + # data_files = [('dpgen/tools/', ['dpgen/tools/update_time.sh', ])], # package_data={'example':['*.json']}, classifiers=[ "Programming Language :: Python :: 3.6", From d32dc4cd4e694d261a36bad9171f7599cbf01473 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 12 Nov 2019 14:17:06 -0500 Subject: [PATCH 012/109] print iter_name with task_name --- dpgen/generator/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index 7fa034773..8855c5187 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -1701,7 +1701,7 @@ def run_iter (param_file, machine_file) : if ii * max_tasks + jj <= iter_rec[0] * max_tasks + iter_rec[1] : continue task_name="task %02d"%jj - sepline(task_name,'-') + sepline("{} {}".format(iter_name, task_name),'-') if jj == 0 : log_iter ("make_train", ii, jj) make_train (ii, jdata, mdata) From a23ed53822125ae3758a7e6d63f01454790f4c36 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 15 Nov 2019 08:56:44 -0500 Subject: [PATCH 013/109] add init_reaction --- dpgen/data/reaction.py | 158 +++++++++++++++++++++++++++++++++++++++++ dpgen/main.py | 9 +++ 2 files changed, 167 insertions(+) create mode 100644 dpgen/data/reaction.py diff --git a/dpgen/data/reaction.py b/dpgen/data/reaction.py new file mode 100644 index 000000000..588f01da9 --- /dev/null +++ b/dpgen/data/reaction.py @@ -0,0 +1,158 @@ +""" +input: trajectory +00: build dataset (mddatasetbuilder) +01: fp (gaussian) +02: convert to deepmd data +output: data +""" + +import argparse +import glob +import json +import os + +import dpdata +from dpgen import dlog +from dpgen.dispatcher.Dispatcher import make_dispatcher +from dpgen.generator.run import create_path, make_fp_task_name +from dpgen.util import sepline + +build_path = "00.build" +fp_path = "01.fp" +data_path = "02.data" + +trj_path = "lammpstrj" +dataset_name = "dpgen_init" + + +def link_trj(jdata): + """link lammpstrj""" + create_path(build_path) + task_path = os.path.join(build_path, "task.000") + create_path(task_path) + + os.symlink(os.path.abspath(jdata["lammpstrj"]), os.path.abspath( + os.path.join(task_path, trj_path))) + + +def run_build_dataset(jdata, mdata, dispatcher, log_file="log"): + work_path = build_path + build_command = "{cmd} -n {dataset_name} -a {type_map} -d {lammpstrj} -c {cutoff} -i {interval} -s {dataset_size} -k \"{qmkeywords}\" --nprocjob {nprocjob} --nproc {nproc}".format( + cmd=mdata["build_command"], + type_map=" ".join(jdata["type_map"]), + lammpstrj=trj_path, + cutoff=jdata["cutoff"], + interval=jdata["interval"], + dataset_size=jdata["dataset_size"], + qmkeywords=jdata["qmkeywords"], + nprocjob=mdata["fp_resources"]["task_per_node"], + nproc=mdata["build_resources"]["task_per_node"], + dataset_name=dataset_name + ) + run_tasks = glob.glob(os.path.join(work_path, 'task.*')) + run_tasks.sort() + run_tasks = [os.path.basename(ii) for ii in run_tasks] + + dispatcher.run_jobs(mdata['build_resources'], + [build_command], + work_path, + run_tasks, + 1, + [], + [trj_path], + [f"{dataset_name}_gjf"], + 
outlog=log_file, + errlog=log_file) + + +def link_fp_input(): + all_input_file = glob.glob(os.path.join( + build_path, "task.*", f"{dataset_name}_gjf", "*", "*.gjf")) + work_path = fp_path + create_path(work_path) + + for ii, fin in enumerate(all_input_file): + dst_path = os.path.join(work_path, make_fp_task_name(0, ii)) + create_path(dst_path) + os.symlink(os.path.abspath(fin), os.path.abspath( + os.path.join(dst_path, "input"))) + + +def run_fp(jdata, + mdata, + dispatcher, + log_file="log", + forward_common_files=[]): + fp_command = mdata['fp_command'] + fp_group_size = mdata['fp_group_size'] + work_path = fp_path + + fp_tasks = glob.glob(os.path.join(work_path, 'task.*')) + fp_tasks.sort() + if len(fp_tasks) == 0: + return + + fp_run_tasks = fp_tasks + + run_tasks = [os.path.basename(ii) for ii in fp_run_tasks] + + dispatcher.run_jobs(mdata['fp_resources'], + [fp_command], + work_path, + run_tasks, + fp_group_size, + [], + ["input"], + ["output"], + outlog=log_file, + errlog=log_file) + + +def convert_data(): + s = dpdata.MultiSystems(*[dpdata.LabeledSystem(x, fmt="gaussian/log") + for x in glob.glob(os.path.join(fp_path, "*", "output"))]) + s.to_deepmd_npy(data_path) + + +def gen_init_reaction(args): + try: + import ruamel + from monty.serialization import loadfn, dumpfn + warnings.simplefilter( + 'ignore', ruamel.yaml.error.MantissaNoDotYAML1_1Warning) + jdata = loadfn(args.PARAM) + if args.MACHINE is not None: + mdata = loadfn(args.MACHINE) + except: + with open(args.PARAM, 'r') as fp: + jdata = json.load(fp) + if args.MACHINE is not None: + with open(args.MACHINE, "r") as fp: + mdata = json.load(fp) + + record = "record.reaction" + iter_rec = -1 + numb_task = 5 + if os.path.isfile(record): + with open(record) as frec: + for line in frec: + iter_rec = int(line.strip()) + dlog.info("continue from task %02d" % iter_rec) + for ii in range(numb_task): + sepline(ii, '-') + if ii <= iter_rec: + continue + elif ii == 0: + dispatcher = make_dispatcher(mdata["build_machine"]) + link_trj(jdata) + elif ii == 1: + run_build_dataset(jdata, mdata, dispatcher) + elif ii == 2: + link_fp_input() + elif ii == 3: + dispatcher = make_dispatcher(mdata["fp_machine"]) + run_fp(jdata, mdata, dispatcher) + elif ii == 4: + convert_data() + with open(record, "a") as frec: + frec.write(ii) diff --git a/dpgen/main.py b/dpgen/main.py index c3a37260f..01cd909a5 100644 --- a/dpgen/main.py +++ b/dpgen/main.py @@ -9,6 +9,7 @@ from dpgen.generator.run import gen_run from dpgen.data.gen import gen_init_bulk from dpgen.data.surf import gen_init_surf +from dpgen.data.reaction import gen_init_reaction from dpgen.auto_test.run import gen_test from dpgen.database.run import db_run from dpgen.tools.run_report import run_report @@ -67,6 +68,14 @@ def main(): # help="directory to process (default to .)") # parser_init.set_defaults(func=gen_data) + parser_init_reaction = subparsers.add_parser( + "init_reaction", help="Generating initial data for reactive systems.") + parser_init_reaction.add_argument('PARAM', type=str, + help="parameter file, json/yaml format") + parser_init_reaction.add_argument('MACHINE', type=str,default=None,nargs="?", + help="machine file, json/yaml format") + parser_init_reaction.set_defaults(func=gen_init_reaction) + # run parser_run = subparsers.add_parser( "run", From ddd78bddd240e1957b2650b8fee2470214d83167 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 15 Nov 2019 10:07:52 -0500 Subject: [PATCH 014/109] apply type_map for data --- dpgen/data/reaction.py | 7 ++++--- 1 file changed, 4 
insertions(+), 3 deletions(-) diff --git a/dpgen/data/reaction.py b/dpgen/data/reaction.py index 588f01da9..6e36b8caf 100644 --- a/dpgen/data/reaction.py +++ b/dpgen/data/reaction.py @@ -108,9 +108,10 @@ def run_fp(jdata, errlog=log_file) -def convert_data(): +def convert_data(jdata): s = dpdata.MultiSystems(*[dpdata.LabeledSystem(x, fmt="gaussian/log") - for x in glob.glob(os.path.join(fp_path, "*", "output"))]) + for x in glob.glob(os.path.join(fp_path, "*", "output"))], + type_map=jdata["type_map"]) s.to_deepmd_npy(data_path) @@ -153,6 +154,6 @@ def gen_init_reaction(args): dispatcher = make_dispatcher(mdata["fp_machine"]) run_fp(jdata, mdata, dispatcher) elif ii == 4: - convert_data() + convert_data(jdata) with open(record, "a") as frec: frec.write(ii) From d40298945aadc1d5ee1a6711134e6e8593fe0851 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 15 Nov 2019 11:53:31 -0500 Subject: [PATCH 015/109] fix bug --- dpgen/data/reaction.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/dpgen/data/reaction.py b/dpgen/data/reaction.py index 6e36b8caf..45d3274c7 100644 --- a/dpgen/data/reaction.py +++ b/dpgen/data/reaction.py @@ -60,14 +60,14 @@ def run_build_dataset(jdata, mdata, dispatcher, log_file="log"): 1, [], [trj_path], - [f"{dataset_name}_gjf"], + [f"dataset_{dataset_name}_gjf"], outlog=log_file, errlog=log_file) def link_fp_input(): all_input_file = glob.glob(os.path.join( - build_path, "task.*", f"{dataset_name}_gjf", "*", "*.gjf")) + build_path, "task.*", f"dataset_{dataset_name}_gjf", "*", "*.gjf")) work_path = fp_path create_path(work_path) @@ -81,7 +81,7 @@ def link_fp_input(): def run_fp(jdata, mdata, dispatcher, - log_file="log", + log_file="output", forward_common_files=[]): fp_command = mdata['fp_command'] fp_group_size = mdata['fp_group_size'] @@ -103,7 +103,7 @@ def run_fp(jdata, fp_group_size, [], ["input"], - ["output"], + [log_file], outlog=log_file, errlog=log_file) @@ -113,6 +113,7 @@ def convert_data(jdata): for x in glob.glob(os.path.join(fp_path, "*", "output"))], type_map=jdata["type_map"]) s.to_deepmd_npy(data_path) + dlog.info("Initial data is avaiable in %s" % os.path.abspath(data_path)) def gen_init_reaction(args): @@ -140,13 +141,13 @@ def gen_init_reaction(args): iter_rec = int(line.strip()) dlog.info("continue from task %02d" % iter_rec) for ii in range(numb_task): - sepline(ii, '-') + sepline(str(ii), '-') if ii <= iter_rec: continue elif ii == 0: - dispatcher = make_dispatcher(mdata["build_machine"]) link_trj(jdata) elif ii == 1: + dispatcher = make_dispatcher(mdata["build_machine"]) run_build_dataset(jdata, mdata, dispatcher) elif ii == 2: link_fp_input() @@ -156,4 +157,4 @@ def gen_init_reaction(args): elif ii == 4: convert_data(jdata) with open(record, "a") as frec: - frec.write(ii) + frec.write(str(ii)+'\n') From c315d9c1f0cabaf93653fab1761e6b4c89004860 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 15 Nov 2019 12:48:42 -0500 Subject: [PATCH 016/109] add reaxff --- dpgen/data/reaction.py | 93 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 82 insertions(+), 11 deletions(-) diff --git a/dpgen/data/reaction.py b/dpgen/data/reaction.py index 45d3274c7..a609e33b3 100644 --- a/dpgen/data/reaction.py +++ b/dpgen/data/reaction.py @@ -1,8 +1,9 @@ """ input: trajectory -00: build dataset (mddatasetbuilder) -01: fp (gaussian) -02: convert to deepmd data +00: ReaxFF MD (lammps) +01: build dataset (mddatasetbuilder) +02: fp (gaussian) +03: convert to deepmd data output: data """ @@ -10,6 +11,7 @@ import glob 
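# Sketch (annotation, not lines from the original patch): two notes on this
# module as it stands after this commit.
#
# 1. gen_init_reaction() below calls warnings.simplefilter(...) inside its
#    try block, but `warnings` is never imported in this module; the NameError
#    is swallowed by the bare except, so loading always falls back to
#    json.load and a yaml PARAM file fails to parse. An `import warnings` is
#    still needed in this import block.
#
# 2. With the values from the examples/init/reaction.json added later in this
#    series (type_map ["H","O"], temp 3000, tau_t 100, dt 0.1, nstep 10000,
#    dump_freq 100), make_lmp() below -- once the follow-up commits fix the
#    ffield name and the dump_freq keyword -- renders an in.lmp like:
#
#        units real
#        atom_style charge
#        read_data data.init
#        pair_style reax/c lmp_control
#        pair_coeff * * ffield.reax H O
#        velocity all create 3000 <seed drawn from 1..999999>
#        fix 1 all nvt temp 3000 3000 100
#        fix 2 all qeq/reax 1 0.0 10.0 1.0e-6 reax/c
#        dump 1 all custom 100 lammpstrj id type x y z
#        timestep 0.1
#        run 10000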
import json import os +import random import dpdata from dpgen import dlog @@ -17,14 +19,79 @@ from dpgen.generator.run import create_path, make_fp_task_name from dpgen.util import sepline -build_path = "00.build" -fp_path = "01.fp" -data_path = "02.data" +reaxff_path = "00.reaxff" +build_path = "01.build" +fp_path = "02.fp" +data_path = "03.data" trj_path = "lammpstrj" +ff_path = "ffield.reax" +data_path = "data.init" +control_path = "lmp_control" +lmp_path = "in.lmp" dataset_name = "dpgen_init" +def link_reaxff(jdata): + create_path(reaxff_path) + task_path = os.path.join(reaxff_path, "task.000") + create_path(task_path) + + rdata = jdata['reaxff'] + os.symlink(os.path.abspath(rdata["data"]), os.path.abspath( + os.path.join(task_path, data_path))) + os.symlink(os.path.abspath(rdata["ff"]), os.path.abspath( + os.path.join(task_path, ff_path))) + os.symlink(os.path.abspath(rdata["control"]), os.path.abspath( + os.path.join(task_path, control_path))) + with open(os.path.join(task_path, lmp_path)) as f: + f.write(make_lmp(jdata)) + + +def make_lmp(jdata): + rdata = jdata['reaxff'] + lmp_string = """units real +atom_style charge +read_data data.init +pair_style reax/c lmp_control +pair_coeff * * ffield.reax.cho {type_map} +velocity all create {temp} {rand} +fix 1 all nvt temp {temp} {temp} {tau_t} +fix 2 all qeq/reax 1 0.0 10.0 1.0e-6 reax/c +dump 1 all custom {dump_freq} lammpstrj id type x y z +timestep {dt} +run {nstep} +""".format( + type_map=" ".join(jdata['type_map']), + temp=rdata['temp'], + rand=random.randrange(1000000-1)+1, + tau_t=rdata['tau_t'], + dump_frep=rdata['dump_freq'], + dt=rdata['dt'], + nstep=rdata['nstep'] + ) + return lmp_string + + +def run_reaxff(jdata, mdata, dispatcher, log_file="reaxff_log"): + work_path = reaxff_path + reaxff_command = "{} -in {}".format(mdata["reaxff_command"], lmp_path) + run_tasks = glob.glob(os.path.join(work_path, 'task.*')) + run_tasks.sort() + run_tasks = [os.path.basename(ii) for ii in run_tasks] + + dispatcher.run_jobs(mdata['reaxff_resources'], + [reaxff_command], + work_path, + run_tasks, + 1, + [], + [ff_path, data_path, control_path, lmp_path], + [trj_path], + outlog=log_file, + errlog=log_file) + + def link_trj(jdata): """link lammpstrj""" create_path(build_path) @@ -35,7 +102,7 @@ def link_trj(jdata): os.path.join(task_path, trj_path))) -def run_build_dataset(jdata, mdata, dispatcher, log_file="log"): +def run_build_dataset(jdata, mdata, dispatcher, log_file="build_log"): work_path = build_path build_command = "{cmd} -n {dataset_name} -a {type_map} -d {lammpstrj} -c {cutoff} -i {interval} -s {dataset_size} -k \"{qmkeywords}\" --nprocjob {nprocjob} --nproc {nproc}".format( cmd=mdata["build_command"], @@ -134,7 +201,7 @@ def gen_init_reaction(args): record = "record.reaction" iter_rec = -1 - numb_task = 5 + numb_task = 7 if os.path.isfile(record): with open(record) as frec: for line in frec: @@ -147,14 +214,18 @@ def gen_init_reaction(args): elif ii == 0: link_trj(jdata) elif ii == 1: + link_trj(jdata) + elif ii == 2: + link_trj(jdata) + elif ii == 3: dispatcher = make_dispatcher(mdata["build_machine"]) run_build_dataset(jdata, mdata, dispatcher) - elif ii == 2: + elif ii == 4: link_fp_input() - elif ii == 3: + elif ii == 5: dispatcher = make_dispatcher(mdata["fp_machine"]) run_fp(jdata, mdata, dispatcher) - elif ii == 4: + elif ii == 6: convert_data(jdata) with open(record, "a") as frec: frec.write(str(ii)+'\n') From 051e0c5d4eb3f88aefbd07957c118e2de09a1ec1 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 15 Nov 2019 12:58:41 
-0500 Subject: [PATCH 017/109] remove interval --- dpgen/data/reaction.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dpgen/data/reaction.py b/dpgen/data/reaction.py index a609e33b3..f8ea38c82 100644 --- a/dpgen/data/reaction.py +++ b/dpgen/data/reaction.py @@ -104,12 +104,11 @@ def link_trj(jdata): def run_build_dataset(jdata, mdata, dispatcher, log_file="build_log"): work_path = build_path - build_command = "{cmd} -n {dataset_name} -a {type_map} -d {lammpstrj} -c {cutoff} -i {interval} -s {dataset_size} -k \"{qmkeywords}\" --nprocjob {nprocjob} --nproc {nproc}".format( + build_command = "{cmd} -n {dataset_name} -a {type_map} -d {lammpstrj} -c {cutoff} -s {dataset_size} -k \"{qmkeywords}\" --nprocjob {nprocjob} --nproc {nproc}".format( cmd=mdata["build_command"], type_map=" ".join(jdata["type_map"]), lammpstrj=trj_path, cutoff=jdata["cutoff"], - interval=jdata["interval"], dataset_size=jdata["dataset_size"], qmkeywords=jdata["qmkeywords"], nprocjob=mdata["fp_resources"]["task_per_node"], From a3dde9db38c405c5e4121b46d38ca5e69dfa9dd3 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 15 Nov 2019 12:59:38 -0500 Subject: [PATCH 018/109] fix machine --- dpgen/data/reaction.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dpgen/data/reaction.py b/dpgen/data/reaction.py index f8ea38c82..a37842315 100644 --- a/dpgen/data/reaction.py +++ b/dpgen/data/reaction.py @@ -211,9 +211,10 @@ def gen_init_reaction(args): if ii <= iter_rec: continue elif ii == 0: - link_trj(jdata) + link_reaxff(jdata) elif ii == 1: - link_trj(jdata) + dispatcher = make_dispatcher(mdata["reaxff_machine"]) + run_reaxff(jdata, mdata, dispatcher) elif ii == 2: link_trj(jdata) elif ii == 3: From 36c493a5a5543b0d0fb394ae6369702ef420e38b Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 15 Nov 2019 13:34:34 -0500 Subject: [PATCH 019/109] fix bug --- dpgen/data/reaction.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dpgen/data/reaction.py b/dpgen/data/reaction.py index a37842315..5e1f4f120 100644 --- a/dpgen/data/reaction.py +++ b/dpgen/data/reaction.py @@ -26,7 +26,7 @@ trj_path = "lammpstrj" ff_path = "ffield.reax" -data_path = "data.init" +data_init_path = "data.init" control_path = "lmp_control" lmp_path = "in.lmp" dataset_name = "dpgen_init" @@ -39,12 +39,12 @@ def link_reaxff(jdata): rdata = jdata['reaxff'] os.symlink(os.path.abspath(rdata["data"]), os.path.abspath( - os.path.join(task_path, data_path))) + os.path.join(task_path, data_init_path))) os.symlink(os.path.abspath(rdata["ff"]), os.path.abspath( os.path.join(task_path, ff_path))) os.symlink(os.path.abspath(rdata["control"]), os.path.abspath( os.path.join(task_path, control_path))) - with open(os.path.join(task_path, lmp_path)) as f: + with open(os.path.join(task_path, lmp_path), 'w') as f: f.write(make_lmp(jdata)) @@ -54,7 +54,7 @@ def make_lmp(jdata): atom_style charge read_data data.init pair_style reax/c lmp_control -pair_coeff * * ffield.reax.cho {type_map} +pair_coeff * * ffield.reax {type_map} velocity all create {temp} {rand} fix 1 all nvt temp {temp} {temp} {tau_t} fix 2 all qeq/reax 1 0.0 10.0 1.0e-6 reax/c @@ -66,7 +66,7 @@ def make_lmp(jdata): temp=rdata['temp'], rand=random.randrange(1000000-1)+1, tau_t=rdata['tau_t'], - dump_frep=rdata['dump_freq'], + dump_freq=rdata['dump_freq'], dt=rdata['dt'], nstep=rdata['nstep'] ) @@ -86,7 +86,7 @@ def run_reaxff(jdata, mdata, dispatcher, log_file="reaxff_log"): run_tasks, 1, [], - [ff_path, data_path, control_path, 
lmp_path], + [ff_path, data_init_path, control_path, lmp_path], [trj_path], outlog=log_file, errlog=log_file) @@ -98,7 +98,7 @@ def link_trj(jdata): task_path = os.path.join(build_path, "task.000") create_path(task_path) - os.symlink(os.path.abspath(jdata["lammpstrj"]), os.path.abspath( + os.symlink(os.path.abspath(os.path.join(reaxff_path, "task.000", trj_path)), os.path.abspath( os.path.join(task_path, trj_path))) From 8a653d2fab4d999b383f20f8537062fb385f7b59 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 15 Nov 2019 13:51:07 -0500 Subject: [PATCH 020/109] add example for init_reaction --- examples/init/reaction.json | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 examples/init/reaction.json diff --git a/examples/init/reaction.json b/examples/init/reaction.json new file mode 100644 index 000000000..46e327a9f --- /dev/null +++ b/examples/init/reaction.json @@ -0,0 +1,19 @@ +{ + "type_map": [ + "H", + "O" + ], + "reaxff": { + "data": "data.hydrogen", + "ff": "ffield.reax.cho", + "control": "lmp_control", + "temp": 3000, + "tau_t": 100, + "dt": 0.1, + "nstep": 10000, + "dump_freq": 100 + }, + "cutoff": 3.5, + "dataset_size": 100, + "qmkeywords": "b3lyp/6-31g** force" +} \ No newline at end of file From 841767a9ea4bdd14e6d42867203351cb5973ae47 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Wed, 20 Nov 2019 09:11:11 +0800 Subject: [PATCH 021/109] apply the changes from cwj --- README.md | 12 +- dpgen/auto_test/cmpt_00_equi.py | 45 ++-- dpgen/auto_test/cmpt_03_vacancy.py | 56 +++-- dpgen/auto_test/cmpt_04_interstitial.py | 46 ++-- dpgen/auto_test/lib/lammps.py | 2 +- dpgen/auto_test/lib/vasp.py | 2 +- dpgen/auto_test/run.py | 270 +++++++++--------------- examples/.DS_Store | Bin 8196 -> 0 bytes examples/test/deepmd_param.json | 102 ++++----- examples/test/meam_param.json | 115 +++++----- examples/test/vasp_param.json | 106 +++++----- examples/test/vasp_poscar_param.json | 75 +++---- 12 files changed, 407 insertions(+), 424 deletions(-) delete mode 100644 examples/.DS_Store diff --git a/README.md b/README.md index 3ebd81725..bae195ce2 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ One can download the source code of dpgen by ```bash git clone https://github.com/deepmodeling/dpgen.git ``` -then you may install DP-GEN easily by: +then you may install DP-GEN easily by: ```bash cd dpgen pip install --user . @@ -123,7 +123,7 @@ You may prepare initial data for bulk systems with VASP by: ```bash dpgen init_bulk PARAM [MACHINE] ``` -The MACHINE configure file is optional. If this parameter exists, then the optimization +The MACHINE configure file is optional. If this parameter exists, then the optimization tasks or MD tasks will be submitted automatically according to MACHINE.json. Basically `init_bulk` can be devided into four parts , denoted as `stages` in `PARAM`: @@ -273,7 +273,7 @@ The bold notation of key (such as **Elements**) means that it's a necessary key. | **layer_numb** | Integer | 3 | Number of equavilent layers of slab. | **vacuum_max** | Float | 9 | Maximal thickness of vacuum (Angstrom). | **vacuum_resol** | List of float | [0.5, 1 ] | Interval of thichness of vacuum. If size of `vacuum_resol` is 1, the interval is fixed to its value. If size of `vacuum_resol` is 2, the interval is `vacuum_resol[0]` before `mid_point`, otherwise `vacuum_resol[1]` after `mid_point`. -| **millers** | List of list of Integer | [[1,0,0]] | Miller indices. +| **millers** | List of list of Integer | [[1,0,0]] | Miller indices. 
| relax_incar | String | "....../INCAR" | Path of INCAR for relaxation in VASP. **Necessary** if `stages` include 1. | **scale** | List of float | [0.980, 1.000, 1.020] | Scales for transforming cells. | **skip_relax** | Boolean | False | If it's true, you may directly run stage 2 (perturb and scale) using an unrelaxed POSCAR. @@ -580,9 +580,9 @@ The second part is the computational settings for vasp and lammps. According to The last part is the optional settings for various tasks mentioned above. You can change the parameters according to actual needs. ```json "_comment":"00.equi", - "store_stable":true, + "alloy_shift":false, ``` -+ `store_stable`:(boolean) whether to store the stable energy and volume ++ `alloy_shift`:(boolean) whether to compute the alloy formation energy. If you test an alloy and set 'true', you need to compute the energies of the corresponding single elements first. Please set 'false' when testing a single element. ```json "_comment": "01.eos", @@ -763,7 +763,7 @@ mem_limit | Integer | 16 | Maximal memory permitted to apply for the job. - Index of `sys_configs` and `sys_idx` 2. Please verify the directories of `sys_configs`. If there isn't any POSCAR for `01.model_devi` in one iteration, it may happen that you have written the wrong path in `sys_configs`. -3. Correct format of JSON file. +3. Correct format of JSON file. 4. In `02.fp`, total cores you require through `task_per_node` should be divided by `npar` times `kpar`. 5. The frames of one system should be larger than `batch_size` and `numb_test` in `default_training_param`. It happens that one iteration adds only a few structures and causes an error in the next iteration's training. In this condition, you may let `fp_task_min` be larger than `numb_test`. ## License diff --git a/dpgen/auto_test/cmpt_00_equi.py b/dpgen/auto_test/cmpt_00_equi.py index a716eb0c6..cffa8faf5 100755 --- a/dpgen/auto_test/cmpt_00_equi.py +++ b/dpgen/auto_test/cmpt_00_equi.py @@ -21,40 +21,36 @@ def comput_e_shift(poscar, task_name) : ener_shift += a_natoms[ii] * ener return ener_shift -def comput_lmp_nev(conf_dir, task_name, write_stable = False) : +def comput_lmp_nev(conf_dir, task_name,write_shift = False) : conf_path = re.sub('confs', global_equi_name, conf_dir) conf_path = os.path.abspath(conf_path) poscar = os.path.join(conf_path, 'POSCAR') - if write_stable : - ele_types = vasp.get_poscar_types(poscar) - if len(ele_types) > 1 : - raise RuntimeError('stable energy and volume only for one element, current you have %s from POSCAR' % str(ele_types)) - ener_shift = comput_e_shift(poscar, task_name) + ele_types = vasp.get_poscar_types(poscar) lmp_path = os.path.join(conf_path, task_name) log_lammps = os.path.join(lmp_path, 'log.lammps') if os.path.isfile(log_lammps): natoms, epa, vpa = lammps.get_nev(log_lammps) - epa = (epa * natoms - ener_shift) / natoms - if write_stable : + if write_shift and len(ele_types)>1: + ener_shift = comput_e_shift(poscar, task_name) + shift = (epa * natoms - ener_shift) / natoms + return natoms,epa,vpa,shift + if len(ele_types)==1: stable_dir = 'stables' os.makedirs(stable_dir, exist_ok=True) name_prefix=os.path.join(stable_dir,'%s.%s' % (ele_types[0], task_name)) open(name_prefix + '.e', 'w').write('%.16f\n' % (epa)) open(name_prefix + '.v', 'w').write('%.16f\n' % (vpa)) - return natoms, epa, vpa + return natoms, epa, vpa , None else : - return None, None, None + return None, None, None, None + +def comput_vasp_nev(jdata, conf_dir, write_shift = False) :
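# Annotated contract (comment sketch, not lines from the original diff):
# like comput_lmp_nev above, comput_vasp_nev now returns a 4-tuple
# (natoms, epa, vpa, shift). The alloy formation-energy shift,
#     shift = (epa * natoms - sum_i natoms_i * e_ref_i) / natoms,
# with the per-element references e_ref_i read from stables/ by
# comput_e_shift(), is computed only when write_shift is set and the POSCAR
# contains more than one element type; for a single element the per-atom
# energy and volume are written under stables/ and shift is returned as None.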
conf_path = re.sub('confs', global_equi_name, conf_dir) conf_path = os.path.abspath(conf_path) poscar = os.path.join(conf_path, 'POSCAR') - if write_stable : - ele_types = vasp.get_poscar_types(poscar) - if len(ele_types) > 1 : - raise RuntimeError('stable energy and volume only for one element, current you have %s from POSCAR' % str(ele_types)) + ele_types = vasp.get_poscar_types(poscar) if 'relax_incar' in jdata.keys(): vasp_str='vasp-relax_incar' @@ -62,7 +58,6 @@ def comput_vasp_nev(jdata, conf_dir, write_stable = False) : kspacing = jdata['vasp_params']['kspacing'] vasp_str='vasp-k%.2f' % kspacing - ener_shift = comput_e_shift(poscar, vasp_str) vasp_path = os.path.join(conf_path, vasp_str) outcar = os.path.join(vasp_path, 'OUTCAR') # tag_fin = os.path.join(vasp_path, 'tag_finished') @@ -72,22 +67,25 @@ def comput_vasp_nev(jdata, conf_dir, write_stable = False) : warnings.warn("incomplete job "+vasp_path+" use the last frame") if os.path.isfile(outcar): natoms, epa, vpa = vasp.get_nev(outcar) - epa = (epa * natoms - ener_shift) / natoms - if write_stable : + if write_shift and len(ele_types)>1: + ener_shift = comput_e_shift(poscar, vasp_str) + shift = (epa * natoms - ener_shift) / natoms + return natoms,epa,vpa,shift + if len(ele_types)==1: stable_dir = 'stables' os.makedirs(stable_dir, exist_ok=True) name_prefix=os.path.join(stable_dir,'%s.'% (ele_types[0])+vasp_str) open(name_prefix + '.e', 'w').write('%.16f\n' % (epa)) open(name_prefix + '.v', 'w').write('%.16f\n' % (vpa)) - return natoms, epa, vpa + return natoms, epa, vpa, None else : - return None, None, None + return None, None, None, None def _main(): parser = argparse.ArgumentParser( description="cmpt 00.equi") - parser.add_argument('TASK', type=str, - choices = ['all', 'vasp', 'deepmd', 'meam'], + parser.add_argument('TASK', type=str, + choices = ['all', 'vasp', 'deepmd', 'meam'], help='the task of generation, vasp or lammps') parser.add_argument('PARAM', type=str, help='the json param') @@ -120,4 +118,3 @@ def _main(): if __name__ == '__main__' : _main() - diff --git a/dpgen/auto_test/cmpt_03_vacancy.py b/dpgen/auto_test/cmpt_03_vacancy.py index 0c3c201e5..a53602d38 100755 --- a/dpgen/auto_test/cmpt_03_vacancy.py +++ b/dpgen/auto_test/cmpt_03_vacancy.py @@ -23,7 +23,7 @@ def comput_e_shift(poscar, task_name) : ref_e_file = os.path.join('stables', ref_e_file) ener = float(open(ref_e_file).read()) ener_shift += a_natoms[ii] * ener - return ener_shift + return ener_shift def cmpt_vasp(jdata, conf_dir, supercell) : @@ -37,7 +37,7 @@ def cmpt_vasp(jdata, conf_dir, supercell) : equi_path = os.path.join(equi_path, vasp_str) equi_path = os.path.abspath(equi_path) equi_outcar = os.path.join(equi_path, 'OUTCAR') - task_path = re.sub('confs', global_task_name, conf_dir) + task_path = re.sub('confs', global_task_name, conf_dir) task_path = os.path.join(task_path, vasp_str) task_path = os.path.abspath(task_path) print("# ", task_path) @@ -51,17 +51,23 @@ def cmpt_vasp(jdata, conf_dir, supercell) : if len(struct_path_list) == 0: print("# cannot find results for conf %s supercell %s" % (conf_dir, supercell)) sys.stdout.write ("Structure: \tVac_E(eV) E(eV) equi_E(eV)\n") - for ii in struct_path_list : - struct_poscar = os.path.join(ii, 'POSCAR') - energy_shift = comput_e_shift(struct_poscar, vasp_str) - structure_dir = os.path.basename(ii) - outcar = os.path.join(ii, 'OUTCAR') - natoms, epa, vpa = vasp.get_nev(outcar) - evac = epa * natoms - equi_epa * natoms - sys.stdout.write ("%s: %7.3f %7.3f %7.3f \n" % (structure_dir, evac, epa * 
natoms, equi_epa*natoms)) + result = os.path.join(task_path,'result') + with open(result,'w') as fp: + fp.write('conf_dir:%s\n'% (conf_dir)) + fp.write("Structure: \tVac_E(eV) E(eV) equi_E(eV)\n") + for ii in struct_path_list : + struct_poscar = os.path.join(ii, 'POSCAR') + #energy_shift = comput_e_shift(struct_poscar, vasp_str) + structure_dir = os.path.basename(ii) + outcar = os.path.join(ii, 'OUTCAR') + natoms, epa, vpa = vasp.get_nev(outcar) + evac = epa * natoms - equi_epa * natoms + sys.stdout.write ("%s: %7.3f %7.3f %7.3f \n" % (structure_dir, evac, epa * natoms, equi_epa*natoms)) + fp.write("%s: %7.3f %7.3f %7.3f \n" % (structure_dir, evac, epa * natoms, equi_epa*natoms)) + fp.close() # evac = epa * natoms - energy_shift # sys.stdout.write ("%s: %7.3f %7.3f %7.3f \n" % (structure_dir, evac, epa * natoms, energy_shift)) - # sys.stdout.write ("%s: %7.3f \n" % (structure_dir, evac)) + # sys.stdout.write ("%s: %7.3f \n" % (structure_dir, evac)) def cmpt_deepmd_lammps(jdata, conf_dir, supercell, task_name) : equi_path = re.sub('confs', global_equi_name, conf_dir) @@ -82,14 +88,20 @@ def cmpt_deepmd_lammps(jdata, conf_dir, supercell, task_name) : if len(struct_path_list) == 0: print("# cannot find results for conf %s supercell %s" % (conf_dir, supercell)) sys.stdout.write ("Structure: \tVac_E(eV) E(eV) equi_E(eV)\n") - for ii in struct_path_list : - struct_poscar = os.path.join(ii, 'POSCAR') - energy_shift = comput_e_shift(struct_poscar, task_name) - structure_dir = os.path.basename(ii) - lmp_log = os.path.join(ii, 'log.lammps') - natoms, epa, vpa = lammps.get_nev(lmp_log) - evac = epa * natoms - equi_epa * natoms - sys.stdout.write ("%s: %7.3f %7.3f %7.3f \n" % (structure_dir, evac, epa * natoms, equi_epa * natoms)) + result = os.path.join(task_path,'result') + with open(result,'w') as fp: + fp.write('conf_dir:%s\n'% (conf_dir)) + fp.write("Structure: \tVac_E(eV) E(eV) equi_E(eV)\n") + for ii in struct_path_list : + struct_poscar = os.path.join(ii, 'POSCAR') + #energy_shift = comput_e_shift(struct_poscar, task_name) + structure_dir = os.path.basename(ii) + lmp_log = os.path.join(ii, 'log.lammps') + natoms, epa, vpa = lammps.get_nev(lmp_log) + evac = epa * natoms - equi_epa * natoms + sys.stdout.write ("%s: %7.3f %7.3f %7.3f \n" % (structure_dir, evac, epa * natoms, equi_epa * natoms)) + fp.write("%s: %7.3f %7.3f %7.3f \n" % (structure_dir, evac, epa * natoms, equi_epa*natoms)) + fp.close() # evac = epa * natoms - energy_shift # sys.stdout.write ("%s: %7.3f %7.3f %7.3f \n" % (structure_dir, evac, epa * natoms, energy_shift)) # sys.stdout.write ("%s: %7.3f\n" % (structure_dir, evac)) @@ -112,15 +124,13 @@ def _main() : # print('# generate %s task with conf %s' % (args.TASK, args.CONF)) if args.TASK == 'vasp': - cmpt_vasp(jdata, args.CONF, args.COPY) + cmpt_vasp(jdata, args.CONF, args.COPY) elif args.TASK == 'deepmd' : cmpt_deepmd_lammps(jdata, args.CONF, args.COPY, args.TASK) elif args.TASK == 'meam' : cmpt_deepmd_lammps(jdata, args.CONF, args.COPY, args.TASK) else : raise RuntimeError("unknow task ", args.TASK) - + if __name__ == '__main__' : _main() - - diff --git a/dpgen/auto_test/cmpt_04_interstitial.py b/dpgen/auto_test/cmpt_04_interstitial.py index c87ef741e..6b029909c 100755 --- a/dpgen/auto_test/cmpt_04_interstitial.py +++ b/dpgen/auto_test/cmpt_04_interstitial.py @@ -41,13 +41,19 @@ def _cmpt_vasp(jdata, conf_dir, supercell, insert_ele) : struct_path_list.sort() if len(struct_path_list) == 0: print("# cannot find results for conf %s supercell %s" % (conf_dir, supercell)) - 
sys.stdout.write ("Insert_ele-Struct: Inter_E(eV)\n") - for ii in struct_path_list : - structure_dir = os.path.basename(ii) - outcar = os.path.join(ii, 'OUTCAR') - natoms, epa, vpa = vasp.get_nev(outcar) - evac = epa * natoms - equi_epa * natoms - sys.stdout.write ("%s: %7.3f \n" % (structure_dir, evac)) + sys.stdout.write ("Insert_ele-Struct: Inter_E(eV) E(eV) equi_E(eV)\n") + result = os.path.join(task_path,'result') + with open(result,'w') as fp: + fp.write('conf_dir:%s\n'% (conf_dir)) + fp.write ("Insert_ele-Struct: Inter_E(eV) E(eV) equi_E(eV)\n") + for ii in struct_path_list : + structure_dir = os.path.basename(ii) + outcar = os.path.join(ii, 'OUTCAR') + natoms, epa, vpa = vasp.get_nev(outcar) + evac = epa * natoms - equi_epa * natoms + sys.stdout.write ("%s: %7.3f %7.3f %7.3f \n" % (structure_dir, evac, epa * natoms, equi_epa * natoms)) + fp.write ("%s: %7.3f %7.3f %7.3f \n" % (structure_dir, evac, epa * natoms, equi_epa * natoms)) + fp.close() def cmpt_deepmd_reprod_traj(jdata, conf_dir, supercell, insert_ele, task_name) : for ii in insert_ele: @@ -63,7 +69,7 @@ def _cmpt_deepmd_reprod_traj(jdata, conf_dir, supercell, insert_ele, task_name) conf_path = os.path.abspath(conf_dir) task_path = re.sub('confs', global_task_name, conf_path) vasp_path = os.path.join(task_path, vasp_str) - lmps_path = os.path.join(task_path, task_name + vasp_str.replace('vasp','')) + lmps_path = os.path.join(task_path, task_name + vasp_str.replace('vasp','')) copy_str = "%sx%sx%s" % (supercell[0], supercell[1], supercell[2]) struct_widecard = os.path.join(vasp_path, 'struct-%s-%s-*' % (insert_ele,copy_str)) vasp_struct = glob.glob(struct_widecard) @@ -122,12 +128,18 @@ def _cmpt_deepmd_lammps(jdata, conf_dir, supercell, insert_ele, task_name) : if len(struct_path_list) == 0: print("# cannot find results for conf %s supercell %s" % (conf_dir, supercell)) sys.stdout.write ("Insert_ele-Struct: Inter_E(eV) E(eV) equi_E(eV)\n") - for ii in struct_path_list : - structure_dir = os.path.basename(ii) - lmp_log = os.path.join(ii, 'log.lammps') - natoms, epa, vpa = lammps.get_nev(lmp_log) - evac = epa * natoms - equi_epa * natoms - sys.stdout.write ("%s: %7.3f %7.3f %7.3f \n" % (structure_dir, evac, epa * natoms, equi_epa * natoms)) + result = os.path.join(task_path,'result') + with open(result,'w') as fp: + fp.write('conf_dir:%s\n'% (conf_dir)) + fp.write ("Insert_ele-Struct: Inter_E(eV) E(eV) equi_E(eV)\n") + for ii in struct_path_list : + structure_dir = os.path.basename(ii) + lmp_log = os.path.join(ii, 'log.lammps') + natoms, epa, vpa = lammps.get_nev(lmp_log) + evac = epa * natoms - equi_epa * natoms + sys.stdout.write ("%s: %7.3f %7.3f %7.3f \n" % (structure_dir, evac, epa * natoms, equi_epa * natoms)) + fp.write ("%s: %7.3f %7.3f %7.3f \n" % (structure_dir, evac, epa * natoms, equi_epa * natoms)) + fp.close() def _main() : parser = argparse.ArgumentParser( @@ -149,7 +161,7 @@ def _main() : # print('# generate %s task with conf %s' % (args.TASK, args.CONF)) if args.TASK == 'vasp': - cmpt_vasp(jdata, args.CONF, args.COPY, args.ELEMENT) + cmpt_vasp(jdata, args.CONF, args.COPY, args.ELEMENT) elif args.TASK == 'deepmd' : cmpt_deepmd_lammps(jdata, args.CONF, args.COPY, args.ELEMENT, args.TASK) elif args.TASK == 'deepmd-reprod' : @@ -160,8 +172,6 @@ def _main() : cmpt_deepmd_reprod_traj(jdata, args.CONF, args.COPY, args.ELEMENT, args.TASK) else : raise RuntimeError("unknow task ", args.TASK) - + if __name__ == '__main__' : _main() - - diff --git a/dpgen/auto_test/lib/lammps.py b/dpgen/auto_test/lib/lammps.py index 
f3788067f..b6a21f44a 100644 --- a/dpgen/auto_test/lib/lammps.py +++ b/dpgen/auto_test/lib/lammps.py @@ -395,7 +395,7 @@ def poscar_from_last_dump(dump, poscar_out, deepmd_type_map) : os.remove('tmp_dump') with open(poscar_out, 'r') as fp: lines = fp.read().split('\n') - types = [ deepmd_type_map[int(ii.split('_')[1])-1] for ii in lines[5].split()] + types = [ deepmd_type_map[int(ii.split('_')[1])] for ii in lines[5].split()] lines[5] = " ".join(types) with open(poscar_out, 'w') as fp: lines = fp.write("\n".join(lines)) diff --git a/dpgen/auto_test/lib/vasp.py b/dpgen/auto_test/lib/vasp.py index 5022e4b74..f2610de82 100644 --- a/dpgen/auto_test/lib/vasp.py +++ b/dpgen/auto_test/lib/vasp.py @@ -148,7 +148,7 @@ def get_nev(fname) : ener = _get_energies(lines)[-1] return natoms, ener/natoms, vol/natoms except OutcarItemError: - return natoms, None, None + raise OutcarItemError("cannot find the result, please check the OUTCAR") # print(fname, natoms, vol, ener) def get_stress(fname) : diff --git a/dpgen/auto_test/run.py b/dpgen/auto_test/run.py index ce76ca69d..4b92664e6 100644 --- a/dpgen/auto_test/run.py +++ b/dpgen/auto_test/run.py @@ -29,7 +29,7 @@ from dpgen.auto_test.lib.utils import create_path from dpgen.auto_test.lib.utils import copy_file_list from dpgen.auto_test.lib.utils import replace - +from dpgen.dispatcher.Dispatcher import make_dispatcher from dpgen.auto_test.lib.utils import log_iter from dpgen.auto_test.lib.utils import record_iter from dpgen.auto_test.lib.utils import log_iter @@ -51,69 +51,6 @@ lammps_task_type=['deepmd','meam','eam'] -def _run(machine, - machine_type, - ssh_sess, - resources, - command, - work_path, - run_tasks, - group_size, - common_files, - forward_files, - backward_files): - - print("group_size",group_size) - if ssh_sess == None and machine_type == 'ucloud': - print("The first situation!") - ucloud_submit_jobs(machine, - resources, - command, - work_path, - run_tasks, - group_size, - common_files, - forward_files, - backward_files) - elif machine_type == 'slurm' : - print("The second situation!") - group_slurm_jobs(ssh_sess, - resources, - command, - work_path, - run_tasks, - group_size, - common_files, - forward_files, - backward_files, - forward_task_deference =False) - elif machine_type == 'pbs' : - group_slurm_jobs(ssh_sess, - resources, - command, - work_path, - run_tasks, - group_size, - common_files, - forward_files, - backward_files, - remote_job = PBSJob, - forward_task_deference =False) - elif machine_type == 'local' : - group_local_jobs(ssh_sess, - resources, - command, - work_path, - run_tasks, - group_size, - common_files, - forward_files, - backward_files) - else : - raise RuntimeError("unknow machine type") - - - def gen_equi(task_type,jdata,mdata): conf_dir=jdata['conf_dir'] cwd=os.getcwd() @@ -138,7 +75,7 @@ def run_equi(task_type,jdata,mdata): mdata=decide_fp_machine(mdata) forward_files = ['INCAR', 'POTCAR'] - backward_files = ['OUTCAR','CONTCAR','OSZICAR'] + backward_files = ['OUTCAR', 'log' , 'CONTCAR','OSZICAR'] common_files=['POSCAR'] #lammps @@ -168,32 +105,36 @@ def run_equi(task_type,jdata,mdata): run_tasks = util.collect_task(all_task,task_type) machine,machine_type,ssh_sess,resources,command,group_size=util.get_machine_info(mdata,task_type) + disp = make_dispatcher(machine) + disp.run_jobs(resources, + command, + work_path, + run_tasks, + group_size, + common_files, + forward_files, + backward_files, + outlog='autotest.out', + errlog='autotest.err') - _run(machine, - machine_type, - ssh_sess, - resources, - command, - 
work_path, - run_tasks, - group_size, - common_files, - forward_files, - backward_files) def cmpt_equi(task_type,jdata,mdata): conf_dir=jdata['conf_dir'] - stable=jdata['store_stable'] + cmpt_shift=jdata['alloy_shift'] #vasp if task_type=="vasp": - n, e, v = cmpt_00_equi.comput_vasp_nev(jdata, conf_dir, stable) + n, e, v, s = cmpt_00_equi.comput_vasp_nev(jdata, conf_dir,cmpt_shift) #lammps elif task_type in lammps_task_type: - n, e, v = cmpt_00_equi.comput_lmp_nev(conf_dir, task_type, stable) + n, e, v, s = cmpt_00_equi.comput_lmp_nev(conf_dir, task_type,cmpt_shift) else : raise RuntimeError ("unknow task %s, something wrong" % task_type) - print('conf_dir:\t EpA(eV) VpA(A^3)') - print("%s\t %8.4f %7.3f " % (conf_dir, e, v)) + if cmpt_shift: + print('conf_dir:\t EpA(eV) VpA(A^3) ener_shift(eV)') + print("%s\t %8.4f %7.3f %8.4f" % (conf_dir, e, v, s)) + else: + print('conf_dir:\t EpA(eV) VpA(A^3)') + print("%s\t %8.4f %7.3f " % (conf_dir, e, v)) def gen_eos(task_type,jdata,mdata): conf_dir=jdata['conf_dir'] @@ -223,7 +164,7 @@ def run_eos(task_type,jdata,mdata): mdata=decide_fp_machine(mdata) forward_files = ['INCAR', 'POSCAR','POTCAR'] - backward_files = ['OUTCAR','OSZICAR'] + backward_files = ['OUTCAR', 'log' , 'OSZICAR'] common_files=['INCAR','POTCAR'] #lammps @@ -252,17 +193,17 @@ def run_eos(task_type,jdata,mdata): run_tasks = util.collect_task(all_task,task_type) machine,machine_type,ssh_sess,resources,command,group_size=util.get_machine_info(mdata,task_type) - _run(machine, - machine_type, - ssh_sess, - resources, - command, - work_path, - run_tasks, - group_size, - common_files, - forward_files, - backward_files) + disp = make_dispatcher(machine) + disp.run_jobs(resources, + command, + work_path, + run_tasks, + group_size, + common_files, + forward_files, + backward_files, + outlog='autotest.out', + errlog='autotest.err') def cmpt_eos(task_type,jdata,mdata): conf_dir=jdata['conf_dir'] @@ -299,7 +240,7 @@ def run_elastic(task_type,jdata,mdata): mdata=decide_fp_machine(mdata) forward_files = ['INCAR', 'POSCAR','POTCAR','KPOINTS'] - backward_files = ['OUTCAR','CONTCAR','OSZICAR'] + backward_files = ['OUTCAR', 'log' , 'CONTCAR','OSZICAR'] common_files=['INCAR','POTCAR','KPOINTS'] #lammps @@ -327,17 +268,17 @@ def run_elastic(task_type,jdata,mdata): run_tasks = util.collect_task(all_task,task_type) machine,machine_type,ssh_sess,resources,command,group_size=util.get_machine_info(mdata,task_type) - _run(machine, - machine_type, - ssh_sess, - resources, - command, - work_path, - run_tasks, - group_size, - common_files, - forward_files, - backward_files) + disp = make_dispatcher(machine) + disp.run_jobs(resources, + command, + work_path, + run_tasks, + group_size, + common_files, + forward_files, + backward_files, + outlog='autotest.out', + errlog='autotest.err') def cmpt_elastic(task_type,jdata,mdata): conf_dir=jdata['conf_dir'] @@ -372,7 +313,7 @@ def run_vacancy(task_type,jdata,mdata): mdata=decide_fp_machine(mdata) forward_files = ['INCAR', 'POSCAR','POTCAR'] - backward_files = ['OUTCAR','OSZICAR'] + backward_files = ['OUTCAR', 'log' , 'OSZICAR'] common_files=['INCAR','POTCAR'] #lammps @@ -401,17 +342,17 @@ def run_vacancy(task_type,jdata,mdata): run_tasks = util.collect_task(all_task,task_type) machine,machine_type,ssh_sess,resources,command,group_size=util.get_machine_info(mdata,task_type) - _run(machine, - machine_type, - ssh_sess, - resources, - command, - work_path, - run_tasks, - group_size, - common_files, - forward_files, - backward_files) + disp = make_dispatcher(machine) + 
disp.run_jobs(resources, + command, + work_path, + run_tasks, + group_size, + common_files, + forward_files, + backward_files, + outlog='autotest.out', + errlog='autotest.err') def cmpt_vacancy(task_type,jdata,mdata): conf_dir=jdata['conf_dir'] @@ -455,7 +396,7 @@ def run_interstitial(task_type,jdata,mdata): mdata=decide_fp_machine(mdata) forward_files = ['INCAR', 'POSCAR','POTCAR'] - backward_files = ['OUTCAR','XDATCAR','OSZICAR'] + backward_files = ['OUTCAR', 'log' , 'XDATCAR','OSZICAR'] common_files=['INCAR'] #lammps @@ -498,37 +439,36 @@ def run_interstitial(task_type,jdata,mdata): raise RuntimeError ("unknow task %s, something wrong" % task_type) machine,machine_type,ssh_sess,resources,command,group_size=util.get_machine_info(mdata,task_type) - + disp = make_dispatcher(machine) if reprod_opt: for ii in work_path: run_tasks=[] for jj in run_tasks_: if ii in jj: run_tasks.append(os.path.basename(jj)) - _run(machine, - machine_type, - ssh_sess, - resources, - command, - ii, - run_tasks, - group_size, - common_files, - forward_files, - backward_files) + + disp.run_jobs(resources, + command, + ii, + run_tasks, + group_size, + common_files, + forward_files, + backward_files, + outlog='autotest.out', + errlog='autotest.err') else: run_tasks = util.collect_task(all_task,task_type) - _run(machine, - machine_type, - ssh_sess, - resources, - command, - work_path, - run_tasks, - group_size, - common_files, - forward_files, - backward_files) + disp.run_jobs(resources, + command, + work_path, + run_tasks, + group_size, + common_files, + forward_files, + backward_files, + outlog='autotest.log', + errlog='autotest.log') def cmpt_interstitial(task_type,jdata,mdata): conf_dir=jdata['conf_dir'] @@ -577,7 +517,7 @@ def run_surf(task_type,jdata,mdata): mdata=decide_fp_machine(mdata) forward_files = ['INCAR', 'POSCAR','POTCAR'] - backward_files = ['OUTCAR','OSZICAR'] + backward_files = ['OUTCAR', 'log' , 'OSZICAR'] common_files=['INCAR','POTCAR'] #lammps @@ -605,17 +545,17 @@ def run_surf(task_type,jdata,mdata): run_tasks = util.collect_task(all_task,task_type) machine,machine_type,ssh_sess,resources,command,group_size=util.get_machine_info(mdata,task_type) - _run(machine, - machine_type, - ssh_sess, - resources, - command, - work_path, - run_tasks, - group_size, - common_files, - forward_files, - backward_files) + disp = make_dispatcher(machine) + disp.run_jobs(resources, + command, + work_path, + run_tasks, + group_size, + common_files, + forward_files, + backward_files, + outlog='autotest.out', + errlog='autotest.err') def cmpt_surf(task_type,jdata,mdata): conf_dir=jdata['conf_dir'] @@ -661,20 +601,20 @@ def run_phonon(task_type,jdata,mdata): run_tasks = util.collect_task(all_task,task_type) forward_files = ['INCAR', 'POTCAR','KPOINTS'] - backward_files = ['OUTCAR','OSZICAR','vasprun.xml'] + backward_files = ['OUTCAR', 'log' , 'OSZICAR','vasprun.xml'] common_files=['POSCAR'] - _run(machine, - machine_type, - ssh_sess, - resources, - command, - work_path, - run_tasks, - group_size, - common_files, - forward_files, - backward_files) + disp = make_dispatcher(machine) + disp.run_jobs(resources, + command, + work_path, + run_tasks, + group_size, + common_files, + forward_files, + backward_files, + outlog='autotest.out', + errlog='autotest.err') #lammps elif task_type in lammps_task_type: None diff --git a/examples/.DS_Store b/examples/.DS_Store deleted file mode 100644 index c3ea4212bbee53087bdee5da47c738776a67b582..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8196 
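Every run_* task in this patch now goes through the same dispatcher calls; a condensed sketch of that shared flow (the machine/resources dicts and the file lists are placeholders -- only the call shape is taken from the patch):

    from dpgen.dispatcher.Dispatcher import make_dispatcher

    def dispatch(machine, resources, command, work_path, run_tasks, group_size,
                 common_files, forward_files, backward_files):
        # one dispatcher built from the machine config replaces the old
        # per-scheduler branches in _run (slurm/pbs/local/ucloud)
        disp = make_dispatcher(machine)
        disp.run_jobs(resources,
                      command,
                      work_path,
                      run_tasks,
                      group_size,
                      common_files,
                      forward_files,
                      backward_files,
                      outlog='autotest.out',
                      errlog='autotest.err')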
[ binary patch payload for examples/.DS_Store omitted ]
diff --git a/examples/test/deepmd_param.json b/examples/test/deepmd_param.json index 294455623..98e79afdb 100644 --- a/examples/test/deepmd_param.json +++ b/examples/test/deepmd_param.json @@ -1,60 +1,64 @@ { "_comment": "models", - "potcar_map" : { - "Al" : "/somewhere/example/POTCAR" + "potcar_map": { + "Al": "/somewhere/example/POTCAR" }, - "conf_dir":"confs/Al/std-fcc", - "key_id":"key id of Material project", - "task_type":"deepmd", - "task":"all", - - "vasp_params": { - "ecut": 650, - "ediff": 1e-6, - "kspacing": 0.1, - "kgamma": false, - "npar": 1, - "kpar": 1, - "_comment": " that's all " + "conf_dir": "confs/Al/std-fcc", + "key_id": "key id of Material project", + "task_type": "deepmd", + "task": "all", + + "vasp_params": { + "ecut": 650, + "ediff": 1e-6, + "kspacing": 0.1, + "kgamma": false, + "npar": 1, + "kpar": 1, + "_comment": " that's all " }, - "lammps_params": { - "model_dir":"somewhere/example/Al_model", - "type_map":["Al"], - "model_name":false, - "model_param_type":false + "lammps_params": { + "model_dir": "somewhere/example/Al_model", + "type_map": [ + "Al" + ], + "model_name": false, + "model_param_type": false }, - - "_comment":"00.equi", - "store_stable":true, - + "_comment": "00.equi", + "alloy_shift": false, "_comment": "01.eos", - "vol_start": 12, - "vol_end": 22, - "vol_step": 0.5, - + "vol_start": 12, + "vol_end": 22, + "vol_step": 0.5, "_comment": "02.elastic", - "norm_deform": 2e-2, - "shear_deform": 5e-2, - - "_comment":"03.vacancy", - "supercell":[3,3,3], - - "_comment":"04.interstitial", - "insert_ele":["Al"], - "reprod-opt":false, - + "norm_deform": 2e-2, + "shear_deform": 5e-2, + "_comment": "03.vacancy", + "supercell": [ + 3, + 3, + 3 + ], + "_comment": "04.interstitial", + "insert_ele": [ + "Al" + ], + "reprod-opt": false, "_comment": "05.surface", - "min_slab_size": 10, - "min_vacuum_size": 11, + "min_slab_size": 10, + "min_vacuum_size": 11, "_comment": "pert xz to work around vasp bug...", - "pert_xz": 0.01, + "pert_xz": 0.01, "max_miller": 2, - "static-opt":false, - "relax_box":false, - - "_comment":"06.phonon", - "supercell_matrix":[2,2,2], - "band":"0 1 0 0.5 1 0.5 0.375 0.75 0.375 0 0 0 0.5 0.5 0.5", - - "_comment": "that's all" + "static-opt": false, + "relax_box": false, + "_comment": "06.phonon", + "supercell_matrix": [ + 2, + 2, + 2 + ], + "band": "0 1 0 0.5 1 0.5 0.375 0.75 0.375 0 0 0 0.5 0.5 0.5", + "_comment": "that's all" } diff --git 
a/examples/test/meam_param.json b/examples/test/meam_param.json index 5beff5324..f89b432e2 100644 --- a/examples/test/meam_param.json +++ b/examples/test/meam_param.json @@ -1,60 +1,77 @@ { "_comment": "models", - "potcar_map" : { - "Al" : "/somewhere/example/POTCAR" + "potcar_map": { + "Al": "/somewhere/example/POTCAR" }, - "conf_dir":"confs/Al/std-fcc", - "key_id":"key id of Material project", - "task_type":"meam", - "task":"all", - - "vasp_params": { - "ecut": 650, - "ediff": 1e-6, - "kspacing": 0.1, - "kgamma": false, - "npar": 1, - "kpar": 1, - "_comment": " that's all " + "conf_dir": "confs/Al/std-fcc", + "key_id": "key id of Material project", + "task_type": "meam", + "task": "all", + + "vasp_params": { + "ecut": 650, + "ediff": 1e-6, + "kspacing": 0.1, + "kgamma": false, + "npar": 1, + "kpar": 1, + "_comment": " that's all " }, - "lammps_params": { - "model_dir":"somewhere/example/meam", - "type_map":["Al","Si","Mg","Cu","Fe"], - "model_name":["meam.AlSiMgCuFe","meam.library"], - "model_param_type":["AlS", "SiS", "MgS", "CuS", "FeS"] + "lammps_params": { + "model_dir": "somewhere/example/meam", + "type_map": [ + "Al", + "Si", + "Mg", + "Cu", + "Fe" + ], + "model_name": [ + "meam.AlSiMgCuFe", + "meam.library" + ], + "model_param_type": [ + "AlS", + "SiS", + "MgS", + "CuS", + "FeS" + ] }, - - "_comment":"00.equi", - "store_stable":true, - + "_comment": "00.equi", + "alloy_shift": false, "_comment": "01.eos", - "vol_start": 12, - "vol_end": 22, - "vol_step": 0.5, - + "vol_start": 12, + "vol_end": 22, + "vol_step": 0.5, "_comment": "02.elastic", - "norm_deform": 2e-2, - "shear_deform": 5e-2, - - "_comment":"03.vacancy", - "supercell":[3,3,3], - - "_comment":"04.interstitial", - "insert_ele":["Al"], - "reprod-opt":false, - + "norm_deform": 2e-2, + "shear_deform": 5e-2, + "_comment": "03.vacancy", + "supercell": [ + 3, + 3, + 3 + ], + "_comment": "04.interstitial", + "insert_ele": [ + "Al" + ], + "reprod-opt": false, "_comment": "05.surface", - "min_slab_size": 10, - "min_vacuum_size": 11, + "min_slab_size": 10, + "min_vacuum_size": 11, "_comment": "pert xz to work around vasp bug...", - "pert_xz": 0.01, + "pert_xz": 0.01, "max_miller": 2, - "static-opt":false, - "relax_box":false, - - "_comment":"06.phonon", - "supercell_matrix":[2,2,2], - "band":"0 1 0 0.5 1 0.5 0.375 0.75 0.375 0 0 0 0.5 0.5 0.5", - - "_comment": "that's all" + "static-opt": false, + "relax_box": false, + "_comment": "06.phonon", + "supercell_matrix": [ + 2, + 2, + 2 + ], + "band": "0 1 0 0.5 1 0.5 0.375 0.75 0.375 0 0 0 0.5 0.5 0.5", + "_comment": "that's all" } diff --git a/examples/test/vasp_param.json b/examples/test/vasp_param.json index 2ec2ed9df..4fab168ab 100644 --- a/examples/test/vasp_param.json +++ b/examples/test/vasp_param.json @@ -1,54 +1,56 @@ { - "_comment": "models", - "potcar_map" : { - "Al" : "/somewhere/example/POTCAR" - }, - "conf_dir":"confs/Al/std-fcc", - "key_id":"key id of Material project", - "task_type":"vasp", - "task":"all", - - "vasp_params": { - "ecut": 650, - "ediff": 1e-6, - "kspacing": 0.1, - "kgamma": false, - "npar": 1, - "kpar": 1, - "_comment": " that's all " - }, - - "_comment":"00.equi", - "store_stable":true, - - "_comment": "01.eos", - "vol_start": 12, - "vol_end": 22, - "vol_step": 0.5, - - "_comment": "02.elastic", - "norm_deform": 2e-2, - "shear_deform": 5e-2, - - "_comment":"03.vacancy", - "supercell":[3,3,3], - - "_comment":"04.interstitial", - "insert_ele":["Al"], - "reprod-opt":false, - - "_comment": "05.surface", - "min_slab_size": 10, - "min_vacuum_size": 11, - 
"_comment": "pert xz to work around vasp bug...", - "pert_xz": 0.01, - "max_miller": 2, - "static-opt":false, - "relax_box":false, - - "_comment":"06.phonon", - "supercell_matrix":[2,2,2], - "band":"0 1 0 0.5 1 0.5 0.375 0.75 0.375 0 0 0 0.5 0.5 0.5", - - "_comment": "that's all" + "_comment": "models", + "potcar_map": { + "Al": "/somewhere/example/POTCAR" + }, + "conf_dir": "confs/Al/std-fcc", + "key_id": "key id of Material project", + "task_type": "vasp", + "task": "all", + + "vasp_params": { + "ecut": 650, + "ediff": 1e-6, + "kspacing": 0.1, + "kgamma": false, + "npar": 1, + "kpar": 1, + "_comment": " that's all " + }, + "_comment": "00.equi", + "alloy_shift": false, + "_comment": "01.eos", + "vol_start": 12, + "vol_end": 22, + "vol_step": 0.5, + "_comment": "02.elastic", + "norm_deform": 2e-2, + "shear_deform": 5e-2, + "_comment": "03.vacancy", + "supercell": [ + 3, + 3, + 3 + ], + "_comment": "04.interstitial", + "insert_ele": [ + "Al" + ], + "reprod-opt": false, + "_comment": "05.surface", + "min_slab_size": 10, + "min_vacuum_size": 11, + "_comment": "pert xz to work around vasp bug...", + "pert_xz": 0.01, + "max_miller": 2, + "static-opt": false, + "relax_box": false, + "_comment": "06.phonon", + "supercell_matrix": [ + 2, + 2, + 2 + ], + "band": "0 1 0 0.5 1 0.5 0.375 0.75 0.375 0 0 0 0.5 0.5 0.5", + "_comment": "that's all" } diff --git a/examples/test/vasp_poscar_param.json b/examples/test/vasp_poscar_param.json index eeca53d7d..7165e1dd1 100644 --- a/examples/test/vasp_poscar_param.json +++ b/examples/test/vasp_poscar_param.json @@ -1,47 +1,50 @@ { "_comment": "models", - "potcar_map" : { - "Al" : "/somewhere/example/POTCAR" + "potcar_map": { + "Al": "/somewhere/example/POTCAR" }, - "conf_dir":"confs/Al/std-fcc", - "key_id":"key id of Material project", - "task_type":"vasp", - "task":"all", + "conf_dir": "confs/Al/std-fcc", + "key_id": "key id of Material project", + "task_type": "vasp", + "task": "all", - "relax_incar":"somewhere/relax_incar", - "scf_incar":"somewhere/scf_incar", - - "_comment":"00.equi", - "store_stable":true, + "relax_incar": "somewhere/relax_incar", + "scf_incar": "somewhere/scf_incar", + "_comment": "00.equi", + "alloy_shift": false, "_comment": "01.eos", - "vol_start": 12, - "vol_end": 22, - "vol_step": 0.5, - + "vol_start": 12, + "vol_end": 22, + "vol_step": 0.5, "_comment": "02.elastic", - "norm_deform": 2e-2, - "shear_deform": 5e-2, - - "_comment":"03.vacancy", - "supercell":[3,3,3], - - "_comment":"04.interstitial", - "insert_ele":["Al"], - "reprod-opt":false, - + "norm_deform": 2e-2, + "shear_deform": 5e-2, + "_comment": "03.vacancy", + "supercell": [ + 3, + 3, + 3 + ], + "_comment": "04.interstitial", + "insert_ele": [ + "Al" + ], + "reprod-opt": false, "_comment": "05.surface", - "min_slab_size": 10, - "min_vacuum_size": 11, + "min_slab_size": 10, + "min_vacuum_size": 11, "_comment": "pert xz to work around vasp bug...", - "pert_xz": 0.01, + "pert_xz": 0.01, "max_miller": 2, - "static-opt":false, - "relax_box":false, - - "_comment":"06.phonon", - "supercell_matrix":[2,2,2], - "band":"0 1 0 0.5 1 0.5 0.375 0.75 0.375 0 0 0 0.5 0.5 0.5", - - "_comment": "that's all" + "static-opt": false, + "relax_box": false, + "_comment": "06.phonon", + "supercell_matrix": [ + 2, + 2, + 2 + ], + "band": "0 1 0 0.5 1 0.5 0.375 0.75 0.375 0 0 0 0.5 0.5 0.5", + "_comment": "that's all" } From 0973a9165141b4b5cee1188b317a0f129d909c56 Mon Sep 17 00:00:00 2001 From: BaozCWJ Date: Wed, 20 Nov 2019 18:52:08 +0800 Subject: [PATCH 022/109] fix the list of backward file --- 
dpgen/auto_test/run.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/dpgen/auto_test/run.py b/dpgen/auto_test/run.py index 4b92664e6..8b7ffbd07 100644 --- a/dpgen/auto_test/run.py +++ b/dpgen/auto_test/run.py @@ -75,7 +75,7 @@ def run_equi(task_type,jdata,mdata): mdata=decide_fp_machine(mdata) forward_files = ['INCAR', 'POTCAR'] - backward_files = ['OUTCAR', 'log' , 'CONTCAR','OSZICAR'] + backward_files = ['OUTCAR', 'autotest.out' , 'CONTCAR','OSZICAR'] common_files=['POSCAR'] #lammps @@ -83,7 +83,7 @@ def run_equi(task_type,jdata,mdata): mdata = decide_model_devi_machine(mdata) forward_files = ['conf.lmp', 'lammps.in'] - backward_files = ['dump.relax','log.lammps', 'model_devi.log'] + backward_files = ['dump.relax','log.lammps', 'autotest.out'] fp_params = jdata['lammps_params'] model_dir = fp_params['model_dir'] @@ -164,7 +164,7 @@ def run_eos(task_type,jdata,mdata): mdata=decide_fp_machine(mdata) forward_files = ['INCAR', 'POSCAR','POTCAR'] - backward_files = ['OUTCAR', 'log' , 'OSZICAR'] + backward_files = ['OUTCAR', 'autotest.out' , 'OSZICAR'] common_files=['INCAR','POTCAR'] #lammps @@ -181,7 +181,7 @@ def run_eos(task_type,jdata,mdata): else: models = [os.path.join(model_dir,ii) for ii in model_name] forward_files = ['conf.lmp', 'lammps.in']+model_name - backward_files = ['log.lammps', 'model_devi.log'] + backward_files = ['log.lammps', 'autotest.out'] common_files=['lammps.in']+model_name if len(model_name)>1 and task_type == 'deepmd': @@ -240,7 +240,7 @@ def run_elastic(task_type,jdata,mdata): mdata=decide_fp_machine(mdata) forward_files = ['INCAR', 'POSCAR','POTCAR','KPOINTS'] - backward_files = ['OUTCAR', 'log' , 'CONTCAR','OSZICAR'] + backward_files = ['OUTCAR', 'autotest.out' , 'CONTCAR','OSZICAR'] common_files=['INCAR','POTCAR','KPOINTS'] #lammps @@ -257,7 +257,7 @@ def run_elastic(task_type,jdata,mdata): else: models = [os.path.join(model_dir,ii) for ii in model_name] forward_files = ['conf.lmp', 'lammps.in','strain.out']+model_name - backward_files = ['log.lammps', 'model_devi.log'] + backward_files = ['log.lammps', 'autotest.out'] common_files=['lammps.in']+model_name if len(model_name)>1 and task_type == 'deepmd': @@ -313,7 +313,7 @@ def run_vacancy(task_type,jdata,mdata): mdata=decide_fp_machine(mdata) forward_files = ['INCAR', 'POSCAR','POTCAR'] - backward_files = ['OUTCAR', 'log' , 'OSZICAR'] + backward_files = ['OUTCAR', 'autotest.out' , 'OSZICAR'] common_files=['INCAR','POTCAR'] #lammps @@ -331,7 +331,7 @@ def run_vacancy(task_type,jdata,mdata): models = [os.path.join(model_dir,ii) for ii in model_name] common_files = model_name forward_files = ['conf.lmp', 'lammps.in']+model_name - backward_files = ['log.lammps','model_devi.log'] + backward_files = ['log.lammps','autotest.out'] common_files=['lammps.in']+model_name if len(model_name)>1 and task_type == 'deepmd': @@ -396,7 +396,7 @@ def run_interstitial(task_type,jdata,mdata): mdata=decide_fp_machine(mdata) forward_files = ['INCAR', 'POSCAR','POTCAR'] - backward_files = ['OUTCAR', 'log' , 'XDATCAR','OSZICAR'] + backward_files = ['OUTCAR', 'autotest.out' , 'XDATCAR','OSZICAR'] common_files=['INCAR'] #lammps @@ -429,7 +429,7 @@ def run_interstitial(task_type,jdata,mdata): else: models = [os.path.join(model_dir,ii) for ii in model_name] forward_files = ['conf.lmp', 'lammps.in']+model_name - backward_files = ['log.lammps', 'model_devi.log'] + backward_files = ['log.lammps', 'autotest.out'] common_files=['lammps.in']+model_name if len(model_name)>1 and task_type == 
'deepmd': @@ -467,8 +467,8 @@ def run_interstitial(task_type,jdata,mdata): common_files, forward_files, backward_files, - outlog='autotest.log', - errlog='autotest.log') + outlog='autotest.out', + errlog='autotest.err') def cmpt_interstitial(task_type,jdata,mdata): conf_dir=jdata['conf_dir'] @@ -517,7 +517,7 @@ def run_surf(task_type,jdata,mdata): mdata=decide_fp_machine(mdata) forward_files = ['INCAR', 'POSCAR','POTCAR'] - backward_files = ['OUTCAR', 'log' , 'OSZICAR'] + backward_files = ['OUTCAR', 'autotest.out' , 'OSZICAR'] common_files=['INCAR','POTCAR'] #lammps @@ -534,7 +534,7 @@ def run_surf(task_type,jdata,mdata): else: models = [os.path.join(model_dir,ii) for ii in model_name] forward_files = ['conf.lmp', 'lammps.in']+model_name - backward_files = ['log.lammps','model_devi.log'] + backward_files = ['log.lammps','autotest.out'] common_files=['lammps.in']+model_name if len(model_name)>1 and task_type == 'deepmd': @@ -601,7 +601,7 @@ def run_phonon(task_type,jdata,mdata): run_tasks = util.collect_task(all_task,task_type) forward_files = ['INCAR', 'POTCAR','KPOINTS'] - backward_files = ['OUTCAR', 'log' , 'OSZICAR','vasprun.xml'] + backward_files = ['OUTCAR', 'autotest.out' , 'OSZICAR','vasprun.xml'] common_files=['POSCAR'] disp = make_dispatcher(machine) From 56b416ec15bf9de85d8a51fcc5cee8799b620f23 Mon Sep 17 00:00:00 2001 From: BaozCWJ Date: Wed, 20 Nov 2019 19:27:50 +0800 Subject: [PATCH 023/109] fix the file list of dispatcher --- dpgen/auto_test/gen_05_surf.py | 50 ++++++++++++++++------------------ dpgen/auto_test/run.py | 8 ++++-- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/dpgen/auto_test/gen_05_surf.py b/dpgen/auto_test/gen_05_surf.py index 776a0205f..a11303427 100755 --- a/dpgen/auto_test/gen_05_surf.py +++ b/dpgen/auto_test/gen_05_surf.py @@ -12,9 +12,13 @@ def make_vasp(jdata, conf_dir, max_miller = 2, relax_box = False, static = False) : + min_slab_size = jdata['min_slab_size'] + min_vacuum_size = jdata['min_vacuum_size'] + pert_xz = jdata['pert_xz'] + if 'relax_incar' in jdata.keys(): vasp_str='vasp-relax_incar' - else: + else: kspacing = jdata['vasp_params']['kspacing'] vasp_str='vasp-k%.2f' % (kspacing) @@ -23,7 +27,7 @@ def make_vasp(jdata, conf_dir, max_miller = 2, relax_box = False, static = False # conf_poscar = os.path.join(conf_path, 'POSCAR') equi_path = re.sub('confs', global_equi_name, conf_dir) equi_path = os.path.join(equi_path, vasp_str) - equi_path = os.path.abspath(equi_path) + equi_path = os.path.abspath(equi_path) equi_contcar = os.path.join(equi_path, 'CONTCAR') assert os.path.exists(equi_contcar),"Please compute the equilibrium state using vasp first" task_path = re.sub('confs', global_task_name, conf_dir) @@ -64,9 +68,6 @@ def make_vasp(jdata, conf_dir, max_miller = 2, relax_box = False, static = False kpar = fp_params['kpar'] kspacing = fp_params['kspacing'] kgamma = fp_params['kgamma'] - min_slab_size = jdata['min_slab_size'] - min_vacuum_size = jdata['min_vacuum_size'] - pert_xz = jdata['pert_xz'] fc = vasp.make_vasp_static_incar(ecut, ediff, npar=npar,kpar=kpar, kspacing = kspacing, kgamma = kgamma) else : if 'relax_incar' in jdata.keys(): @@ -82,9 +83,6 @@ def make_vasp(jdata, conf_dir, max_miller = 2, relax_box = False, static = False kpar = fp_params['kpar'] kspacing = fp_params['kspacing'] kgamma = fp_params['kgamma'] - min_slab_size = jdata['min_slab_size'] - min_vacuum_size = jdata['min_vacuum_size'] - pert_xz = jdata['pert_xz'] fc = vasp.make_vasp_relax_incar(ecut, ediff, True, relax_box, False, npar=npar,kpar=kpar, 
kspacing = kspacing, kgamma = kgamma) with open(os.path.join(task_path, 'INCAR'), 'w') as fp : fp.write(fc) @@ -101,7 +99,7 @@ def make_vasp(jdata, conf_dir, max_miller = 2, relax_box = False, static = False for fname in potcar_list: with open(fname) as infile: outfile.write(infile.read()) - # gen tasks + # gen tasks cwd = os.getcwd() for ii in range(len(all_slabs)) : slab = all_slabs[ii] @@ -130,7 +128,7 @@ def make_lammps(jdata, conf_dir, max_miller = 2, static = False, relax_box = Fal kspacing = jdata['vasp_params']['kspacing'] fp_params = jdata['lammps_params'] model_dir = fp_params['model_dir'] - type_map = fp_params['type_map'] + type_map = fp_params['type_map'] model_dir = os.path.abspath(model_dir) model_name =fp_params['model_name'] if not model_name and task_type=='deepmd': @@ -153,13 +151,13 @@ def make_lammps(jdata, conf_dir, max_miller = 2, static = False, relax_box = Fal # conf_poscar = os.path.join(conf_path, 'POSCAR') if 'relax_incar' in jdata.keys(): vasp_str='vasp-relax_incar' - else: + else: vasp_str='vasp-k%.2f' % (kspacing) equi_path = re.sub('confs', global_equi_name, conf_dir) equi_path = os.path.join(equi_path, vasp_str) - equi_path = os.path.abspath(equi_path) - equi_contcar = os.path.join(equi_path, 'CONTCAR') + equi_path = os.path.abspath(equi_path) + equi_contcar = os.path.join(equi_path, 'CONTCAR') assert os.path.exists(equi_contcar),"Please compute the equilibrium state using vasp first" task_path = re.sub('confs', global_task_name, conf_dir) task_path = os.path.abspath(task_path) @@ -182,27 +180,27 @@ def make_lammps(jdata, conf_dir, max_miller = 2, static = False, relax_box = Fal # make lammps.in if task_type =='deepmd': if static : - fc = lammps.make_lammps_eval('conf.lmp', - ntypes, + fc = lammps.make_lammps_eval('conf.lmp', + ntypes, lammps.inter_deepmd, model_name) else : - fc = lammps.make_lammps_equi('conf.lmp', - ntypes, + fc = lammps.make_lammps_equi('conf.lmp', + ntypes, lammps.inter_deepmd, - model_name, + model_name, change_box = relax_box) elif task_type =='meam': if static : - fc = lammps.make_lammps_eval('conf.lmp', - ntypes, + fc = lammps.make_lammps_eval('conf.lmp', + ntypes, lammps.inter_meam, model_param) else : - fc = lammps.make_lammps_equi('conf.lmp', - ntypes, + fc = lammps.make_lammps_equi('conf.lmp', + ntypes, lammps.inter_meam, - model_param, + model_param, change_box = relax_box) f_lammps_in = os.path.join(task_path, 'lammps.in') with open(f_lammps_in, 'w') as fp : @@ -233,7 +231,7 @@ def make_lammps(jdata, conf_dir, max_miller = 2, static = False, relax_box = Fal vasp.regulate_poscar('POSCAR', 'POSCAR') lammps.cvt_lammps_conf('POSCAR', 'conf.lmp') ptypes = vasp.get_poscar_types('POSCAR') - lammps.apply_type_map('conf.lmp', type_map, ptypes) + lammps.apply_type_map('conf.lmp', type_map, ptypes) # record miller np.savetxt('miller.out', slab.miller_index, fmt='%d') # link lammps.in @@ -273,8 +271,6 @@ def _main() : make_lammps(jdata, args.CONF, args.MAX_MILLER, static = True, relax_box = args.relax_box, task_type = args.TASK) else : raise RuntimeError("unknow task ", args.TASK) - + if __name__ == '__main__' : _main() - - diff --git a/dpgen/auto_test/run.py b/dpgen/auto_test/run.py index 8b7ffbd07..a44206909 100644 --- a/dpgen/auto_test/run.py +++ b/dpgen/auto_test/run.py @@ -103,7 +103,7 @@ def run_equi(task_type,jdata,mdata): raise RuntimeError ("unknow task %s, something wrong" % task_type) run_tasks = util.collect_task(all_task,task_type) - + if len(run_tasks)==0: return 
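The `if len(run_tasks)==0: return` guard that this patch threads through every `run_*` function short-circuits before a dispatcher is ever constructed when nothing is left to run. A minimal sketch of the pattern, with stand-in helpers in place of `util.collect_task` and `make_dispatcher` (the bodies below are illustrative assumptions, not the dpgen implementations):

```python
# Sketch of the early-return guard added throughout dpgen/auto_test/run.py.
def collect_task(all_task, task_type):
    return []  # stand-in: pretend every task has already finished

def make_dispatcher(machine):
    raise RuntimeError("unreachable when there is nothing to run")

def run_stage(all_task, task_type, machine):
    run_tasks = collect_task(all_task, task_type)
    if len(run_tasks) == 0:  # the guard: skip dispatcher setup entirely
        return
    make_dispatcher(machine)

run_stage(["task.000"], "deepmd", machine={})  # returns quietly
```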
machine,machine_type,ssh_sess,resources,command,group_size=util.get_machine_info(mdata,task_type) disp = make_dispatcher(machine) disp.run_jobs(resources, @@ -191,7 +191,7 @@ def run_eos(task_type,jdata,mdata): raise RuntimeError ("unknow task %s, something wrong" % task_type) run_tasks = util.collect_task(all_task,task_type) - + if len(run_tasks)==0: return machine,machine_type,ssh_sess,resources,command,group_size=util.get_machine_info(mdata,task_type) disp = make_dispatcher(machine) disp.run_jobs(resources, @@ -267,6 +267,7 @@ def run_elastic(task_type,jdata,mdata): raise RuntimeError ("unknow task %s, something wrong" % task_type) run_tasks = util.collect_task(all_task,task_type) + if len(run_tasks)==0: return machine,machine_type,ssh_sess,resources,command,group_size=util.get_machine_info(mdata,task_type) disp = make_dispatcher(machine) disp.run_jobs(resources, @@ -341,6 +342,7 @@ def run_vacancy(task_type,jdata,mdata): raise RuntimeError ("unknow task %s, something wrong" % task_type) run_tasks = util.collect_task(all_task,task_type) + if len(run_tasks)==0: return machine,machine_type,ssh_sess,resources,command,group_size=util.get_machine_info(mdata,task_type) disp = make_dispatcher(machine) disp.run_jobs(resources, @@ -459,6 +461,7 @@ def run_interstitial(task_type,jdata,mdata): errlog='autotest.err') else: run_tasks = util.collect_task(all_task,task_type) + if len(run_tasks)==0: return disp.run_jobs(resources, command, work_path, @@ -544,6 +547,7 @@ def run_surf(task_type,jdata,mdata): raise RuntimeError ("unknow task %s, something wrong" % task_type) run_tasks = util.collect_task(all_task,task_type) + if len(run_tasks)==0: return machine,machine_type,ssh_sess,resources,command,group_size=util.get_machine_info(mdata,task_type) disp = make_dispatcher(machine) disp.run_jobs(resources, From 82b9972803e93b568b968cfccd0ac5b5accb2db8 Mon Sep 17 00:00:00 2001 From: BaozCWJ Date: Wed, 20 Nov 2019 20:17:39 +0800 Subject: [PATCH 024/109] fix the upload of surf --- dpgen/auto_test/cmpt_05_surf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpgen/auto_test/cmpt_05_surf.py b/dpgen/auto_test/cmpt_05_surf.py index 2cb1e40f2..fde28911d 100755 --- a/dpgen/auto_test/cmpt_05_surf.py +++ b/dpgen/auto_test/cmpt_05_surf.py @@ -96,7 +96,7 @@ def cmpt_deepmd_lammps(jdata, conf_dir, task_name, static = False) : sys.stdout.write ("%s: \t%7.3f %8.3f %8.3f\n" % (structure_dir, evac, epa, equi_epa)) fp.write("%s:\t %7.3f %8.3f %8.3f\n" % (structure_dir, evac, epa, equi_epa)) fp.close() - if 'upload_username' in jdata.keys() and task_name=='deepm': + if 'upload_username' in jdata.keys() and task_name=='deepmd': upload_username=jdata['upload_username'] util.insert_data('surf','deepmd',upload_username,result) From ea78a4da5303ea33e9bd639d15f36f0a04a77a62 Mon Sep 17 00:00:00 2001 From: AnguseZhang <529133328@qq.con> Date: Sat, 23 Nov 2019 18:09:16 +0800 Subject: [PATCH 025/109] run_tasks bug --- dpgen/data/gen.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dpgen/data/gen.py b/dpgen/data/gen.py index 1ae838fbc..7543438c9 100644 --- a/dpgen/data/gen.py +++ b/dpgen/data/gen.py @@ -557,7 +557,7 @@ def run_vasp_relax(jdata, mdata, dispatcher): if len(relax_tasks) == 0: return - relax_run_tasks = [t for t in relax_tasks] + relax_run_tasks = [] for ii in relax_tasks : if not _vasp_check_fin(ii): relax_run_tasks.append(ii) @@ -600,7 +600,7 @@ def run_vasp_md(jdata, mdata, dispatcher): if len(md_tasks) == 0: return - md_run_tasks = [t for t in md_tasks] + md_run_tasks = [] 
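The `run_tasks bug` commit above is worth a second look: the old code seeded `relax_run_tasks`/`md_run_tasks` with every task and then appended the unfinished ones in the loop that follows, so finished tasks were resubmitted and unfinished ones were queued twice. A tiny reproduction, with a `finished` set standing in for `_vasp_check_fin` (the names below are illustrative only):

```python
md_tasks = ["md.000", "md.001"]
finished = {"md.000"}  # stand-in for _vasp_check_fin

buggy = [t for t in md_tasks]  # old code: start with everything
for t in md_tasks:
    if t not in finished:
        buggy.append(t)
print(buggy)  # ['md.000', 'md.001', 'md.001'] -- finished task kept, unfinished one duplicated

fixed = []  # this patch: start empty, keep only unfinished tasks
for t in md_tasks:
    if t not in finished:
        fixed.append(t)
print(fixed)  # ['md.001']
```

Note that this fix is reverted in the very next commit, which restores the resubmit-everything behaviour, so read the following patches with that in mind.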
for ii in md_tasks : if not _vasp_check_fin(ii): md_run_tasks.append(ii) From 866f684e8ea4afcf55b80b086016a94c070951d0 Mon Sep 17 00:00:00 2001 From: AnguseZhang <529133328@qq.con> Date: Sat, 23 Nov 2019 18:10:17 +0800 Subject: [PATCH 026/109] Revert "run_tasks bug" This reverts commit ea78a4da5303ea33e9bd639d15f36f0a04a77a62. --- dpgen/data/gen.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dpgen/data/gen.py b/dpgen/data/gen.py index 7543438c9..1ae838fbc 100644 --- a/dpgen/data/gen.py +++ b/dpgen/data/gen.py @@ -557,7 +557,7 @@ def run_vasp_relax(jdata, mdata, dispatcher): if len(relax_tasks) == 0: return - relax_run_tasks = [] + relax_run_tasks = [t for t in relax_tasks] for ii in relax_tasks : if not _vasp_check_fin(ii): relax_run_tasks.append(ii) @@ -600,7 +600,7 @@ def run_vasp_md(jdata, mdata, dispatcher): if len(md_tasks) == 0: return - md_run_tasks = [] + md_run_tasks = [t for t in md_tasks] for ii in md_tasks : if not _vasp_check_fin(ii): md_run_tasks.append(ii) From cda3c98ea4e0a3996f5a3b01e3df5028c54fdff2 Mon Sep 17 00:00:00 2001 From: AnguseZhang <529133328@qq.con> Date: Sat, 23 Nov 2019 18:10:40 +0800 Subject: [PATCH 027/109] Revert "Allow check_fin" This reverts commit de7e1294f93c40512c68e50b9f4e4bcaad19c11d. --- dpgen/data/gen.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/dpgen/data/gen.py b/dpgen/data/gen.py index 1ae838fbc..73bb8e25f 100644 --- a/dpgen/data/gen.py +++ b/dpgen/data/gen.py @@ -63,12 +63,14 @@ def replace (file_name, pattern, subst) : global_dirname_04 = '02.md' def out_dir_name(jdata) : + cell_type = jdata['cell_type'] elements = jdata['elements'] super_cell = jdata['super_cell'] from_poscar = False if 'from_poscar' in jdata : from_poscar = jdata['from_poscar'] from_poscar_path = jdata['from_poscar_path'] + if from_poscar: poscar_name = os.path.basename(from_poscar_path) cell_str = "%02d" % (super_cell[0]) @@ -76,7 +78,6 @@ def out_dir_name(jdata) : cell_str = cell_str + ("x%02d" % super_cell[ii]) return poscar_name + '.' + cell_str else : - cell_type = jdata['cell_type'] ele_str = "" for ii in elements: ele_str = ele_str + ii.lower() @@ -407,12 +408,6 @@ def pert_scaled(jdata) : os.chdir(cwd) def make_vasp_md(jdata) : - ## If restart_md is true, md folders won't be created again. 
- restart_md = False - if "restart_md" in jdata and jdata["restart_md"]: - restart_md = True - if restart_md: - return out_dir = jdata['out_dir'] potcars = jdata['potcars'] scale = jdata['scale'] @@ -557,10 +552,10 @@ def run_vasp_relax(jdata, mdata, dispatcher): if len(relax_tasks) == 0: return - relax_run_tasks = [t for t in relax_tasks] - for ii in relax_tasks : - if not _vasp_check_fin(ii): - relax_run_tasks.append(ii) + relax_run_tasks = relax_tasks + #for ii in relax_tasks : + # if not _vasp_check_fin(ii): + # relax_run_tasks.append(ii) run_tasks = [os.path.basename(ii) for ii in relax_run_tasks] #dlog.info(run_tasks) @@ -600,10 +595,11 @@ def run_vasp_md(jdata, mdata, dispatcher): if len(md_tasks) == 0: return - md_run_tasks = [t for t in md_tasks] - for ii in md_tasks : - if not _vasp_check_fin(ii): - md_run_tasks.append(ii) + md_run_tasks = md_tasks + #for ii in md_tasks : + # if not _vasp_check_fin(ii): + # md_run_tasks.append(ii) + run_tasks = [ii.replace(work_dir+"/", "") for ii in md_run_tasks] #dlog.info("md_work_dir", work_dir) #dlog.info("run_tasks",run_tasks) From c297c1c20f1f4a4c028f1a3113e7b876c6d57ee9 Mon Sep 17 00:00:00 2001 From: robinzhuang <38876805+robinzyb@users.noreply.github.com> Date: Sat, 23 Nov 2019 19:03:26 +0100 Subject: [PATCH 028/109] check readme --- README.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 9e98fc48f..b567274d7 100644 --- a/README.md +++ b/README.md @@ -507,19 +507,19 @@ The bold notation of key (such aas **type_map**) means that it's a necessary key |**fp_params["mixingweight"]** | Float| 0.05 | Proportion a of output Density Matrix to be used for the input Density Matrix of next SCF cycle (linear mixing). |**fp_params["NumberPulay"]** | Integer| 5 | Controls the Pulay convergence accelerator. | *fp_style == cp2k* -| **fp_params** | Dict | | Parameters for cp2k calculation. find detail in manual.cp2k.org. if it is not remarked with "optional", the parameter must be set. we assume that you have basic knowledge for cp2k input. -|**fp_params["cutoff"]**| String | 400 | -|**fp_params["rel_cutoff"]**| String | 50 | -|**fp_params["functional"]**| String | PBE | -|**fp_params["max_scf"]**| String | 50 | -|**fp_params["pair_potential_type"]**| String | DFTD3 | This is optional. -|**fp_params["pair_potential_path"]**| String | "./cp2k_basis_pp_file/dftd3.dat" | must be set if you set the "pair_potential_type" -|**fp_params["pair_ref_functional"]**| String | PBE | must be set if you set the "pair_potential_type" -|**fp_params["basis_path"]**| String | "./cp2k_basis_pp_file/BASIS_MOLOPT" | -|**fp_params["pp_path"]**| String | "./cp2k_basis_pp_file/GTH_POTENTIALS" | -|**fp_params["element_list"]**| List | ["H","C","N"] | -|**fp_params["basis_list"]**| List | ["DZVP_MOLOPT_GTH","DZVP_MOLOPT_GTH","DZVP_MOLOPT_GTH"] | Must be same order with element_list -|**fp_params["pp_list"]**| List | ["GTH-PBE-q1","GTH-PBE-q4","GTH-PBE-q5"] | Must be same order with element_list +| **fp_params** | Dict | | Parameters for cp2k calculation. find detail in manual.cp2k.org. only the kind section must be set before use. we assume that you have basic knowledge for cp2k input. 
+|**fp_params is dictionary**| | | +|**example 1 for cp2k ** | dict | "user_fp_params": { + "FORCE_EVAL":{ + "SUBSYS":{ + "KIND":{ + "_": ["N","C","H"], + "POTENTIAL": ["GTH-PBE-q5","GTH-PBE-q4", "GTH-PBE-q1"], + "BASIS_SET": ["DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH"] + } + } + } + } | From dc5dd226c114341904e457cef69d1ca9d083677f Mon Sep 17 00:00:00 2001 From: robinzhuang <38876805+robinzyb@users.noreply.github.com> Date: Sat, 23 Nov 2019 19:17:34 +0100 Subject: [PATCH 029/109] modify --- README.md | 758 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 388 insertions(+), 370 deletions(-) diff --git a/README.md b/README.md index b567274d7..5b2d92fc8 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # DP-GEN Manual + ## Table of Contents * [DP-GEN Manual](#dp-gen-manual) * [Table of Contents](#table-of-contents) @@ -38,14 +39,14 @@ DP-GEN (Deep Generator) is a software written in Python, delicately designed to + tests : unittest tools for developers. -One can easily run DP-GEN with : -``` -dpgen TASK PARAM MACHINE -``` + One can easily run DP-GEN with : + ``` + dpgen TASK PARAM MACHINE + ``` -where TASK is the key word, PARAM and MACHINE are both JSON files. + where TASK is the key word, PARAM and MACHINE are both JSON files. -Options for TASK: + Options for TASK: * `init_bulk` : Generating initial data for bulk systems. * `init_surf` : Generating initial data for surface systems. * `run` : Main process of Deep Generator. @@ -81,13 +82,13 @@ Path: /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/dpgen-0.5.1. Dependency ------------ - numpy 1.17.2 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/numpy - dpdata 0.1.10 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/dpdata-0.1.10-py3.6.egg/dpdata - pymatgen 2019.7.2 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/pymatgen - monty 2.0.4 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/monty - ase 3.17.0 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/ase-3.17.0-py3.6.egg/ase - paramiko 2.6.0 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/paramiko - custodian 2019.2.10 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/custodian +numpy 1.17.2 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/numpy +dpdata 0.1.10 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/dpdata-0.1.10-py3.6.egg/dpdata +pymatgen 2019.7.2 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/pymatgen +monty 2.0.4 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/monty +ase 3.17.0 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/ase-3.17.0-py3.6.egg/ase +paramiko 2.6.0 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/paramiko +custodian 2019.2.10 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/custodian Description ------------ @@ -99,17 +100,17 @@ commands with their own options. To see the options for the sub-commands, type "dpgen sub-command -h". positional arguments: - {init_surf,init_bulk,run,run/report,test,db} - init_surf Generating initial data for surface systems. - init_bulk Generating initial data for bulk systems. - run Main process of Deep Potential Generator. - run/report Report the systems and the thermodynamic conditions of - the labeled frames. - test Auto-test for Deep Potential. - db Collecting data from Deep Generator. +{init_surf,init_bulk,run,run/report,test,db} +init_surf Generating initial data for surface systems. +init_bulk Generating initial data for bulk systems. 
+run Main process of Deep Potential Generator. +run/report Report the systems and the thermodynamic conditions of +the labeled frames. +test Auto-test for Deep Potential. +db Collecting data from Deep Generator. optional arguments: - -h, --help show this help message and exit +-h, --help show this help message and exit ``` @@ -139,23 +140,23 @@ If MACHINE is None, there should be only one stage in stages. Corresponding task Following is an example for `PARAM`, which generates data from a typical structure hcp. ```json { - "stages" : [1,2,3,4], - "cell_type": "hcp", - "latt": 4.479, - "super_cell": [2, 2, 2], - "elements": ["Mg"], - "potcars": ["....../POTCAR"], - "relax_incar": "....../INCAR_metal_rlx", - "md_incar" : "....../INCAR_metal_md", - "scale": [1.00], - "skip_relax": false, - "pert_numb": 2, - "md_nstep" : 5, - "pert_box": 0.03, - "pert_atom": 0.01, - "coll_ndata": 5000, - "type_map" : [ "Mg", "Al"], - "_comment": "that's all" +"stages" : [1,2,3,4], +"cell_type": "hcp", +"latt": 4.479, +"super_cell": [2, 2, 2], +"elements": ["Mg"], +"potcars": ["....../POTCAR"], +"relax_incar": "....../INCAR_metal_rlx", +"md_incar" : "....../INCAR_metal_md", +"scale": [1.00], +"skip_relax": false, +"pert_numb": 2, +"md_nstep" : 5, +"pert_box": 0.03, +"pert_atom": 0.01, +"coll_ndata": 5000, +"type_map" : [ "Mg", "Al"], +"_comment": "that's all" } ``` @@ -169,7 +170,7 @@ The following table gives explicit descriptions on keys in `PARAM`. The bold notation of key (such as **Elements**) means that it's a necessary key. - Key | Type | Example | Discription | +Key | Type | Example | Discription | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| | **stages** | List of Integer | [1,2,3,4] | Stages for `init_bulk` | **Elements** | List of String | ["Mg"] | Atom types @@ -207,56 +208,56 @@ All stages must be **in order**. Following is an example for `PARAM`, which generates data from a typical structure hcp. ```json { - "stages": [ - 1, - 2 - ], - "cell_type": "fcc", - "latt": 4.034, - "super_cell": [ - 2, - 2, - 2 - ], - "layer_numb": 3, - "vacuum_max": 9, - "vacuum_resol": [ - 0.5, - 1 - ], - "mid_point": 4.0, - "millers": [ - [ - 1, - 0, - 0 - ], - [ - 1, - 1, - 0 - ], - [ - 1, - 1, - 1 - ] - ], - "elements": [ - "Al" - ], - "potcars": [ - "....../POTCAR" - ], - "relax_incar": "....../INCAR_metal_rlx_low", - "scale": [ - 1.0 - ], - "skip_relax": true, - "pert_numb": 2, - "pert_box": 0.03, - "pert_atom": 0.01, - "_comment": "that's all" +"stages": [ +1, +2 +], +"cell_type": "fcc", +"latt": 4.034, +"super_cell": [ +2, +2, +2 +], +"layer_numb": 3, +"vacuum_max": 9, +"vacuum_resol": [ +0.5, +1 +], +"mid_point": 4.0, +"millers": [ +[ +1, +0, +0 +], +[ +1, +1, +0 +], +[ +1, +1, +1 +] +], +"elements": [ +"Al" +], +"potcars": [ +"....../POTCAR" +], +"relax_incar": "....../INCAR_metal_rlx_low", +"scale": [ +1.0 +], +"skip_relax": true, +"pert_numb": 2, +"pert_box": 0.03, +"pert_atom": 0.01, +"_comment": "that's all" } ``` @@ -264,7 +265,7 @@ The following table gives explicit descriptions on keys in `PARAM`. The bold notation of key (such as **Elements**) means that it's a necessary key. 
- Key | Type | Example | Discription | +Key | Type | Example | Discription | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| | **stages** | List of Integer | [1,2,3,4] | Stages for `init_surf` | **Elements** | List of String | ["Mg"] | Atom types @@ -300,9 +301,9 @@ In each iteration, there are three stages of work, namely, `00.train 01.model_d + 02.fp : Selected structures will be calculated by first principles methods(default VASP). DP-GEN will obtain some new data and put them together with initial data and data generated in previous iterations. After that a new training will be set up and DP-GEN will enter next iteration! -DP-GEN identifies the current stage by a record file, `record.dpgen`, which will be created and upgraded by codes.Each line contains two number: the first is index of iteration, and the second ,ranging from 0 to 9 ,records which stage in each iteration is currently running. + DP-GEN identifies the current stage by a record file, `record.dpgen`, which will be created and upgraded by codes.Each line contains two number: the first is index of iteration, and the second ,ranging from 0 to 9 ,records which stage in each iteration is currently running. -0,1,2 correspond to make_train, run_train, post_train. DP-GEN will write scripts in `make_train`, run the task by specific machine in `run_train` and collect result in `post_train`. The records for model_devi and fp stage follow similar rules. + 0,1,2 correspond to make_train, run_train, post_train. DP-GEN will write scripts in `make_train`, run the task by specific machine in `run_train` and collect result in `post_train`. The records for model_devi and fp stage follow similar rules. In `PARAM`, you can specialize the task as you expect. @@ -310,134 +311,134 @@ In `PARAM`, you can specialize the task as you expect. 
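Since the `record.dpgen` bookkeeping described above is what lets an interrupted run resume, a short illustration may help before the `PARAM` example below. This is a minimal sketch of reading the checkpoint; the 0-2/3-5/6-8 stage grouping follows the make/run/post convention described above and is an assumption for illustration:

```python
# Sketch: interpret the last line of record.dpgen as (iteration, stage).
stage_names = ["make_train", "run_train", "post_train",
               "make_model_devi", "run_model_devi", "post_model_devi",
               "make_fp", "run_fp", "post_fp"]  # assumed ordering for illustration
with open("record.dpgen") as f:
    records = [line.split() for line in f if line.strip()]
iter_idx, stage = int(records[-1][0]), int(records[-1][1])
print(f"resume at iteration {iter_idx}, stage {stage_names[stage]}")
```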
```json { - "type_map": [ - "H", - "C" - ], - "mass_map": [ - 1, - 12 - ], - "init_data_prefix": "....../init/", - "init_data_sys": [ - "CH4.POSCAR.01x01x01/02.md/sys-0004-0001/deepmd" - ], - "init_batch_size": [ - 8 - ], - "sys_configs_prefix": "....../init/", - "sys_configs": [ - [ - "CH4.POSCAR.01x01x01/01.scale_pert/sys-0004-0001/scale*/00000*/POSCAR" - ], - [ - "CH4.POSCAR.01x01x01/01.scale_pert/sys-0004-0001/scale*/00001*/POSCAR" - ] - ], - "sys_batch_size": [ - 8, - 8, - 8, - 8 - ], - "_comment": " that's all ", - "numb_models": 4, - "train_param": "input.json", - "default_training_param": { - "_comment": "that's all", - "use_smooth": true, - "sel_a": [ - 16, - 4 - ], - "rcut_smth": 0.5, - "rcut": 5, - "filter_neuron": [ - 10, - 20, - 40 - ], - "filter_resnet_dt": false, - "n_axis_neuron": 12, - "n_neuron": [ - 100, - 100, - 100 - ], - "resnet_dt": true, - "coord_norm": true, - "type_fitting_net": false, - "systems": [], - "set_prefix": "set", - "stop_batch": 40000, - "batch_size": 1, - "start_lr": 0.001, - "decay_steps": 200, - "decay_rate": 0.95, - "seed": 0, - "start_pref_e": 0.02, - "limit_pref_e": 2, - "start_pref_f": 1000, - "limit_pref_f": 1, - "start_pref_v": 0.0, - "limit_pref_v": 0.0, - "disp_file": "lcurve.out", - "disp_freq": 1000, - "numb_test": 4, - "save_freq": 1000, - "save_ckpt": "model.ckpt", - "load_ckpt": "model.ckpt", - "disp_training": true, - "time_training": true, - "profiling": false, - "profiling_file": "timeline.json" - }, - "model_devi_dt": 0.002, - "model_devi_skip": 0, - "model_devi_f_trust_lo": 0.05, - "model_devi_f_trust_hi": 0.15, - "model_devi_clean_traj": true, - "model_devi_jobs": [ - { - "sys_idx": [ - 0 - ], - "temps": [ - 100 - ], - "press": [ - 1.0 - ], - "trj_freq": 10, - "nsteps": 300, - "ensemble": "nvt", - "_idx": "00" - }, - { - "sys_idx": [ - 1 - ], - "temps": [ - 100 - ], - "press": [ - 1.0 - ], - "trj_freq": 10, - "nsteps": 3000, - "ensemble": "nvt", - "_idx": "01" - } - ], - "fp_style": "vasp", - "shuffle_poscar": false, - "fp_task_max": 20, - "fp_task_min": 1, - "fp_pp_path": "....../methane/", - "fp_pp_files": [ - "POTCAR" - ], - "fp_incar": "....../INCAR_methane" +"type_map": [ +"H", +"C" +], +"mass_map": [ +1, +12 +], +"init_data_prefix": "....../init/", +"init_data_sys": [ +"CH4.POSCAR.01x01x01/02.md/sys-0004-0001/deepmd" +], +"init_batch_size": [ +8 +], +"sys_configs_prefix": "....../init/", +"sys_configs": [ +[ +"CH4.POSCAR.01x01x01/01.scale_pert/sys-0004-0001/scale*/00000*/POSCAR" +], +[ +"CH4.POSCAR.01x01x01/01.scale_pert/sys-0004-0001/scale*/00001*/POSCAR" +] +], +"sys_batch_size": [ +8, +8, +8, +8 +], +"_comment": " that's all ", +"numb_models": 4, +"train_param": "input.json", +"default_training_param": { +"_comment": "that's all", +"use_smooth": true, +"sel_a": [ +16, +4 +], +"rcut_smth": 0.5, +"rcut": 5, +"filter_neuron": [ +10, +20, +40 +], +"filter_resnet_dt": false, +"n_axis_neuron": 12, +"n_neuron": [ +100, +100, +100 +], +"resnet_dt": true, +"coord_norm": true, +"type_fitting_net": false, +"systems": [], +"set_prefix": "set", +"stop_batch": 40000, +"batch_size": 1, +"start_lr": 0.001, +"decay_steps": 200, +"decay_rate": 0.95, +"seed": 0, +"start_pref_e": 0.02, +"limit_pref_e": 2, +"start_pref_f": 1000, +"limit_pref_f": 1, +"start_pref_v": 0.0, +"limit_pref_v": 0.0, +"disp_file": "lcurve.out", +"disp_freq": 1000, +"numb_test": 4, +"save_freq": 1000, +"save_ckpt": "model.ckpt", +"load_ckpt": "model.ckpt", +"disp_training": true, +"time_training": true, +"profiling": false, +"profiling_file": "timeline.json" +}, 
+"model_devi_dt": 0.002, +"model_devi_skip": 0, +"model_devi_f_trust_lo": 0.05, +"model_devi_f_trust_hi": 0.15, +"model_devi_clean_traj": true, +"model_devi_jobs": [ +{ +"sys_idx": [ +0 +], +"temps": [ +100 +], +"press": [ +1.0 +], +"trj_freq": 10, +"nsteps": 300, +"ensemble": "nvt", +"_idx": "00" +}, +{ +"sys_idx": [ +1 +], +"temps": [ +100 +], +"press": [ +1.0 +], +"trj_freq": 10, +"nsteps": 3000, +"ensemble": "nvt", +"_idx": "01" +} +], +"fp_style": "vasp", +"shuffle_poscar": false, +"fp_task_max": 20, +"fp_task_min": 1, +"fp_pp_path": "....../methane/", +"fp_pp_files": [ +"POTCAR" +], +"fp_incar": "....../INCAR_methane" } ``` @@ -445,20 +446,20 @@ The following table gives explicit descriptions on keys in `PARAM`. The bold notation of key (such aas **type_map**) means that it's a necessary key. - Key | Type | Example | Discription | +Key | Type | Example | Discription | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| | *#Basics* | **type_map** | List of string | ["H", "C"] | Atom types | **mass_map** | List of float | [1, 12] | Standard atom weights. | **use_ele_temp** | int | 0 | Currently only support fp_style vasp. 0(default): no electron temperature. 1: eletron temperature as frame parameter. 2: electron temperature as atom parameter. | *#Data* - | init_data_prefix | String | "/sharedext4/.../data/" | Prefix of initial data directories - | ***init_data_sys*** | List of string|["CH4.POSCAR.01x01x01/.../deepmd"] |Directories of initial data. You may use either absolute or relative path here. - | ***sys_format*** | String | "vasp/poscar" | Format of initial data. It will be `vasp/poscar` if not set. - | init_multi_systems | Boolean | false | If set to `true`, `init_data_sys` directories should contain sub-directories of various systems. DP-GEN will regard all of these sub-directories as inital data systems. - | **init_batch_size** | String of integer | [8] | Each number is the batch_size of corresponding system for training in `init_data_sys`. One recommended rule for setting the `sys_batch_size` and `init_batch_size` is that `batch_size` mutiply number of atoms ot the stucture should be larger than 32. If set to `auto`, batch size will be 32 divided by number of atoms. | - | sys_configs_prefix | String | "/sharedext4/.../data/" | Prefix of `sys_configs` - | **sys_configs** | List of list of string | [
["/sharedext4/.../POSCAR"],
["....../POSCAR"]
] | Containing directories of structures to be explored in iterations.Wildcard characters are supported here. | +| init_data_prefix | String | "/sharedext4/.../data/" | Prefix of initial data directories +| ***init_data_sys*** | List of string|["CH4.POSCAR.01x01x01/.../deepmd"] |Directories of initial data. You may use either absolute or relative path here. +| ***sys_format*** | String | "vasp/poscar" | Format of initial data. It will be `vasp/poscar` if not set. +| init_multi_systems | Boolean | false | If set to `true`, `init_data_sys` directories should contain sub-directories of various systems. DP-GEN will regard all of these sub-directories as inital data systems. +| **init_batch_size** | String of integer | [8] | Each number is the batch_size of corresponding system for training in `init_data_sys`. One recommended rule for setting the `sys_batch_size` and `init_batch_size` is that `batch_size` mutiply number of atoms ot the stucture should be larger than 32. If set to `auto`, batch size will be 32 divided by number of atoms. | +| sys_configs_prefix | String | "/sharedext4/.../data/" | Prefix of `sys_configs` +| **sys_configs** | List of list of string | [
["/sharedext4/.../POSCAR"],
["....../POSCAR"]
] | Containing directories of structures to be explored in iterations.Wildcard characters are supported here. | | **sys_batch_size** | List of integer | [8, 8] | Each number is the batch_size for training of corresponding system in `sys_configs`. If set to `auto`, batch size will be 32 divided by number of atoms. | | *#Training* | **numb_models** | Integer | 4 (recommend) | Number of models to be trained in `00.train`. | @@ -467,7 +468,7 @@ The bold notation of key (such aas **type_map**) means that it's a necessary key | **model_devi_dt** | Float | 0.002 (recommend) | Timestep for MD | | **model_devi_skip** | Integer | 0 | Number of structures skipped for fp in each MD | **model_devi_f_trust_lo** | Float | 0.05 | Lower bound of forces for the selection. - | **model_devi_f_trust_hi** | Float | 0.15 | Upper bound of forces for the selection +| **model_devi_f_trust_hi** | Float | 0.15 | Upper bound of forces for the selection | **model_devi_e_trust_lo** | Float | 1e10 | Lower bound of energies for the selection. Recommend to set them a high number, since forces provide more precise information. Special cases such as energy minimization may need this. | | **model_devi_e_trust_hi** | Float | 1e10 | Upper bound of energies for the selection. | | **model_devi_clean_traj** | Boolean | true | Deciding whether to clean traj folders in MD since they are too large. | @@ -507,21 +508,38 @@ The bold notation of key (such aas **type_map**) means that it's a necessary key |**fp_params["mixingweight"]** | Float| 0.05 | Proportion a of output Density Matrix to be used for the input Density Matrix of next SCF cycle (linear mixing). |**fp_params["NumberPulay"]** | Integer| 5 | Controls the Pulay convergence accelerator. | *fp_style == cp2k* -| **fp_params** | Dict | | Parameters for cp2k calculation. find detail in manual.cp2k.org. only the kind section must be set before use. we assume that you have basic knowledge for cp2k input. -|**fp_params is dictionary**| | | -|**example 1 for cp2k ** | dict | "user_fp_params": { - "FORCE_EVAL":{ - "SUBSYS":{ - "KIND":{ - "_": ["N","C","H"], - "POTENTIAL": ["GTH-PBE-q5","GTH-PBE-q4", "GTH-PBE-q1"], - "BASIS_SET": ["DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH"] - } - } - } - } | - +| **fp_params** | Dict | Parameters for cp2k calculation. find detail in manual.cp2k.org. only the kind section must be set before use. we assume that you have basic knowledge for cp2k input. | +#### Rules for cp2k input at dictionary form +Converting cp2k input is very simple as dictionary used to dpgen input. You just need follow some simple rule: +- kind section parameter must be provide +- replace `keyword` in cp2k as `keyword` in dict. +- replace `keyword parameter` in cp2k as `value` in dict. +- replace `section name` in cp2k as `keyword` in dict. . The corresponding value is a `dict`. +- repalce `section parameter` in cp2k as `value` with dict. keyword `"_"`(underscore) +- `repeat section` in cp2k just need to be written once with repeat parameter as list. 
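As a concrete illustration of the repeat-section rule that closes the list above: a single `KIND` dict whose values are lists unrolls into one `&KIND` block per list entry, with the `"_"` value placed on the section line. The rendering below is written out by hand to show the target cp2k text and does not call into dpgen itself:

```python
# How a repeat section unrolls: one &KIND block per list entry.
kind = {
    "_": ["N", "C", "H"],  # section parameters, one per repeated block
    "POTENTIAL": ["GTH-PBE-q5", "GTH-PBE-q4", "GTH-PBE-q1"],
    "BASIS_SET": ["DZVP-MOLOPT-GTH", "DZVP-MOLOPT-GTH", "DZVP-MOLOPT-GTH"],
}
for name, pot, basis in zip(kind["_"], kind["POTENTIAL"], kind["BASIS_SET"]):
    print(f"&KIND {name}")
    print(f"  POTENTIAL {pot}")
    print(f"  BASIS_SET {basis}")
    print("&END KIND")
```

which prints three `&KIND` sections, one per element.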
+ + Here are examples for setting: + + ```python + + #minimal information you should provide for input + "user_fp_params": { + "FORCE_EVAL":{ + "DFT":{ + "BASIS_SET_FILE_NAME": "path", + "POTENTIAL_FILE_NAME": "path" + } + "SUBSYS":{ + "KIND":{ + "_": ["N","C","H"], + "POTENTIAL": ["GTH-PBE-q5","GTH-PBE-q4", "GTH-PBE-q1"], + "BASIS_SET": ["DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH"] + } + } + } + } +``` ## Test: Auto-test for Deep Generator @@ -550,14 +568,14 @@ The whole program contains a series of tasks shown as follows. In each task, the We take Al as an example to show the parameter settings of `param.json`. The first part is the fundamental setting for particular alloy system. ```json - "_comment": "models", - "potcar_map" : { - "Al" : "/somewhere/POTCAR" - }, - "conf_dir":"confs/Al/std-fcc", - "key_id":"API key of Material project", - "task_type":"deepmd", - "task":"eos", +"_comment": "models", +"potcar_map" : { +"Al" : "/somewhere/POTCAR" +}, +"conf_dir":"confs/Al/std-fcc", +"key_id":"API key of Material project", +"task_type":"deepmd", +"task":"eos", ``` You need to add the specified paths of necessary `POTCAR` files in "potcar_map". The different `POTCAR` paths are separated by commas. Then you also need to add the folder path of particular configuration, which contains `POSCAR` file. @@ -572,65 +590,65 @@ Usually, if you add the relative path of POSCAR as the above format, + `task_type` contains 3 optional types for testing, i.e. **vasp**, **deepmd** and **meam**. + `task` contains 7 options, **equi**, **eos**, **elastic**, **vacancy**, **interstitial**, **surf** and **all**. The option **all** can do all the tasks. -It is worth noting that the subsequent tasks need to rely on the calculation results of the equilibrium state, so it is necessary to give priority to the calculation of the equilibrium state while testing. And due to the stable consideration, we recommand you to test the equilibrium state of **vasp** before other tests. - -The second part is the computational settings for vasp and lammps. According to your actual needs, you can choose to add the paths of specific INCAR or use the simplified INCAR by setting `vasp_params`. The priority of specified INCAR is higher than using `vasp_params`. The most important setting is to add the folder path `model_dir` of **deepmd** model and supply the corresponding element type map. Besides, `dpgen test` also is able to call common lammps packages, such as **meam**. -```json -"relax_incar":"somewhere/relax_incar", -"scf_incar":"somewhere/scf_incar", -"vasp_params": { - "ecut": 650, - "ediff": 1e-6, - "kspacing": 0.1, - "kgamma": false, - "npar": 1, - "kpar": 1, - "_comment": " that's all " + It is worth noting that the subsequent tasks need to rely on the calculation results of the equilibrium state, so it is necessary to give priority to the calculation of the equilibrium state while testing. And due to the stable consideration, we recommand you to test the equilibrium state of **vasp** before other tests. + + The second part is the computational settings for vasp and lammps. According to your actual needs, you can choose to add the paths of specific INCAR or use the simplified INCAR by setting `vasp_params`. The priority of specified INCAR is higher than using `vasp_params`. The most important setting is to add the folder path `model_dir` of **deepmd** model and supply the corresponding element type map. Besides, `dpgen test` also is able to call common lammps packages, such as **meam**. 
+ ```json + "relax_incar":"somewhere/relax_incar", + "scf_incar":"somewhere/scf_incar", + "vasp_params": { + "ecut": 650, + "ediff": 1e-6, + "kspacing": 0.1, + "kgamma": false, + "npar": 1, + "kpar": 1, + "_comment": " that's all " }, "lammps_params": { - "model_dir":"somewhere/example/Al_model", - "type_map":["Al"], - "model_name":false, - "model_param_type":false + "model_dir":"somewhere/example/Al_model", + "type_map":["Al"], + "model_name":false, + "model_param_type":false }, -``` -The last part is the optional settings for various tasks mentioned above. You can change the parameters according to actual needs. -```json + ``` + The last part is the optional settings for various tasks mentioned above. You can change the parameters according to actual needs. + ```json "_comment":"00.equi", "alloy_shift":false, -``` + ``` + `alloy_shift`:(boolean) whether to compute the alloy formation energy. If you test alloy and set 'true', you need to compute the energies of corresponding elements respectively first of ßall. Please set 'false' when test single element. -```json + ```json "_comment": "01.eos", "vol_start": 12, "vol_end": 22, "vol_step": 0.5, -``` + ``` + `vol_start`, `vol_end` and `vol_step` determine the volumetric range and accuracy of the **eos**. -```json + ```json "_comment": "02.elastic", "norm_deform": 2e-2, "shear_deform": 5e-2, -``` + ``` + `norm_deform` and `shear_deform` are the scales of material deformation. -This task uses the stress-strain relationship to calculate the elastic constant. + This task uses the stress-strain relationship to calculate the elastic constant. -```json + ```json "_comment":"03.vacancy", "supercell":[3,3,3], -``` + ``` + `supercell`:(list of integer) the supercell size used to generate vacancy defect and interstitial defect -```json + ```json "_comment":"04.interstitial", "insert_ele":["Al"], "reprod-opt":false, -``` + ``` + `insert_ele`:(list of string) the elements used to generate point interstitial defect + `repord-opt`:(boolean) whether to reproduce trajectories of interstitial defect -```json + ```json "_comment": "05.surface", "min_slab_size": 10, "min_vacuum_size": 11, @@ -639,7 +657,7 @@ This task uses the stress-strain relationship to calculate the elastic constant. "max_miller": 2, "static-opt":false, "relax_box":false, -``` + ``` + `min_slab_size` and `min_vacuum_size` are the minimum size of slab thickness and the vacuume width. + `pert_xz` is the perturbation through xz direction used to compute surface energy. 
+ `max_miller` (integer) is the maximum miller index @@ -655,90 +673,90 @@ When switching into a new machine, you may modifying the `MACHINE`, according to An example for `MACHINE` is: ```json { - "train": [ - { - "machine": { - "machine_type": "slurm", - "hostname": "localhost", - "port": 22, - "username": "Angus", - "work_path": "....../work" - }, - "resources": { - "numb_node": 1, - "numb_gpu": 1, - "task_per_node": 4, - "partition": "AdminGPU", - "exclude_list": [], - "source_list": [ - "....../train_tf112_float.env" - ], - "module_list": [], - "time_limit": "23:0:0", - "qos": "data" - }, - "deepmd_path": "....../tf1120-lowprec" - } - ], - "model_devi": [ - { - "machine": { - "machine_type": "slurm", - "hostname": "localhost", - "port": 22, - "username": "Angus", - "work_path": "....../work" - }, - "resources": { - "numb_node": 1, - "numb_gpu": 1, - "task_per_node": 2, - "partition": "AdminGPU", - "exclude_list": [], - "source_list": [ - "......./lmp_tf112_float.env" - ], - "module_list": [], - "time_limit": "23:0:0", - "qos": "data" - }, - "command": "lmp_serial", - "group_size": 1 - } - ], - "fp": [ - { - "machine": { - "machine_type": "slurm", - "hostname": "localhost", - "port": 22, - "username": "Angus", - "work_path": "....../work" - }, - "resources": { - "task_per_node": 4, - "numb_gpu": 1, - "exclude_list": [], - "with_mpi": false, - "source_list": [], - "module_list": [ - "mpich/3.2.1-intel-2017.1", - "vasp/5.4.4-intel-2017.1", - "cuda/10.1" - ], - "time_limit": "120:0:0", - "partition": "AdminGPU", - "_comment": "that's All" - }, - "command": "vasp_gpu", - "group_size": 1 - } - ] +"train": [ +{ +"machine": { +"machine_type": "slurm", +"hostname": "localhost", +"port": 22, +"username": "Angus", +"work_path": "....../work" +}, +"resources": { +"numb_node": 1, +"numb_gpu": 1, +"task_per_node": 4, +"partition": "AdminGPU", +"exclude_list": [], +"source_list": [ +"....../train_tf112_float.env" +], +"module_list": [], +"time_limit": "23:0:0", +"qos": "data" +}, +"deepmd_path": "....../tf1120-lowprec" +} +], +"model_devi": [ +{ +"machine": { +"machine_type": "slurm", +"hostname": "localhost", +"port": 22, +"username": "Angus", +"work_path": "....../work" +}, +"resources": { +"numb_node": 1, +"numb_gpu": 1, +"task_per_node": 2, +"partition": "AdminGPU", +"exclude_list": [], +"source_list": [ +"......./lmp_tf112_float.env" +], +"module_list": [], +"time_limit": "23:0:0", +"qos": "data" +}, +"command": "lmp_serial", +"group_size": 1 +} +], +"fp": [ +{ +"machine": { +"machine_type": "slurm", +"hostname": "localhost", +"port": 22, +"username": "Angus", +"work_path": "....../work" +}, +"resources": { +"task_per_node": 4, +"numb_gpu": 1, +"exclude_list": [], +"with_mpi": false, +"source_list": [], +"module_list": [ +"mpich/3.2.1-intel-2017.1", +"vasp/5.4.4-intel-2017.1", +"cuda/10.1" +], +"time_limit": "120:0:0", +"partition": "AdminGPU", +"_comment": "that's All" +}, +"command": "vasp_gpu", +"group_size": 1 +} +] } ``` Following table illustrates which key is needed for three types of machine: `train`,`model_devi` and `fp`. Each of them is a list of dicts. Each dict can be considered as an independent environmnet for calculation. 
- Key | `train` | `model_devi` | `fp` | +Key | `train` | `model_devi` | `fp` | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| | machine | NEED | NEED | NEED | resources | NEED | NEED | NEED @@ -749,7 +767,7 @@ Following table illustrates which key is needed for three types of machine: `tra The following table gives explicit descriptions on keys in param.json. - Key | Type | Example | Discription | +Key | Type | Example | Discription | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| |deepmd_path | String |"......tf1120-lowprec" | Installed directory of DeepMD-Kit 0.x, which should contain `bin lib include`. | python_path | String | "....../python3.6/bin/python" | Python path for DeePMD-kit 1.x installed. This option should not be used with `deepmd_path` together. @@ -780,9 +798,9 @@ mem_limit | Interger | 16 | Maximal memory permitted to apply for the job. - Size of `sel_a` and actual types of atoms in your system. - Index of `sys_configs` and `sys_idx` -2. Please verify the directories of `sys_configs`. If there isnt's any POSCAR for `01.model_devi` in one iteration, it may happen that you write the false path of `sys_configs`. -3. Correct format of JSON file. -4. In `02.fp`, total cores you require through `task_per_node` should be devided by `npar` times `kpar`. -5. The frames of one system should be larger than `batch_size` and `numb_test` in `default_training_param`. It happens that one iteration adds only a few structures and causes error in next iteration's training. In this condition, you may let `fp_task_min` be larger than `numb_test`. + 2. Please verify the directories of `sys_configs`. If there isnt's any POSCAR for `01.model_devi` in one iteration, it may happen that you write the false path of `sys_configs`. + 3. Correct format of JSON file. + 4. In `02.fp`, total cores you require through `task_per_node` should be devided by `npar` times `kpar`. + 5. The frames of one system should be larger than `batch_size` and `numb_test` in `default_training_param`. It happens that one iteration adds only a few structures and causes error in next iteration's training. In this condition, you may let `fp_task_min` be larger than `numb_test`. ## License The project dpgen is licensed under [GNU LGPLv3.0](./LICENSE). From d78e500c9b6fe276986f71bed5e043bcd9123d90 Mon Sep 17 00:00:00 2001 From: robinzhuang <38876805+robinzyb@users.noreply.github.com> Date: Sat, 23 Nov 2019 19:23:24 +0100 Subject: [PATCH 030/109] modify --- README.md | 51 ++++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 5b2d92fc8..e1a8ed8b6 100644 --- a/README.md +++ b/README.md @@ -509,39 +509,40 @@ Key | Type | Example |**fp_params["NumberPulay"]** | Integer| 5 | Controls the Pulay convergence accelerator. | *fp_style == cp2k* | **fp_params** | Dict | Parameters for cp2k calculation. find detail in manual.cp2k.org. only the kind section must be set before use. we assume that you have basic knowledge for cp2k input. | + #### Rules for cp2k input at dictionary form -Converting cp2k input is very simple as dictionary used to dpgen input. You just need follow some simple rule: + Converting cp2k input is very simple as dictionary used to dpgen input. 
You just need follow some simple rule: - kind section parameter must be provide - replace `keyword` in cp2k as `keyword` in dict. - replace `keyword parameter` in cp2k as `value` in dict. - replace `section name` in cp2k as `keyword` in dict. . The corresponding value is a `dict`. -- repalce `section parameter` in cp2k as `value` with dict. keyword `"_"`(underscore) -- `repeat section` in cp2k just need to be written once with repeat parameter as list. - - Here are examples for setting: - - ```python - - #minimal information you should provide for input - "user_fp_params": { - "FORCE_EVAL":{ - "DFT":{ - "BASIS_SET_FILE_NAME": "path", - "POTENTIAL_FILE_NAME": "path" - } - "SUBSYS":{ - "KIND":{ - "_": ["N","C","H"], - "POTENTIAL": ["GTH-PBE-q5","GTH-PBE-q4", "GTH-PBE-q1"], - "BASIS_SET": ["DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH"] - } - } - } - - } +- repalce `section parameter` in cp2k as `value` with dict. keyword `"_"` +- `repeat section` in cp2k just need to be written once with repeat parameter as list. + +Here are examples for setting: + +```python + +#minimal information you should provide for input +"user_fp_params": { +"FORCE_EVAL":{ +"DFT":{ +"BASIS_SET_FILE_NAME": "path", +"POTENTIAL_FILE_NAME": "path" +} +"SUBSYS":{ +"KIND":{ +"_": ["N","C","H"], +"POTENTIAL": ["GTH-PBE-q5","GTH-PBE-q4", "GTH-PBE-q1"], +"BASIS_SET": ["DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH"] +} +} +} +} ``` + ## Test: Auto-test for Deep Generator At this step, we assume that you have prepared some graph files like `graph.*.pb` and the particular pseudopotential `POTCAR`. From 711db9a40a8c9646cf879fef9c4635f2330a7e37 Mon Sep 17 00:00:00 2001 From: robinzhuang <38876805+robinzyb@users.noreply.github.com> Date: Sat, 23 Nov 2019 19:26:55 +0100 Subject: [PATCH 031/109] modify --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e1a8ed8b6..1897fe836 100644 --- a/README.md +++ b/README.md @@ -508,7 +508,7 @@ Key | Type | Example |**fp_params["mixingweight"]** | Float| 0.05 | Proportion a of output Density Matrix to be used for the input Density Matrix of next SCF cycle (linear mixing). |**fp_params["NumberPulay"]** | Integer| 5 | Controls the Pulay convergence accelerator. | *fp_style == cp2k* -| **fp_params** | Dict | Parameters for cp2k calculation. find detail in manual.cp2k.org. only the kind section must be set before use. we assume that you have basic knowledge for cp2k input. | +| **fp_params** | Dict | |Parameters for cp2k calculation. find detail in manual.cp2k.org. only the kind section must be set before use. we assume that you have basic knowledge for cp2k input. #### Rules for cp2k input at dictionary form Converting cp2k input is very simple as dictionary used to dpgen input. 
You just need follow some simple rule: @@ -524,6 +524,8 @@ Here are examples for setting: ```python #minimal information you should provide for input +#other we have set other parameters in code, if you want to +#use your own paramter, just write a corresponding dictionary "user_fp_params": { "FORCE_EVAL":{ "DFT":{ From 31b96ff25c4a4b06c8cbb44eacd579c0acddcfb5 Mon Sep 17 00:00:00 2001 From: robinzhuang <38876805+robinzyb@users.noreply.github.com> Date: Sat, 23 Nov 2019 19:30:28 +0100 Subject: [PATCH 032/109] construct new cp2k input format --- README.md | 2 +- dpgen/generator/lib/cp2k.py | 286 ++++++++++-------- .../param-pyridine-cp2k-choose-vdw.json | 115 ------- tests/generator/param-pyridine-cp2k.json | 22 +- tests/generator/test_make_fp.py | 107 ------- 5 files changed, 178 insertions(+), 354 deletions(-) delete mode 100644 tests/generator/param-pyridine-cp2k-choose-vdw.json diff --git a/README.md b/README.md index 1897fe836..52862ef58 100644 --- a/README.md +++ b/README.md @@ -524,7 +524,7 @@ Here are examples for setting: ```python #minimal information you should provide for input -#other we have set other parameters in code, if you want to +#we have set other parameters in code, if you want to #use your own paramter, just write a corresponding dictionary "user_fp_params": { "FORCE_EVAL":{ diff --git a/dpgen/generator/lib/cp2k.py b/dpgen/generator/lib/cp2k.py index ec3edf963..bbe877786 100644 --- a/dpgen/generator/lib/cp2k.py +++ b/dpgen/generator/lib/cp2k.py @@ -1,24 +1,173 @@ import dpdata import numpy as np -def make_section(section_name, section_value = None): - if section_value == None : - temp_section = '&' + section_name + '\n' - temp_section += '&END ' + section_name + '\n' - else : - temp_section = '&' + section_name + ' ' + section_value + '\n' - temp_section += '&END ' + section_name + '\n' - return temp_section - -def section_add_subsection(section_string, subsection_string): - section_string, section_end = section_string.rsplit('\n', 2)[0:2] - section_string += '\n' + subsection_string + section_end + '\n' - return section_string - -def section_add_keyword_and_value(section_string, keyword, keyword_value): - section_string, section_end = section_string.rsplit('\n', 2)[0:2] - section_string += '\n' + keyword + ' ' + keyword_value + '\n' + section_end + '\n' - return section_string +default_config={ + "GLOBAL": { + "PROJECT": "DPGEN" + }, + "FORCE_EVAL": { + "METHOD": "QS", + "STRESS_TENSOR": "ANALYTICAL", + "DFT": { + "BASIS_SET_FILE_NAME": "./cp2k_basis_pp_file/BASIS_MOLOPT", + "POTENTIAL_FILE_NAME": "./cp2k_basis_pp_file/GTH_POTENTIALS", + "CHARGE": 0, + "UKS": "F", + "MULTIPLICITY": 1, + "MGRID": { + "CUTOFF": 400, + "REL_CUTOFF": 50, + "NGRIDS": 4 + }, + "QS": { + "EPS_DEFAULT": "1.0E-12" + }, + "SCF": { + "SCF_GUESS": "ATOMIC", + "EPS_SCF": "1.0E-6", + "MAX_SCF": 50, + "OT": { + "MINIMIZER": "DIIS", + "PRECONDITIONER": "FULL_SINGLE_INVERSE" + } + }, + "XC": { + "XC_FUNCTIONAL": { + "_": "PBE" + } + + } + }, + "SUBSYS": { + "CELL":{ + "A": "10 .0 .0", + "B": ".0 10 .0", + "C": ".0 .0 10" + }, + "COORD": {"@include": "coord.xyz"}, + "KIND": { + "_": ["H","C","N"], + "POTENTIAL": ["GTH-PBE-q1","GTH-PBE-q4", "GTH-PBE-q5"], + "BASIS_SET": ["DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH"] + } + }, + "PRINT": { + "FORCES": { + "_": "ON" + } + } +} +} + +def update_dict(old_d, update_d): + """ + a method to recursive update dict + :old_d: old dictionary + :update_d: some update value written in dictionary form + """ + import collections + for k, v in update_d.items(): 
+ if (k in old_d and isinstance(old_d[k], dict) and isinstance(update_d[k], collections.Mapping)): + update_dict(old_d[k], update_d[k]) + else: + old_d[k] = update_d[k] + +def iterdict(d, out_list, flag=None): + """ + :doc: a recursive expansion of dictionary into cp2k input + :k: current key + :v: current value + :d: current dictionary under expansion + :flag: used to record dictionary state. if flag is None, + it means we are in top level dict. flag is a string. + """ + for k,v in d.items(): + k=str(k) # cast key into string + #if value is dictionary + if isinstance(v, dict): + # flag == None, it is now in top level section of cp2k + if flag==None : + out_list.append("&"+k) + out_list.append("&END "+k) + iterdict(v, out_list, k) + # flag is not None, now it has name of section + else: + index = out_list.index("&END " + flag) + out_list.insert(index, "&"+k) + out_list.insert(index+1,"&END "+k ) + iterdict(v, out_list, k) + elif isinstance(v, list): +# print("we have encountered the repeat section!") + index = out_list.index("&"+flag) + # delete the current constructed repeat section + del out_list[index:index+2] + # do a loop over key and corresponding list + k_tmp_list = [] + v_list_tmp_list = [] + for k_tmp, v_tmp in d.items(): + k_tmp_list.append(str(k_tmp)) + v_list_tmp_list.append(v_tmp) + for repeat_keyword in zip(*v_list_tmp_list): + out_list.insert(index,"&" + flag) + out_list.insert(index+1, "&END " + flag) + for idx, k_tmp in enumerate(k_tmp_list): + if k_tmp == "_": + out_list[index] = "&" + flag + " " + repeat_keyword[idx] + else: + out_list.insert(index+1, k_tmp+" "+repeat_keyword[idx]) + + break + + else: + v=str(v) + if flag==None : + out_list.append(k+" "+v) + print (k,":",v) + else: + if k == "_": + index = out_list.index("&" + flag) + out_list[index] = ("&" + flag + " " + v) + + else: + index = out_list.index("&END "+flag) + out_list.insert(index, k+" "+v) + + +def make_cp2k_input(sys_data, fp_params): + #covert cell to cell string + cell = sys_data['cells'][0] + cell = np.reshape(cell, [3,3]) + cell_a = np.array2string(cell[0,:]) + cell_a = cell_a[1:-1] + cell_b = np.array2string(cell[1,:]) + cell_b = cell_b[1:-1] + cell_c = np.array2string(cell[2,:]) + cell_c = cell_c[1:-1] + + #get update from user + user_config=fp_params + #get update from cell + cell_config={"FORCE_EVAL":{ + "SUBSYS":{ + "CELL":{ + "A": cell_a, + "B": cell_b, + "C": cell_c + } + } + } + } + + update_dict(default_config, user_config) + update_dict(default_config, cell_config) + #output list + input_str = [] + iterdict(default_config, input_str) + string="\n".join(input_str) + return string + + + def make_cp2k_xyz(sys_data): #get structral information @@ -34,107 +183,6 @@ def make_cp2k_xyz(sys_data): x += str(kind) + ' ' + str(coord[:])[1:-1] + '\n' return x -def make_cp2k_input(sys_data, fp_params): - - #covert cell to cell string - cell = sys_data['cells'][0] - cell = np.reshape(cell, [3,3]) - cell_a = np.array2string(cell[0,:]) - cell_a = cell_a[1:-1] - cell_b = np.array2string(cell[1,:]) - cell_b = cell_b[1:-1] - cell_c = np.array2string(cell[2,:]) - cell_c = cell_c[1:-1] - - - #made global section - global_section = make_section('GLOBAL') - global_section = section_add_keyword_and_value(global_section, 'PROJECT', 'DPGEN') - - #made force_eval section - mgrid_section = make_section('MGRID') - mgrid_section = section_add_keyword_and_value(mgrid_section, 'CUTOFF', fp_params['cutoff']) - mgrid_section = section_add_keyword_and_value(mgrid_section, 'REL_CUTOFF', fp_params['rel_cutoff']) - 
mgrid_section = section_add_keyword_and_value(mgrid_section, 'NGRIDS', '4') - - qs_section = make_section('QS') - qs_section = section_add_keyword_and_value(qs_section, 'EPS_DEFAULT', '1.0E-12') - - ot_section = make_section('OT') - ot_section = section_add_keyword_and_value(ot_section, 'MINIMIZER', 'DIIS') - ot_section = section_add_keyword_and_value(ot_section, 'PRECONDITIONER', 'FULL_SINGLE_INVERSE') - -# outer_scf_section = make_section('OUTER_SCF') -# outer_scf_section = section_add_keyword_and_value(outer_scf_section, 'MAX_SCF', None) -# outer_scf_section = section_add_keyword_and_value(outer_scf_section, 'EPS_SCF', None) - - scf_section = make_section('SCF') - scf_section = section_add_keyword_and_value(scf_section, 'SCF_GUESS', 'ATOMIC') - scf_section = section_add_keyword_and_value(scf_section, 'EPS_SCF', '1.0E-6') - scf_section = section_add_keyword_and_value(scf_section, 'MAX_SCF', fp_params['max_scf']) - scf_section = section_add_subsection(scf_section, ot_section) -# scf_section = section_add_subsection(scf_section, outer_scf_section) - - xc_functional_section = make_section('XC_FUNCTIONAL', fp_params['functional']) - if 'pair_potential_type' in fp_params : - pair_potential_section = make_section('PAIR_POTENTIAL') - pair_potential_section = section_add_keyword_and_value(pair_potential_section, 'TYPE', fp_params['pair_potential_type']) - pair_potential_section = section_add_keyword_and_value(pair_potential_section, 'PARAMETER_FILE_NAME', fp_params['pair_potential_path']) - pair_potential_section = section_add_keyword_and_value(pair_potential_section, 'REFERENCE_FUNCTIONAL',fp_params['pair_ref_functional']) - - if 'pair_potential_type' in fp_params : - vdw_potential_section = make_section('VDW_POTENTIAL') - vdw_potential_section = section_add_keyword_and_value(vdw_potential_section, 'DISPERSION_FUNCTIONAL', 'PAIR_POTENTIAL') - vdw_potential_section = section_add_subsection(vdw_potential_section, pair_potential_section) - - - xc_section = make_section('XC') - xc_section = section_add_subsection(xc_section, xc_functional_section) - if 'pair_potential_type' in fp_params : - xc_section = section_add_subsection(xc_section, vdw_potential_section) - - - dft_section = make_section('DFT') - dft_section = section_add_keyword_and_value(dft_section, 'BASIS_SET_FILE_NAME', fp_params['basis_path']) - dft_section = section_add_keyword_and_value(dft_section, 'POTENTIAL_FILE_NAME', fp_params['pp_path']) - dft_section = section_add_keyword_and_value(dft_section, 'CHARGE', '0') - dft_section = section_add_keyword_and_value(dft_section, 'UKS', 'F') - dft_section = section_add_keyword_and_value(dft_section, 'MULTIPLICITY', '1') - dft_section = section_add_subsection(dft_section, mgrid_section) - dft_section = section_add_subsection(dft_section, qs_section) - dft_section = section_add_subsection(dft_section, scf_section) - dft_section = section_add_subsection(dft_section, xc_section) - - cell_section = make_section('CELL') - cell_section = section_add_keyword_and_value(cell_section, 'A', cell_a) - cell_section = section_add_keyword_and_value(cell_section, 'B', cell_b) - cell_section = section_add_keyword_and_value(cell_section, 'C', cell_c) - - coord_section = make_section('COORD') - coord_section = section_add_keyword_and_value(coord_section, '@include', 'coord.xyz') - - subsys_section = make_section('SUBSYS') - subsys_section = section_add_subsection(subsys_section, cell_section) - subsys_section = section_add_subsection(subsys_section, coord_section) - - for kind, basis, potential in 
zip(fp_params['element_list'], fp_params['basis_list'], fp_params['pp_list']) : - kind_section = make_section('KIND', kind) - kind_section = section_add_keyword_and_value(kind_section, 'BASIS_SET', basis) - kind_section = section_add_keyword_and_value(kind_section, 'POTENTIAL', potential) - subsys_section = section_add_subsection(subsys_section, kind_section) - - forces_section = make_section('FORCES', 'ON') - - print_section = make_section('PRINT') - print_section = section_add_subsection(print_section, forces_section) - - force_eval_section = make_section('FORCE_EVAL') - force_eval_section = section_add_keyword_and_value(force_eval_section, 'METHOD', 'QS') - force_eval_section = section_add_keyword_and_value(force_eval_section, 'STRESS_TENSOR', 'ANALYTICAL') - force_eval_section = section_add_subsection(force_eval_section, dft_section) - force_eval_section = section_add_subsection(force_eval_section, subsys_section) - force_eval_section = section_add_subsection(force_eval_section, print_section) - return global_section + force_eval_section diff --git a/tests/generator/param-pyridine-cp2k-choose-vdw.json b/tests/generator/param-pyridine-cp2k-choose-vdw.json deleted file mode 100644 index 824613714..000000000 --- a/tests/generator/param-pyridine-cp2k-choose-vdw.json +++ /dev/null @@ -1,115 +0,0 @@ -{ - "type_map": ["C", "H", "N"], - "mass_map": [16, 2, 14], - - "init_data_prefix": "/home/linfengz/SCR/wanghan/deepgen.pyridine/init", - "init_data_sys": ["Pyridine-I", - "Pyridine-II" - ], - "init_batch_size": [1, 1], - "sys_configs": [ - ["/home/linfengz/SCR/wanghan/data/pyridine/pyI.POSCAR.01x01x01/01.scale_pert/sys-0080-0080-0016/scale-1.000/00009?/POSCAR"], - ["/home/linfengz/SCR/wanghan/data/pyridine/pyI.POSCAR.01x01x01/01.scale_pert/sys-0080-0080-0016/scale-1.000/0000[7-8]?/POSCAR"], - ["/home/linfengz/SCR/wanghan/data/pyridine/pyI.POSCAR.01x01x01/01.scale_pert/sys-0080-0080-0016/scale-1.000/0000[5-6]?/POSCAR"], - ["/home/linfengz/SCR/wanghan/data/pyridine/pyI.POSCAR.01x01x01/01.scale_pert/sys-0080-0080-0016/scale-1.000/0000[0-4]?/POSCAR"], - ["/home/linfengz/SCR/wanghan/data/pyridine/pyII.POSCAR.01x01x01/01.scale_pert/sys-0080-0080-0016/scale-1.000/00009?/POSCAR"], - ["/home/linfengz/SCR/wanghan/data/pyridine/pyII.POSCAR.01x01x01/01.scale_pert/sys-0080-0080-0016/scale-1.000/0000[7-8]?/POSCAR"], - ["/home/linfengz/SCR/wanghan/data/pyridine/pyII.POSCAR.01x01x01/01.scale_pert/sys-0080-0080-0016/scale-1.000/0000[5-6]?/POSCAR"], - ["/home/linfengz/SCR/wanghan/data/pyridine/pyII.POSCAR.01x01x01/01.scale_pert/sys-0080-0080-0016/scale-1.000/0000[0-4]?/POSCAR"] - ], - "_comment": "0 1 2 3", - "_comment": "4 5 6 7", - "sys_batch_size": [1, 1, 1, 1, - 1, 1, 1, 1 - ], - - "_comment": " 00.train ", - "numb_models": 4, - "train_param": "input.json", - "default_training_param" : { - "_comment": " model parameters", - "use_smooth": true, - "sel_a": [81, 81, 20], - "rcut_smth": 0.50, - "rcut": 6.50, - "filter_neuron": [25, 50, 100], - "filter_resnet_dt": false, - "n_axis_neuron": 12, - "n_neuron": [240, 240, 240], - "resnet_dt": true, - "coord_norm": true, - "type_fitting_net": false, - - "_comment": " traing controls", - "systems": [], - "set_prefix": "set", - "stop_batch": 400000, - "batch_size": 1, - "start_lr": 0.002, - "decay_steps": 2000, - "decay_rate": 0.95, - "seed": 0, - - "start_pref_e": 0.02, - "limit_pref_e": 2, - "start_pref_f": 1000, - "limit_pref_f": 1, - "start_pref_v": 0.0, - "limit_pref_v": 0.0, - - "_comment": " display and restart", - "_comment": " frequencies counted in batch", - 
"disp_file": "lcurve.out", - "disp_freq": 2000, - "numb_test": 10, - "save_freq": 20000, - "save_ckpt": "model.ckpt", - "load_ckpt": "model.ckpt", - "disp_training": true, - "time_training": true, - "profiling": false, - "profiling_file": "timeline.json", - - "_comment": "that's all" - }, - - "_comment": " 01.model_devi ", - "_comment": "model_devi_skip: the first x of the recorded frames", - "model_devi_dt": 0.001, - "model_devi_skip": 0, - "model_devi_f_trust_lo": 0.050, - "model_devi_f_trust_hi": 0.150, - "model_devi_e_trust_lo": 1e10, - "model_devi_e_trust_hi": 1e10, - "model_devi_clean_traj": false, - "model_devi_jobs": [ - {"sys_idx": [0,4], "temps": [ 50], "press": [1e0,1e1,1e2,1e3,1e4,2e4,4e4], "trj_freq": 10, "nsteps": 1000, "ensemble": "npt", "_idx": "00"}, - {"sys_idx": [1,5], "temps": [ 50], "press": [1e0,1e1,1e2,1e3,1e4,2e4,4e4], "trj_freq": 10, "nsteps": 1000, "ensemble": "npt", "_idx": "01"}, - {"sys_idx": [0,4], "temps": [ 50], "press": [1e0,1e1,1e2,1e3,1e4,2e4,4e4], "trj_freq": 10, "nsteps": 1000, "ensemble": "npt", "_idx": "02"}, - {"sys_idx": [1,5], "temps": [ 50], "press": [1e0,1e1,1e2,1e3,1e4,2e4,4e4], "trj_freq": 10, "nsteps": 1000, "ensemble": "npt", "_idx": "03"}, - {"sys_idx": [0,4], "temps": [ 100], "press": [1e0,1e1,1e2,1e3,1e4,2e4,4e4], "trj_freq": 10, "nsteps": 1000, "ensemble": "npt", "_idx": "04"}, - {"sys_idx": [1,5], "temps": [ 100], "press": [1e0,1e1,1e2,1e3,1e4,2e4,4e4], "trj_freq": 10, "nsteps": 1000, "ensemble": "npt", "_idx": "05"}, - {"sys_idx": [0,4], "temps": [ 100], "press": [1e0,1e1,1e2,1e3,1e4,2e4,4e4], "trj_freq": 10, "nsteps": 1000, "ensemble": "npt", "_idx": "06"}, - {"sys_idx": [1,5], "temps": [ 100], "press": [1e0,1e1,1e2,1e3,1e4,2e4,4e4], "trj_freq": 10, "nsteps": 1000, "ensemble": "npt", "_idx": "07"} - ], - - "_comment": " 02.fp ", - "fp_style": "cp2k", - "shuffle_poscar": false, - "fp_task_max": 100, - "fp_task_min": 10, - "fp_pp_path": ".", - "fp_pp_files": [], - "user_fp_params": { - "cutoff": "400", - "rel_cutoff": "50", - "functional": "PBE", - "max_scf": "320", - "basis_path": "./cp2k_basis_pp_file/BASIS_MOLOPT", - "pp_path": "./cp2k_basis_pp_file/GTH_POTENTIALS", - "element_list": ["H", "C", "N"], - "basis_list": ["DZVP-MOLOPT-GTH", "DZVP-MOLOPT-GTH", "DZVP-MOLOPT-GTH"], - "pp_list": ["GTH-PBE-q1", "GTH-PBE-q4", "GTH-PBE-q5"] - }, - "_comment": " that's all " -} diff --git a/tests/generator/param-pyridine-cp2k.json b/tests/generator/param-pyridine-cp2k.json index 77796bb17..d1e27aafc 100644 --- a/tests/generator/param-pyridine-cp2k.json +++ b/tests/generator/param-pyridine-cp2k.json @@ -101,18 +101,16 @@ "fp_pp_path": ".", "fp_pp_files": [], "user_fp_params": { - "cutoff": "400", - "rel_cutoff": "50", - "functional": "PBE", - "max_scf": "50", - "pair_potential_type": "DFTD3", - "pair_potential_path": "./cp2k_basis_pp_file/dftd3.dat", - "pair_ref_functional": "PBE", - "basis_path": "./cp2k_basis_pp_file/BASIS_MOLOPT", - "pp_path": "./cp2k_basis_pp_file/GTH_POTENTIALS", - "element_list": ["H", "C", "N"], - "basis_list": ["DZVP-MOLOPT-GTH", "DZVP-MOLOPT-GTH", "DZVP-MOLOPT-GTH"], - "pp_list": ["GTH-PBE-q1", "GTH-PBE-q4", "GTH-PBE-q5"] + "FORCE_EVAL":{ + "SUBSYS":{ + "KIND":{ + "_": ["N","C","H"], + "POTENTIAL": ["GTH-PBE-q5","GTH-PBE-q4", "GTH-PBE-q1"], + "BASIS_SET": ["DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH"] + } + } + } + }, "_comment": " that's all " } diff --git a/tests/generator/test_make_fp.py b/tests/generator/test_make_fp.py index 963002974..e77e94d36 100644 --- a/tests/generator/test_make_fp.py +++ 
b/tests/generator/test_make_fp.py @@ -160,14 +160,6 @@ &XC\n\ &XC_FUNCTIONAL PBE\n\ &END XC_FUNCTIONAL\n\ -&VDW_POTENTIAL\n\ -DISPERSION_FUNCTIONAL PAIR_POTENTIAL\n\ -&PAIR_POTENTIAL\n\ -TYPE DFTD3\n\ -PARAMETER_FILE_NAME ./cp2k_basis_pp_file/dftd3.dat\n\ -REFERENCE_FUNCTIONAL PBE\n\ -&END PAIR_POTENTIAL\n\ -&END VDW_POTENTIAL\n\ &END XC\n\ &END DFT\n\ &SUBSYS\n\ @@ -196,65 +188,6 @@ &END FORCE_EVAL\n" -cp2k_input_ref_v1="\ -&GLOBAL\n\ -PROJECT DPGEN\n\ -&END GLOBAL\n\ -&FORCE_EVAL\n\ -METHOD QS\n\ -STRESS_TENSOR ANALYTICAL\n\ -&DFT\n\ -BASIS_SET_FILE_NAME ./cp2k_basis_pp_file/BASIS_MOLOPT\n\ -POTENTIAL_FILE_NAME ./cp2k_basis_pp_file/GTH_POTENTIALS\n\ -CHARGE 0\n\ -UKS F\n\ -MULTIPLICITY 1\n\ -&MGRID\n\ -CUTOFF 400\n\ -REL_CUTOFF 50\n\ -NGRIDS 4\n\ -&END MGRID\n\ -&QS\n\ -EPS_DEFAULT 1.0E-12\n\ -&END QS\n\ -&SCF\n\ -SCF_GUESS ATOMIC\n\ -EPS_SCF 1.0E-6\n\ -MAX_SCF 320\n\ -&OT\n\ -MINIMIZER DIIS\n\ -PRECONDITIONER FULL_SINGLE_INVERSE\n\ -&END OT\n\ -&END SCF\n\ -&XC\n\ -&XC_FUNCTIONAL PBE\n\ -&END XC_FUNCTIONAL\n\ -&END XC\n\ -&END DFT\n\ -&SUBSYS\n\ -&CELL\n\ -&END CELL\n\ -&COORD\n\ -@include coord.xyz\n\ -&END COORD\n\ -&KIND H\n\ -BASIS_SET DZVP-MOLOPT-GTH\n\ -POTENTIAL GTH-PBE-q1\n\ -&END KIND\n\ -&KIND C\n\ -BASIS_SET DZVP-MOLOPT-GTH\n\ -POTENTIAL GTH-PBE-q4\n\ -&END KIND\n\ -&KIND N\n\ -BASIS_SET DZVP-MOLOPT-GTH\n\ -POTENTIAL GTH-PBE-q5\n\ -&END KIND\n\ -&END SUBSYS\n\ -&PRINT\n\ -&FORCES ON\n\ -&END FORCES\n\ -&END PRINT\n\ -&END FORCE_EVAL\n" def _box2lmpbox(orig, box) : @@ -534,21 +467,6 @@ def _check_cp2k_input_head(testCase, idx) : lines_check = lines[:cell_start_idx+1] + lines[cell_end_idx:] testCase.assertEqual(('\n'.join(lines_check)).strip(), cp2k_input_ref.strip()) -def _check_cp2k_input_head_v1(testCase, idx) : - fp_path = os.path.join('iter.%06d' % idx, '02.fp') - tasks = glob.glob(os.path.join(fp_path, 'task.*')) - for ii in tasks : - ifile = os.path.join(ii, 'input.inp') - testCase.assertTrue(os.path.isfile(ifile)) - with open(ifile) as fp: - lines = fp.read().split('\n') - for idx, jj in enumerate(lines) : - if '&CELL' in jj : - cell_start_idx = idx - if '&END CELL' in jj : - cell_end_idx = idx - lines_check = lines[:cell_start_idx+1] + lines[cell_end_idx:] - testCase.assertEqual(('\n'.join(lines_check)).strip(), cp2k_input_ref_v1.strip()) class TestMakeFPPwscf(unittest.TestCase): @@ -859,31 +777,6 @@ def test_make_fp_cp2k(self): _check_cp2k_input_head(self, 0) _check_potcar(self, 0, jdata['fp_pp_path'], jdata['fp_pp_files']) shutil.rmtree('iter.000000') - def test_make_fp_cp2k_choose_vdw(self): - if os.path.isdir('iter.000000') : - shutil.rmtree('iter.000000') - with open (param_cp2k_file_v1, 'r') as fp : - jdata = json.load (fp) - with open (machine_file, 'r') as fp: - mdata = json.load (fp) - md_descript = [] - nsys = 2 - nmd = 3 - n_frame = 10 - for ii in range(nsys) : - tmp = [] - for jj in range(nmd) : - tmp.append(np.arange(0, 0.29, 0.29/10)) - md_descript.append(tmp) - atom_types = [0, 1, 2, 2, 0, 1] - type_map = jdata['type_map'] - _make_fake_md(0, md_descript, atom_types, type_map) - make_fp(0, jdata, {}) - _check_sel(self, 0, jdata['fp_task_max'], jdata['model_devi_f_trust_lo'], jdata['model_devi_f_trust_hi']) - _check_poscars(self, 0, jdata['fp_task_max'], jdata['type_map']) - _check_cp2k_input_head_v1(self, 0) - _check_potcar(self, 0, jdata['fp_pp_path'], jdata['fp_pp_files']) - shutil.rmtree('iter.000000') if __name__ == '__main__': From 35e7eb72278d053b0bc99df1b27165ec7a4bbd39 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 23 Nov 2019 21:23:19 
-0500 Subject: [PATCH 033/109] add badges to readme --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 9e98fc48f..6896b9ddd 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,10 @@ ## About DP-GEN + +[![GitHub release](https://img.shields.io/github/release/deepmodeling/dpgen.svg?maxAge=86400)](https://github.com/deepmodeling/dpgen/releases/) +[![arxiv:1910.12690](http://img.shields.io/badge/arXiv-1910.12690-B31B1B.svg?maxAge=86400)](https://arxiv.org/abs/1910.12690) + DP-GEN (Deep Generator) is a software written in Python, delicately designed to generate a deep learning based model of interatomic potential energy and force field. DP-GEN is depedent on DeepMD-kit (https://github.com/deepmodeling/deepmd-kit/blob/master/README.md). With highly scalable interface with common softwares for molecular simulation, DP-GEN is capable to automatically prepare scripts and maintain job queues on HPC machines (High Performance Cluster) and analyze results ### Highlighted features + **Accurate and efficient**: DP-GEN is capable to sample more than tens of million structures and select only a few for first principles calculation. DP-GEN will finally obtain a uniformly accurate model. From 1bba3c4d6fc65c94e052b821be04c7fd1b786233 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 23 Nov 2019 23:20:52 -0500 Subject: [PATCH 034/109] format RuntimeError message --- dpgen/dispatcher/SSHContext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpgen/dispatcher/SSHContext.py b/dpgen/dispatcher/SSHContext.py index e580c0f5a..185ade392 100644 --- a/dpgen/dispatcher/SSHContext.py +++ b/dpgen/dispatcher/SSHContext.py @@ -138,7 +138,7 @@ def block_checkcall(self, stdin, stdout, stderr = self.ssh.exec_command(('cd %s ;' % self.remote_root) + cmd) exit_status = stdout.channel.recv_exit_status() if exit_status != 0: - raise RuntimeError("Get error code %d in calling %s through ssh with job: %s . message:", + raise RuntimeError("Get error code %d in calling %s through ssh with job: %s . 
message: %s" % (exit_status, cmd, self.job_uuid, stderr.read().decode('utf-8'))) return stdin, stdout, stderr From 0f8d71c72faf2ca43cd3a7c92aead9f6cb5ae569 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Wed, 27 Nov 2019 10:01:40 +0800 Subject: [PATCH 035/109] refactorize dispatcher: support submit, and check all finish --- dpgen/dispatcher/Dispatcher.py | 92 +++++++++++++++++++++++++--------- 1 file changed, 69 insertions(+), 23 deletions(-) diff --git a/dpgen/dispatcher/Dispatcher.py b/dpgen/dispatcher/Dispatcher.py index 39a988543..9f96ef18a 100644 --- a/dpgen/dispatcher/Dispatcher.py +++ b/dpgen/dispatcher/Dispatcher.py @@ -77,6 +77,35 @@ def run_jobs(self, forward_task_deference = True, outlog = 'log', errlog = 'err') : + job_handler = self.submit_jobs(resources, + command, + work_path, + tasks, + group_size, + forward_common_files, + forward_task_files, + backward_task_files, + forward_task_deference, + outlog, + errlog) + while not self.all_finished(job_handler) : + time.sleep(10) + # delete path map file when job finish + # _pmap.delete() + + + def submit_jobs(self, + resources, + command, + work_path, + tasks, + group_size, + forward_common_files, + forward_task_files, + backward_task_files, + forward_task_deference = True, + outlog = 'log', + errlog = 'err') : # task_chunks = [ # [os.path.basename(j) for j in tasks[i:i + group_size]] \ # for i in range(0, len(tasks), group_size) @@ -118,7 +147,7 @@ def run_jobs(self, if job_uuid is None: rjob['batch'].submit(chunk, command, res = resources, outlog=outlog, errlog=errlog) job_uuid = rjob['context'].job_uuid - dlog.debug('assigned uudi %s for %s ' % (job_uuid, task_chunks_[ii])) + dlog.debug('assigned uuid %s for %s ' % (job_uuid, task_chunks_[ii])) dlog.info('new submission of %s' % job_uuid) else: rjob['batch'].submit(chunk, command, res = resources, outlog=outlog, errlog=errlog, restart = True) @@ -133,29 +162,46 @@ def run_jobs(self, assert(len(job_list) == len(task_chunks)) fcount = [0]*len(job_list) - while not all(job_fin) : - dlog.debug('checking jobs') - for idx,rjob in enumerate(job_list) : - if not job_fin[idx] : - status = rjob['batch'].check_status() - job_uuid = rjob['context'].job_uuid - if status == JobStatus.terminated : - fcount[idx] += 1 - if fcount[idx] > 3: - raise RuntimeError('Job %s failed for more than 3 times' % job_uuid) - dlog.info('job %s terminated, submit again'% job_uuid) - dlog.debug('try %s times for %s'% (fcount[idx], job_uuid)) - rjob['batch'].submit(task_chunks[idx], command, res = resources, outlog=outlog, errlog=errlog,restart=True) - elif status == JobStatus.finished : - dlog.info('job %s finished' % job_uuid) - rjob['context'].download(task_chunks[idx], backward_task_files) - rjob['context'].clean() - job_fin[idx] = True - _fr.write_record(job_fin) - time.sleep(10) - # delete path map file when job finish - _pmap.delete() + job_handler = { + 'task_chunks': task_chunks, + 'fin_record': _fr, + 'job_list': job_list, + 'fail_count': fcount, + 'backward_task_files': backward_task_files, + } + return job_handler + + + def all_finished(self, + job_handler): + task_chunks = job_handler['task_chunks'] + _fr = job_handler['fin_record'] + job_fin = _fr.get_record() + fcount = job_handler['fail_count'] + job_list = job_handler['job_list'] + backward_task_files = job_handler['backward_task_files'] + dlog.debug('checking jobs') + for idx,rjob in enumerate(job_list) : + if not job_fin[idx] : + status = rjob['batch'].check_status() + job_uuid = rjob['context'].job_uuid + if status == JobStatus.terminated : + 
fcount[idx] += 1 + if fcount[idx] > 3: + raise RuntimeError('Job %s failed for more than 3 times' % job_uuid) + dlog.info('job %s terminated, submit again'% job_uuid) + dlog.debug('try %s times for %s'% (fcount[idx], job_uuid)) + rjob['batch'].submit(task_chunks[idx], command, res = resources, outlog=outlog, errlog=errlog,restart=True) + elif status == JobStatus.finished : + dlog.info('job %s finished' % job_uuid) + rjob['context'].download(task_chunks[idx], backward_task_files) + rjob['context'].clean() + job_fin[idx] = True + _fr.write_record(job_fin) + return all(job_fin) + + class FinRecord(object): def __init__ (self, path, njobs, fname = 'fin.record'): From 9fc955e23b7aa084c6d91fbd05d4c4fa7d28b63a Mon Sep 17 00:00:00 2001 From: Han Wang Date: Wed, 27 Nov 2019 13:45:37 +0800 Subject: [PATCH 036/109] fix bug of local context --- dpgen/dispatcher/LocalContext.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dpgen/dispatcher/LocalContext.py b/dpgen/dispatcher/LocalContext.py index e9e304418..4549b307a 100644 --- a/dpgen/dispatcher/LocalContext.py +++ b/dpgen/dispatcher/LocalContext.py @@ -113,7 +113,10 @@ def download(self, elif (os.path.exists(rfile)) and (os.path.exists(lfile)) : # both exists, replace! dlog.info('find existing %s, replacing by %s' % (lfile, rfile)) - shutil.rmtree(lfile) + if os.path.isdir(lfile): + shutil.rmtree(lfile) + elif os.path.isfile(lfile) or os.path.islink(lfile): + os.remove(lfile) shutil.move(rfile, lfile) else : raise RuntimeError('should not reach here!') From 288d7e851dae6efec1a001cf7d2691cf4ce9f473 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Wed, 27 Nov 2019 13:45:50 +0800 Subject: [PATCH 037/109] refactorize Dispatcher. better job record --- dpgen/dispatcher/Dispatcher.py | 203 ++++++++++++++++++--------------- 1 file changed, 113 insertions(+), 90 deletions(-) diff --git a/dpgen/dispatcher/Dispatcher.py b/dpgen/dispatcher/Dispatcher.py index 9f96ef18a..3e7b4e6d4 100644 --- a/dpgen/dispatcher/Dispatcher.py +++ b/dpgen/dispatcher/Dispatcher.py @@ -1,4 +1,4 @@ -import os,sys,time,random +import os,sys,time,random,json from dpgen.dispatcher.LocalContext import LocalSession from dpgen.dispatcher.LocalContext import LocalContext @@ -13,8 +13,6 @@ from dpgen.dispatcher.JobStatus import JobStatus from dpgen import dlog from hashlib import sha1 -from monty.serialization import dumpfn,loadfn - def _split_tasks(tasks, group_size): @@ -35,7 +33,8 @@ class Dispatcher(object): def __init__ (self, remote_profile, context_type = 'local', - batch_type = 'slurm'): + batch_type = 'slurm', + job_record = 'jr.json'): self.remote_profile = remote_profile if context_type == 'local': self.session = LocalSession(remote_profile) @@ -63,7 +62,7 @@ def __init__ (self, self.batch = AWS else : raise RuntimeError('unknown batch ' + batch_type) - + self.jrname = job_record def run_jobs(self, resources, @@ -106,30 +105,30 @@ def submit_jobs(self, forward_task_deference = True, outlog = 'log', errlog = 'err') : + self.backward_task_files = backward_task_files # task_chunks = [ # [os.path.basename(j) for j in tasks[i:i + group_size]] \ # for i in range(0, len(tasks), group_size) # ] task_chunks = _split_tasks(tasks, group_size) - _pmap=PMap(work_path) - path_map=_pmap.load() - _fr = FinRecord(work_path, len(task_chunks)) - + task_chunks_str = ['+'.join(ii) for ii in task_chunks] + task_hashes = [sha1(ii.encode('utf-8')).hexdigest() for ii in task_chunks_str] + job_record = JobRecord(work_path, task_chunks, fname = self.jrname) + nchunks = len(task_chunks) + 
job_list = [] - task_chunks_=['+'.join(ii) for ii in task_chunks] - job_fin = _fr.get_record() - assert(len(job_fin) == len(task_chunks)) - for ii,chunk in enumerate(task_chunks) : - if not job_fin[ii] : - # map chunk info. to uniq id - chunk_sha1 = sha1(task_chunks_[ii].encode('utf-8')).hexdigest() - # if hash in map, recover job, else start a new job - if chunk_sha1 in path_map: - # job_uuid = path_map[chunk_sha1][1].split('/')[-1] - job_uuid = path_map[chunk_sha1][2] - dlog.debug("load uuid %s for chunk %s" % (job_uuid, task_chunks_[ii])) - else: + for ii in range(nchunks) : + cur_chunk = task_chunks[ii] + cur_hash = task_hashes[ii] + if not job_record.check_finished(cur_hash): + # chunk is not finished + # check if chunk is submitted + submitted = job_record.check_submitted(cur_hash) + if not submitted: job_uuid = None + else : + job_uuid = job_record.get_uuid(cur_hash) + dlog.debug("load uuid %s for chunk %s" % (job_uuid, cur_hash)) # communication context, bach system context = self.context(work_path, self.session, job_uuid) batch = self.batch(context, uuid_names = self.uuid_names) @@ -138,37 +137,35 @@ def submit_jobs(self, if not rjob['context'].check_file_exists('tag_upload'): rjob['context'].upload('.', forward_common_files) - rjob['context'].upload(chunk, + rjob['context'].upload(cur_chunk, forward_task_files, dereference = forward_task_deference) rjob['context'].write_file('tag_upload', '') - dlog.debug('uploaded files for %s' % task_chunks_[ii]) + dlog.debug('uploaded files for %s' % task_chunks_str[ii]) # submit new or recover old submission - if job_uuid is None: - rjob['batch'].submit(chunk, command, res = resources, outlog=outlog, errlog=errlog) + if not submitted: + rjob['batch'].submit(cur_chunk, command, res = resources, outlog=outlog, errlog=errlog) job_uuid = rjob['context'].job_uuid - dlog.debug('assigned uuid %s for %s ' % (job_uuid, task_chunks_[ii])) - dlog.info('new submission of %s' % job_uuid) + dlog.debug('assigned uuid %s for %s ' % (job_uuid, task_chunks_str[ii])) + dlog.info('new submission of %s for chunk %s' % (job_uuid, cur_hash)) else: - rjob['batch'].submit(chunk, command, res = resources, outlog=outlog, errlog=errlog, restart = True) - dlog.info('restart from old submission %s ' % job_uuid) - # record job and its hash + rjob['batch'].submit(cur_chunk, command, res = resources, outlog=outlog, errlog=errlog, restart = True) + dlog.info('restart from old submission %s for chunk %s' % (job_uuid, cur_hash)) + # record job and its remote context job_list.append(rjob) - path_map[chunk_sha1] = [context.local_root, context.remote_root, job_uuid] + job_record.record_remote_context(cur_hash, + context.local_root, + context.remote_root, + job_uuid) else : # finished job, append a None to list job_list.append(None) - _pmap.dump(path_map) - - assert(len(job_list) == len(task_chunks)) - fcount = [0]*len(job_list) - + job_record.dump() + assert(len(job_list) == nchunks) job_handler = { 'task_chunks': task_chunks, - 'fin_record': _fr, 'job_list': job_list, - 'fail_count': fcount, - 'backward_task_files': backward_task_files, + 'job_record': job_record, } return job_handler @@ -176,81 +173,107 @@ def submit_jobs(self, def all_finished(self, job_handler): task_chunks = job_handler['task_chunks'] - _fr = job_handler['fin_record'] - job_fin = _fr.get_record() - fcount = job_handler['fail_count'] + task_chunks_str = ['+'.join(ii) for ii in task_chunks] + task_hashes = [sha1(ii.encode('utf-8')).hexdigest() for ii in task_chunks_str] job_list = job_handler['job_list'] - 
backward_task_files = job_handler['backward_task_files']
+        job_record = job_handler['job_record']
         dlog.debug('checking jobs')
-        for idx,rjob in enumerate(job_list) :
-            if not job_fin[idx] :
+        nchunks = len(task_chunks)
+        for idx in range(nchunks) :
+            cur_hash = task_hashes[idx]
+            rjob = job_list[idx]
+            if not job_record.check_finished(cur_hash) :
+                # chunk not finished according to record
                 status = rjob['batch'].check_status()
                 job_uuid = rjob['context'].job_uuid
+                dlog.debug('checked job %s' % job_uuid)
                 if status == JobStatus.terminated :
-                    fcount[idx] += 1
-                    if fcount[idx] > 3:
+                    job_record.increase_nfail(cur_hash)
+                    if job_record.check_nfail(cur_hash) > 3:
                         raise RuntimeError('Job %s failed for more than 3 times' % job_uuid)
                     dlog.info('job %s terminated, submit again'% job_uuid)
                     dlog.debug('try %s times for %s'% (fcount[idx], job_uuid))
                     rjob['batch'].submit(task_chunks[idx], command, res = resources, outlog=outlog, errlog=errlog,restart=True)
                 elif status == JobStatus.finished :
                     dlog.info('job %s finished' % job_uuid)
                     rjob['context'].download(task_chunks[idx], backward_task_files)
                     rjob['context'].clean()
-                    job_fin[idx] = True
-            _fr.write_record(job_fin)
-        return all(job_fin)
+                    job_record.record_finish(cur_hash)
+                    job_record.dump()
+        job_record.dump()
+        return job_record.check_all_finished()
 
 
-class FinRecord(object):
-    def __init__ (self, path, njobs, fname = 'fin.record'):
+class JobRecord(object):
+    def __init__ (self, path, task_chunks, fname = 'job_record.json'):
         self.path = os.path.abspath(path)
         self.fname = os.path.join(self.path, fname)
-        self.njobs = njobs
-
-    def get_record(self):
+        self.task_chunks = task_chunks
         if not os.path.exists(self.fname):
-            return [False] * self.njobs
+            self._new_record()
         else :
-            with open(self.fname) as fp:
-                return [bool(int(ii)) for ii in fp.read().split()]
+            self.load()
 
-    def write_record(self, job_fin):
-        with open(self.fname, 'w') as fp:
-            for ii in job_fin:
-                if ii:
-                    fp.write('1 ')
-                else:
-                    fp.write('0 ')
+    def check_submitted(self, chunk_hash):
+        self.valid_hash(chunk_hash)
+        return self.record[chunk_hash]['context'] is not None
+
+    def record_remote_context(self,
+                              chunk_hash,
+                              local_root,
+                              remote_root,
+                              job_uuid):
+        self.valid_hash(chunk_hash)
+        self.record[chunk_hash]['context'] = [local_root, remote_root, job_uuid]
+
+    def get_uuid(self, chunk_hash):
+        self.valid_hash(chunk_hash)
+        return self.record[chunk_hash]['context'][2]
+
+    def check_finished(self, chunk_hash):
+        self.valid_hash(chunk_hash)
+        return self.record[chunk_hash]['finished']
 
+    def check_all_finished(self):
+        flist = [self.record[ii]['finished'] for ii in self.record]
+        return all(flist)
 
-class PMap(object):
-    '''
-    Path map class to operate {read,write,delte} the pmap.json file
-    '''
+    def record_finish(self, chunk_hash):
+        self.valid_hash(chunk_hash)
+        self.record[chunk_hash]['finished'] = True
 
-    def __init__(self,path,fname="pmap.json"):
-        self.f_path_map=os.path.join(path,fname)
+    def check_nfail(self,chunk_hash):
+        self.valid_hash(chunk_hash)
+        return self.record[chunk_hash]['fail_count']
+
+    def increase_nfail(self,chunk_hash):
+        self.valid_hash(chunk_hash)
+        self.record[chunk_hash]['fail_count'] += 1
+
+    def valid_hash(self, chunk_hash):
+        if chunk_hash not in self.record.keys():
+            raise RuntimeError('chunk hash %s not in record, an invalid record may be used, please check file %s' % (chunk_hash, self.fname))
+
+    def dump(self):
+        with open(self.fname, 'w') as fp:
+            json.dump(self.record, fp, indent=4)
 
-    def load(self):
-        f_path_map=self.f_path_map
-        if os.path.isfile(f_path_map):
-            path_map=loadfn(f_path_map)
-        else:
-            path_map={}
-        return path_map
+    def load(self):
+        with open(self.fname) as fp:
+            self.record = json.load(fp)
 
-    def dump(self,pmap,indent=4):
-        f_path_map=self.f_path_map
-        dumpfn(pmap,f_path_map,indent=indent)
+    def _new_record(self):
+        task_chunks_str=['+'.join(ii) for ii in self.task_chunks]
+        task_hash = [sha1(ii.encode('utf-8')).hexdigest() for ii in task_chunks_str]
+        self.record = {}
+        for ii,jj in zip(task_hash, self.task_chunks):
+            self.record[ii] = {
+                'context': None,
+                'finished': False,
+                'fail_count': 0,
+                'task_chunk': jj,
+            }
 
-    def delete(self):
-        f_path_map=self.f_path_map
-        try:
-            os.remove(f_path_map)
-        except:
-            pass
 
 def make_dispatcher(mdata):
     try:

From 63ea080341050d5b6cdb357208ea3a8b11a2d779 Mon Sep 17 00:00:00 2001
From: Han Wang
Date: Wed, 27 Nov 2019 15:42:18 +0800
Subject: [PATCH 038/109] fix bug of fcount

---
 dpgen/dispatcher/Dispatcher.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpgen/dispatcher/Dispatcher.py b/dpgen/dispatcher/Dispatcher.py
index 3e7b4e6d4..9e7818293 100644
--- a/dpgen/dispatcher/Dispatcher.py
+++ b/dpgen/dispatcher/Dispatcher.py
@@ -192,7 +192,7 @@ def all_finished(self,
             if job_record.check_nfail(cur_hash) > 3:
                 raise RuntimeError('Job %s failed for more than 3 times' % job_uuid)
             dlog.info('job %s terminated, submit again'% job_uuid)
-            dlog.debug('try %s times for %s'% (fcount[idx], job_uuid))
+            dlog.debug('try %s times for %s'% (job_record.check_nfail(cur_hash), job_uuid))
             rjob['batch'].submit(task_chunks[idx], command, res = resources, outlog=outlog, errlog=errlog,restart=True)
         elif status == JobStatus.finished :
             dlog.info('job %s finished' % job_uuid)

From 28809f2539e384591cc4c5c532d49331a635ee37 Mon Sep 17 00:00:00 2001
From: Han Wang
Date: Wed, 27 Nov 2019 16:08:20 +0800
Subject: [PATCH 039/109] fix bug of resubmit command

---
 dpgen/dispatcher/Dispatcher.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/dpgen/dispatcher/Dispatcher.py b/dpgen/dispatcher/Dispatcher.py
index 9e7818293..e0175413a 100644
--- a/dpgen/dispatcher/Dispatcher.py
+++ b/dpgen/dispatcher/Dispatcher.py
@@ -165,7 +165,9 @@ def submit_jobs(self,
         job_handler = {
             'task_chunks': task_chunks,
             'job_list': job_list,
-            'job_record': job_record,
+            'job_record': job_record,
+            'command': command,
+            'backward_task_files': backward_task_files
         }
         return job_handler
 
@@ -177,6 +179,8 @@ def all_finished(self,
         task_hashes = [sha1(ii.encode('utf-8')).hexdigest() for ii in task_chunks_str]
         job_list = job_handler['job_list']
         job_record = job_handler['job_record']
+        command = job_handler['command']
+        backward_task_files = job_handler['backward_task_files']
         dlog.debug('checking jobs')
         nchunks = len(task_chunks)
         for idx in range(nchunks) :
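[Editor's note] Before the remaining fix-up commits, a sketch of the bookkeeping introduced in PATCH 037: each task chunk is keyed by the sha1 of its joined task names, and JobRecord persists the per-chunk state in job_record.json. This is illustrative only and not part of the patch series; the task names are invented.

```python
# Illustrative only: the chunking and hashing scheme used by
# submit_jobs()/all_finished() above.  Task names are made up.
from hashlib import sha1
from dpgen.dispatcher.Dispatcher import _split_tasks, JobRecord

tasks = ['task.000.000000', 'task.000.000001', 'task.000.000002']
task_chunks = _split_tasks(tasks, 2)      # chunks of at most 2 tasks

# the same hashing scheme the Dispatcher uses to key each chunk
task_hashes = [sha1('+'.join(chunk).encode('utf-8')).hexdigest()
               for chunk in task_chunks]

record = JobRecord('.', task_chunks)      # creates/loads ./job_record.json
for chunk_hash in task_hashes:
    # freshly created records are neither submitted nor finished
    print(chunk_hash,
          record.check_submitted(chunk_hash),
          record.check_finished(chunk_hash))
record.dump()
```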
From 9c684ff3d006439aa5e7bd4cc340d35417a0c77d Mon Sep 17 00:00:00 2001
From: Han Wang
Date: Wed, 27 Nov 2019 16:09:44 +0800
Subject: [PATCH 040/109] fix bug of resubmit resources

---
 dpgen/dispatcher/Dispatcher.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dpgen/dispatcher/Dispatcher.py b/dpgen/dispatcher/Dispatcher.py
index e0175413a..0e24bc389 100644
--- a/dpgen/dispatcher/Dispatcher.py
+++ b/dpgen/dispatcher/Dispatcher.py
@@ -167,6 +167,7 @@ def submit_jobs(self,
             'job_list': job_list,
             'job_record': job_record,
             'command': command,
+            'resources': resources,
             'backward_task_files': backward_task_files
         }
         return job_handler
@@ -180,6 +181,7 @@ def all_finished(self,
         job_list = job_handler['job_list']
         job_record = job_handler['job_record']
         command = job_handler['command']
+        resources = job_handler['resources']
         backward_task_files = job_handler['backward_task_files']
         dlog.debug('checking jobs')
         nchunks = len(task_chunks)

From d1b08d025a4492912ef508af31f77fa7de24d941 Mon Sep 17 00:00:00 2001
From: Han Wang
Date: Wed, 27 Nov 2019 16:12:39 +0800
Subject: [PATCH 041/109] fix bug of logs

---
 dpgen/dispatcher/Dispatcher.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/dpgen/dispatcher/Dispatcher.py b/dpgen/dispatcher/Dispatcher.py
index 0e24bc389..03aaf2a7a 100644
--- a/dpgen/dispatcher/Dispatcher.py
+++ b/dpgen/dispatcher/Dispatcher.py
@@ -168,6 +168,8 @@ def submit_jobs(self,
             'job_record': job_record,
             'command': command,
             'resources': resources,
+            'outlog': outlog,
+            'errlog': errlog,
             'backward_task_files': backward_task_files
         }
         return job_handler
@@ -182,6 +184,8 @@ def all_finished(self,
         job_record = job_handler['job_record']
         command = job_handler['command']
         resources = job_handler['resources']
+        outlog = job_handler['outlog']
+        errlog = job_handler['errlog']
         backward_task_files = job_handler['backward_task_files']
         dlog.debug('checking jobs')
         nchunks = len(task_chunks)
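[Editor's note] With PATCH 035 through 041 in place, the refactored Dispatcher separates submission from completion checking. The sketch below shows how it is meant to be driven; it mirrors what run_jobs() does internally and is not part of the patch series. The remote profile, resources, paths and command are placeholders, not dpgen defaults.

```python
# Editorial sketch only: drive the refactored Dispatcher by hand.
import time
from dpgen.dispatcher.Dispatcher import Dispatcher

disp = Dispatcher({'work_path': '/path/to/scratch'},   # profile for the local session
                  context_type='local',
                  batch_type='slurm')
job_handler = disp.submit_jobs(resources={},           # batch resources (placeholder)
                               command='lmp -i input.lammps',
                               work_path='iter.000000/01.model_devi',
                               tasks=['task.000.000000', 'task.000.000001'],
                               group_size=1,
                               forward_common_files=[],
                               forward_task_files=['input.lammps'],
                               backward_task_files=['log.lammps'])
# submit_jobs() returns at once; each all_finished() pass re-submits
# terminated chunks and downloads finished ones, updating job_record.json.
while not disp.all_finished(job_handler):
    time.sleep(10)
```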
From 05e9fac5457b8fad41b54eacee3280c4e4dd8885 Mon Sep 17 00:00:00 2001
From: Han Wang
Date: Wed, 27 Nov 2019 16:24:48 +0800
Subject: [PATCH 042/109] update unittest

---
 tests/dispatcher/context.py               |  2 +-
 tests/dispatcher/test_dispatcher_utils.py | 40 +++++++++++------------
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/tests/dispatcher/context.py b/tests/dispatcher/context.py
index 4519ca9e6..1ab29dc9a 100644
--- a/tests/dispatcher/context.py
+++ b/tests/dispatcher/context.py
@@ -7,7 +7,7 @@
 from dpgen.dispatcher.LazyLocalContext import LazyLocalContext
 from dpgen.dispatcher.SSHContext import SSHSession
 from dpgen.dispatcher.SSHContext import SSHContext
-from dpgen.dispatcher.Dispatcher import FinRecord
+# from dpgen.dispatcher.Dispatcher import FinRecord
 from dpgen.dispatcher.Dispatcher import _split_tasks
 from dpgen.dispatcher.LocalContext import _identical_files

diff --git a/tests/dispatcher/test_dispatcher_utils.py b/tests/dispatcher/test_dispatcher_utils.py
index 00b6c5fbc..01f0e0a1f 100644
--- a/tests/dispatcher/test_dispatcher_utils.py
+++ b/tests/dispatcher/test_dispatcher_utils.py
@@ -3,29 +3,29 @@
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 __package__ = 'dispatcher'
-from .context import FinRecord
+# from .context import FinRecord
 from .context import _split_tasks
 from .context import setUpModule
 
-class TestFinRecord(unittest.TestCase):
-    def setUp(self):
-        self.njobs = 10
-        self.fr = FinRecord('.', self.njobs)
-
-    def tearDown(self):
-        if os.path.isfile('fin.record'):
-            os.remove('fin.record')
-
-    def test_all_false(self) :
-        recd = self.fr.get_record()
-        self.assertEqual(recd, [False]*self.njobs)
-
-    def test_write_read(self) :
-        recd = self.fr.get_record()
-        recd[self.njobs//3] = True
-        self.fr.write_record(recd)
-        recd1 = self.fr.get_record()
-        self.assertEqual(recd, recd1)
+# class TestFinRecord(unittest.TestCase):
+#     def setUp(self):
+#         self.njobs = 10
+#         self.fr = FinRecord('.', self.njobs)

+#     def tearDown(self):
+#         if os.path.isfile('fin.record'):
+#             os.remove('fin.record')

+#     def test_all_false(self) :
+#         recd = self.fr.get_record()
+#         self.assertEqual(recd, [False]*self.njobs)

+#     def test_write_read(self) :
+#         recd = self.fr.get_record()
+#         recd[self.njobs//3] = True
+#         self.fr.write_record(recd)
+#         recd1 = self.fr.get_record()
+#         self.assertEqual(recd, recd1)
 
 class TestDispatchSplit(unittest.TestCase):
     def test_split(self):

From 509d5802ecb5e4c80567a5831208fe513742103e Mon Sep 17 00:00:00 2001
From: robinzhuang <38876805+robinzyb@users.noreply.github.com>
Date: Wed, 27 Nov 2019 16:54:17 +0100
Subject: [PATCH 043/109] reset readme file

---
 README.md | 765 ++++++++++++++++++++++++++----------------------------
 1 file changed, 374 insertions(+), 391 deletions(-)

diff --git a/README.md b/README.md
index 52862ef58..6896b9ddd 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,5 @@
 # DP-GEN Manual
-
 ## Table of Contents
  * [DP-GEN Manual](#dp-gen-manual)
    * [Table of Contents](#table-of-contents)
@@ -20,6 +19,10 @@
 ## About DP-GEN
+
+[![GitHub release](https://img.shields.io/github/release/deepmodeling/dpgen.svg?maxAge=86400)](https://github.com/deepmodeling/dpgen/releases/)
+[![arxiv:1910.12690](http://img.shields.io/badge/arXiv-1910.12690-B31B1B.svg?maxAge=86400)](https://arxiv.org/abs/1910.12690)
+
 DP-GEN (Deep Generator) is a software written in Python, delicately designed to generate a deep learning based model of interatomic potential energy and force field. DP-GEN is depedent on DeepMD-kit (https://github.com/deepmodeling/deepmd-kit/blob/master/README.md). With highly scalable interface with common softwares for molecular simulation, DP-GEN is capable to automatically prepare scripts and maintain job queues on HPC machines (High Performance Cluster) and analyze results
 ### Highlighted features
 + **Accurate and efficient**: DP-GEN is capable to sample more than tens of million structures and select only a few for first principles calculation. DP-GEN will finally obtain a uniformly accurate model.
@@ -39,14 +42,14 @@ DP-GEN (Deep Generator) is a software written in Python, delicately designed to
 + tests : unittest tools for developers.
 
- One can easily run DP-GEN with :
- ```
- dpgen TASK PARAM MACHINE
- ```
+One can easily run DP-GEN with :
+```
+dpgen TASK PARAM MACHINE
+```
 
- where TASK is the key word, PARAM and MACHINE are both JSON files.
+where TASK is the key word, PARAM and MACHINE are both JSON files.
 
- Options for TASK:
+Options for TASK:
 * `init_bulk` : Generating initial data for bulk systems.
 * `init_surf` : Generating initial data for surface systems.
 * `run` : Main process of Deep Generator.
@@ -82,13 +85,13 @@ Path: /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/dpgen-0.5.1.
Dependency ------------ -numpy 1.17.2 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/numpy -dpdata 0.1.10 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/dpdata-0.1.10-py3.6.egg/dpdata -pymatgen 2019.7.2 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/pymatgen -monty 2.0.4 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/monty -ase 3.17.0 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/ase-3.17.0-py3.6.egg/ase -paramiko 2.6.0 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/paramiko -custodian 2019.2.10 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/custodian + numpy 1.17.2 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/numpy + dpdata 0.1.10 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/dpdata-0.1.10-py3.6.egg/dpdata + pymatgen 2019.7.2 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/pymatgen + monty 2.0.4 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/monty + ase 3.17.0 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/ase-3.17.0-py3.6.egg/ase + paramiko 2.6.0 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/paramiko + custodian 2019.2.10 /home/me/miniconda3/envs/py363/lib/python3.6/site-packages/custodian Description ------------ @@ -100,17 +103,17 @@ commands with their own options. To see the options for the sub-commands, type "dpgen sub-command -h". positional arguments: -{init_surf,init_bulk,run,run/report,test,db} -init_surf Generating initial data for surface systems. -init_bulk Generating initial data for bulk systems. -run Main process of Deep Potential Generator. -run/report Report the systems and the thermodynamic conditions of -the labeled frames. -test Auto-test for Deep Potential. -db Collecting data from Deep Generator. + {init_surf,init_bulk,run,run/report,test,db} + init_surf Generating initial data for surface systems. + init_bulk Generating initial data for bulk systems. + run Main process of Deep Potential Generator. + run/report Report the systems and the thermodynamic conditions of + the labeled frames. + test Auto-test for Deep Potential. + db Collecting data from Deep Generator. optional arguments: --h, --help show this help message and exit + -h, --help show this help message and exit ``` @@ -140,23 +143,23 @@ If MACHINE is None, there should be only one stage in stages. Corresponding task Following is an example for `PARAM`, which generates data from a typical structure hcp. ```json { -"stages" : [1,2,3,4], -"cell_type": "hcp", -"latt": 4.479, -"super_cell": [2, 2, 2], -"elements": ["Mg"], -"potcars": ["....../POTCAR"], -"relax_incar": "....../INCAR_metal_rlx", -"md_incar" : "....../INCAR_metal_md", -"scale": [1.00], -"skip_relax": false, -"pert_numb": 2, -"md_nstep" : 5, -"pert_box": 0.03, -"pert_atom": 0.01, -"coll_ndata": 5000, -"type_map" : [ "Mg", "Al"], -"_comment": "that's all" + "stages" : [1,2,3,4], + "cell_type": "hcp", + "latt": 4.479, + "super_cell": [2, 2, 2], + "elements": ["Mg"], + "potcars": ["....../POTCAR"], + "relax_incar": "....../INCAR_metal_rlx", + "md_incar" : "....../INCAR_metal_md", + "scale": [1.00], + "skip_relax": false, + "pert_numb": 2, + "md_nstep" : 5, + "pert_box": 0.03, + "pert_atom": 0.01, + "coll_ndata": 5000, + "type_map" : [ "Mg", "Al"], + "_comment": "that's all" } ``` @@ -170,7 +173,7 @@ The following table gives explicit descriptions on keys in `PARAM`. The bold notation of key (such as **Elements**) means that it's a necessary key. 
-Key | Type | Example | Discription | + Key | Type | Example | Discription | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| | **stages** | List of Integer | [1,2,3,4] | Stages for `init_bulk` | **Elements** | List of String | ["Mg"] | Atom types @@ -208,56 +211,56 @@ All stages must be **in order**. Following is an example for `PARAM`, which generates data from a typical structure hcp. ```json { -"stages": [ -1, -2 -], -"cell_type": "fcc", -"latt": 4.034, -"super_cell": [ -2, -2, -2 -], -"layer_numb": 3, -"vacuum_max": 9, -"vacuum_resol": [ -0.5, -1 -], -"mid_point": 4.0, -"millers": [ -[ -1, -0, -0 -], -[ -1, -1, -0 -], -[ -1, -1, -1 -] -], -"elements": [ -"Al" -], -"potcars": [ -"....../POTCAR" -], -"relax_incar": "....../INCAR_metal_rlx_low", -"scale": [ -1.0 -], -"skip_relax": true, -"pert_numb": 2, -"pert_box": 0.03, -"pert_atom": 0.01, -"_comment": "that's all" + "stages": [ + 1, + 2 + ], + "cell_type": "fcc", + "latt": 4.034, + "super_cell": [ + 2, + 2, + 2 + ], + "layer_numb": 3, + "vacuum_max": 9, + "vacuum_resol": [ + 0.5, + 1 + ], + "mid_point": 4.0, + "millers": [ + [ + 1, + 0, + 0 + ], + [ + 1, + 1, + 0 + ], + [ + 1, + 1, + 1 + ] + ], + "elements": [ + "Al" + ], + "potcars": [ + "....../POTCAR" + ], + "relax_incar": "....../INCAR_metal_rlx_low", + "scale": [ + 1.0 + ], + "skip_relax": true, + "pert_numb": 2, + "pert_box": 0.03, + "pert_atom": 0.01, + "_comment": "that's all" } ``` @@ -265,7 +268,7 @@ The following table gives explicit descriptions on keys in `PARAM`. The bold notation of key (such as **Elements**) means that it's a necessary key. -Key | Type | Example | Discription | + Key | Type | Example | Discription | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| | **stages** | List of Integer | [1,2,3,4] | Stages for `init_surf` | **Elements** | List of String | ["Mg"] | Atom types @@ -301,9 +304,9 @@ In each iteration, there are three stages of work, namely, `00.train 01.model_d + 02.fp : Selected structures will be calculated by first principles methods(default VASP). DP-GEN will obtain some new data and put them together with initial data and data generated in previous iterations. After that a new training will be set up and DP-GEN will enter next iteration! - DP-GEN identifies the current stage by a record file, `record.dpgen`, which will be created and upgraded by codes.Each line contains two number: the first is index of iteration, and the second ,ranging from 0 to 9 ,records which stage in each iteration is currently running. +DP-GEN identifies the current stage by a record file, `record.dpgen`, which will be created and upgraded by codes.Each line contains two number: the first is index of iteration, and the second ,ranging from 0 to 9 ,records which stage in each iteration is currently running. - 0,1,2 correspond to make_train, run_train, post_train. DP-GEN will write scripts in `make_train`, run the task by specific machine in `run_train` and collect result in `post_train`. The records for model_devi and fp stage follow similar rules. +0,1,2 correspond to make_train, run_train, post_train. DP-GEN will write scripts in `make_train`, run the task by specific machine in `run_train` and collect result in `post_train`. The records for model_devi and fp stage follow similar rules. In `PARAM`, you can specialize the task as you expect. 
@@ -311,134 +314,134 @@ In `PARAM`, you can specialize the task as you expect. ```json { -"type_map": [ -"H", -"C" -], -"mass_map": [ -1, -12 -], -"init_data_prefix": "....../init/", -"init_data_sys": [ -"CH4.POSCAR.01x01x01/02.md/sys-0004-0001/deepmd" -], -"init_batch_size": [ -8 -], -"sys_configs_prefix": "....../init/", -"sys_configs": [ -[ -"CH4.POSCAR.01x01x01/01.scale_pert/sys-0004-0001/scale*/00000*/POSCAR" -], -[ -"CH4.POSCAR.01x01x01/01.scale_pert/sys-0004-0001/scale*/00001*/POSCAR" -] -], -"sys_batch_size": [ -8, -8, -8, -8 -], -"_comment": " that's all ", -"numb_models": 4, -"train_param": "input.json", -"default_training_param": { -"_comment": "that's all", -"use_smooth": true, -"sel_a": [ -16, -4 -], -"rcut_smth": 0.5, -"rcut": 5, -"filter_neuron": [ -10, -20, -40 -], -"filter_resnet_dt": false, -"n_axis_neuron": 12, -"n_neuron": [ -100, -100, -100 -], -"resnet_dt": true, -"coord_norm": true, -"type_fitting_net": false, -"systems": [], -"set_prefix": "set", -"stop_batch": 40000, -"batch_size": 1, -"start_lr": 0.001, -"decay_steps": 200, -"decay_rate": 0.95, -"seed": 0, -"start_pref_e": 0.02, -"limit_pref_e": 2, -"start_pref_f": 1000, -"limit_pref_f": 1, -"start_pref_v": 0.0, -"limit_pref_v": 0.0, -"disp_file": "lcurve.out", -"disp_freq": 1000, -"numb_test": 4, -"save_freq": 1000, -"save_ckpt": "model.ckpt", -"load_ckpt": "model.ckpt", -"disp_training": true, -"time_training": true, -"profiling": false, -"profiling_file": "timeline.json" -}, -"model_devi_dt": 0.002, -"model_devi_skip": 0, -"model_devi_f_trust_lo": 0.05, -"model_devi_f_trust_hi": 0.15, -"model_devi_clean_traj": true, -"model_devi_jobs": [ -{ -"sys_idx": [ -0 -], -"temps": [ -100 -], -"press": [ -1.0 -], -"trj_freq": 10, -"nsteps": 300, -"ensemble": "nvt", -"_idx": "00" -}, -{ -"sys_idx": [ -1 -], -"temps": [ -100 -], -"press": [ -1.0 -], -"trj_freq": 10, -"nsteps": 3000, -"ensemble": "nvt", -"_idx": "01" -} -], -"fp_style": "vasp", -"shuffle_poscar": false, -"fp_task_max": 20, -"fp_task_min": 1, -"fp_pp_path": "....../methane/", -"fp_pp_files": [ -"POTCAR" -], -"fp_incar": "....../INCAR_methane" + "type_map": [ + "H", + "C" + ], + "mass_map": [ + 1, + 12 + ], + "init_data_prefix": "....../init/", + "init_data_sys": [ + "CH4.POSCAR.01x01x01/02.md/sys-0004-0001/deepmd" + ], + "init_batch_size": [ + 8 + ], + "sys_configs_prefix": "....../init/", + "sys_configs": [ + [ + "CH4.POSCAR.01x01x01/01.scale_pert/sys-0004-0001/scale*/00000*/POSCAR" + ], + [ + "CH4.POSCAR.01x01x01/01.scale_pert/sys-0004-0001/scale*/00001*/POSCAR" + ] + ], + "sys_batch_size": [ + 8, + 8, + 8, + 8 + ], + "_comment": " that's all ", + "numb_models": 4, + "train_param": "input.json", + "default_training_param": { + "_comment": "that's all", + "use_smooth": true, + "sel_a": [ + 16, + 4 + ], + "rcut_smth": 0.5, + "rcut": 5, + "filter_neuron": [ + 10, + 20, + 40 + ], + "filter_resnet_dt": false, + "n_axis_neuron": 12, + "n_neuron": [ + 100, + 100, + 100 + ], + "resnet_dt": true, + "coord_norm": true, + "type_fitting_net": false, + "systems": [], + "set_prefix": "set", + "stop_batch": 40000, + "batch_size": 1, + "start_lr": 0.001, + "decay_steps": 200, + "decay_rate": 0.95, + "seed": 0, + "start_pref_e": 0.02, + "limit_pref_e": 2, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0.0, + "limit_pref_v": 0.0, + "disp_file": "lcurve.out", + "disp_freq": 1000, + "numb_test": 4, + "save_freq": 1000, + "save_ckpt": "model.ckpt", + "load_ckpt": "model.ckpt", + "disp_training": true, + "time_training": true, + "profiling": false, + 
"profiling_file": "timeline.json" + }, + "model_devi_dt": 0.002, + "model_devi_skip": 0, + "model_devi_f_trust_lo": 0.05, + "model_devi_f_trust_hi": 0.15, + "model_devi_clean_traj": true, + "model_devi_jobs": [ + { + "sys_idx": [ + 0 + ], + "temps": [ + 100 + ], + "press": [ + 1.0 + ], + "trj_freq": 10, + "nsteps": 300, + "ensemble": "nvt", + "_idx": "00" + }, + { + "sys_idx": [ + 1 + ], + "temps": [ + 100 + ], + "press": [ + 1.0 + ], + "trj_freq": 10, + "nsteps": 3000, + "ensemble": "nvt", + "_idx": "01" + } + ], + "fp_style": "vasp", + "shuffle_poscar": false, + "fp_task_max": 20, + "fp_task_min": 1, + "fp_pp_path": "....../methane/", + "fp_pp_files": [ + "POTCAR" + ], + "fp_incar": "....../INCAR_methane" } ``` @@ -446,20 +449,20 @@ The following table gives explicit descriptions on keys in `PARAM`. The bold notation of key (such aas **type_map**) means that it's a necessary key. -Key | Type | Example | Discription | + Key | Type | Example | Discription | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| | *#Basics* | **type_map** | List of string | ["H", "C"] | Atom types | **mass_map** | List of float | [1, 12] | Standard atom weights. | **use_ele_temp** | int | 0 | Currently only support fp_style vasp. 0(default): no electron temperature. 1: eletron temperature as frame parameter. 2: electron temperature as atom parameter. | *#Data* -| init_data_prefix | String | "/sharedext4/.../data/" | Prefix of initial data directories -| ***init_data_sys*** | List of string|["CH4.POSCAR.01x01x01/.../deepmd"] |Directories of initial data. You may use either absolute or relative path here. -| ***sys_format*** | String | "vasp/poscar" | Format of initial data. It will be `vasp/poscar` if not set. -| init_multi_systems | Boolean | false | If set to `true`, `init_data_sys` directories should contain sub-directories of various systems. DP-GEN will regard all of these sub-directories as inital data systems. -| **init_batch_size** | String of integer | [8] | Each number is the batch_size of corresponding system for training in `init_data_sys`. One recommended rule for setting the `sys_batch_size` and `init_batch_size` is that `batch_size` mutiply number of atoms ot the stucture should be larger than 32. If set to `auto`, batch size will be 32 divided by number of atoms. | -| sys_configs_prefix | String | "/sharedext4/.../data/" | Prefix of `sys_configs` -| **sys_configs** | List of list of string | [
["/sharedext4/.../POSCAR"],
["....../POSCAR"]
] | Containing directories of structures to be explored in iterations.Wildcard characters are supported here. | + | init_data_prefix | String | "/sharedext4/.../data/" | Prefix of initial data directories + | ***init_data_sys*** | List of string|["CH4.POSCAR.01x01x01/.../deepmd"] |Directories of initial data. You may use either absolute or relative path here. + | ***sys_format*** | String | "vasp/poscar" | Format of initial data. It will be `vasp/poscar` if not set. + | init_multi_systems | Boolean | false | If set to `true`, `init_data_sys` directories should contain sub-directories of various systems. DP-GEN will regard all of these sub-directories as inital data systems. + | **init_batch_size** | String of integer | [8] | Each number is the batch_size of corresponding system for training in `init_data_sys`. One recommended rule for setting the `sys_batch_size` and `init_batch_size` is that `batch_size` mutiply number of atoms ot the stucture should be larger than 32. If set to `auto`, batch size will be 32 divided by number of atoms. | + | sys_configs_prefix | String | "/sharedext4/.../data/" | Prefix of `sys_configs` + | **sys_configs** | List of list of string | [
["/sharedext4/.../POSCAR"],
["....../POSCAR"]
] | Containing directories of structures to be explored in iterations.Wildcard characters are supported here. | | **sys_batch_size** | List of integer | [8, 8] | Each number is the batch_size for training of corresponding system in `sys_configs`. If set to `auto`, batch size will be 32 divided by number of atoms. | | *#Training* | **numb_models** | Integer | 4 (recommend) | Number of models to be trained in `00.train`. | @@ -468,7 +471,7 @@ Key | Type | Example | **model_devi_dt** | Float | 0.002 (recommend) | Timestep for MD | | **model_devi_skip** | Integer | 0 | Number of structures skipped for fp in each MD | **model_devi_f_trust_lo** | Float | 0.05 | Lower bound of forces for the selection. -| **model_devi_f_trust_hi** | Float | 0.15 | Upper bound of forces for the selection + | **model_devi_f_trust_hi** | Float | 0.15 | Upper bound of forces for the selection | **model_devi_e_trust_lo** | Float | 1e10 | Lower bound of energies for the selection. Recommend to set them a high number, since forces provide more precise information. Special cases such as energy minimization may need this. | | **model_devi_e_trust_hi** | Float | 1e10 | Upper bound of energies for the selection. | | **model_devi_clean_traj** | Boolean | true | Deciding whether to clean traj folders in MD since they are too large. | @@ -508,40 +511,20 @@ Key | Type | Example |**fp_params["mixingweight"]** | Float| 0.05 | Proportion a of output Density Matrix to be used for the input Density Matrix of next SCF cycle (linear mixing). |**fp_params["NumberPulay"]** | Integer| 5 | Controls the Pulay convergence accelerator. | *fp_style == cp2k* -| **fp_params** | Dict | |Parameters for cp2k calculation. find detail in manual.cp2k.org. only the kind section must be set before use. we assume that you have basic knowledge for cp2k input. - -#### Rules for cp2k input at dictionary form - Converting cp2k input is very simple as dictionary used to dpgen input. You just need follow some simple rule: -- kind section parameter must be provide -- replace `keyword` in cp2k as `keyword` in dict. -- replace `keyword parameter` in cp2k as `value` in dict. -- replace `section name` in cp2k as `keyword` in dict. . The corresponding value is a `dict`. -- repalce `section parameter` in cp2k as `value` with dict. keyword `"_"` -- `repeat section` in cp2k just need to be written once with repeat parameter as list. - -Here are examples for setting: - -```python - -#minimal information you should provide for input -#we have set other parameters in code, if you want to -#use your own paramter, just write a corresponding dictionary -"user_fp_params": { -"FORCE_EVAL":{ -"DFT":{ -"BASIS_SET_FILE_NAME": "path", -"POTENTIAL_FILE_NAME": "path" -} -"SUBSYS":{ -"KIND":{ -"_": ["N","C","H"], -"POTENTIAL": ["GTH-PBE-q5","GTH-PBE-q4", "GTH-PBE-q1"], -"BASIS_SET": ["DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH"] -} -} -} -} -``` +| **fp_params** | Dict | | Parameters for cp2k calculation. find detail in manual.cp2k.org. if it is not remarked with "optional", the parameter must be set. we assume that you have basic knowledge for cp2k input. +|**fp_params["cutoff"]**| String | 400 | +|**fp_params["rel_cutoff"]**| String | 50 | +|**fp_params["functional"]**| String | PBE | +|**fp_params["max_scf"]**| String | 50 | +|**fp_params["pair_potential_type"]**| String | DFTD3 | This is optional. 
+|**fp_params["pair_potential_path"]**| String | "./cp2k_basis_pp_file/dftd3.dat" | must be set if you set the "pair_potential_type" +|**fp_params["pair_ref_functional"]**| String | PBE | must be set if you set the "pair_potential_type" +|**fp_params["basis_path"]**| String | "./cp2k_basis_pp_file/BASIS_MOLOPT" | +|**fp_params["pp_path"]**| String | "./cp2k_basis_pp_file/GTH_POTENTIALS" | +|**fp_params["element_list"]**| List | ["H","C","N"] | +|**fp_params["basis_list"]**| List | ["DZVP_MOLOPT_GTH","DZVP_MOLOPT_GTH","DZVP_MOLOPT_GTH"] | Must be same order with element_list +|**fp_params["pp_list"]**| List | ["GTH-PBE-q1","GTH-PBE-q4","GTH-PBE-q5"] | Must be same order with element_list + @@ -571,14 +554,14 @@ The whole program contains a series of tasks shown as follows. In each task, the We take Al as an example to show the parameter settings of `param.json`. The first part is the fundamental setting for particular alloy system. ```json -"_comment": "models", -"potcar_map" : { -"Al" : "/somewhere/POTCAR" -}, -"conf_dir":"confs/Al/std-fcc", -"key_id":"API key of Material project", -"task_type":"deepmd", -"task":"eos", + "_comment": "models", + "potcar_map" : { + "Al" : "/somewhere/POTCAR" + }, + "conf_dir":"confs/Al/std-fcc", + "key_id":"API key of Material project", + "task_type":"deepmd", + "task":"eos", ``` You need to add the specified paths of necessary `POTCAR` files in "potcar_map". The different `POTCAR` paths are separated by commas. Then you also need to add the folder path of particular configuration, which contains `POSCAR` file. @@ -593,65 +576,65 @@ Usually, if you add the relative path of POSCAR as the above format, + `task_type` contains 3 optional types for testing, i.e. **vasp**, **deepmd** and **meam**. + `task` contains 7 options, **equi**, **eos**, **elastic**, **vacancy**, **interstitial**, **surf** and **all**. The option **all** can do all the tasks. - It is worth noting that the subsequent tasks need to rely on the calculation results of the equilibrium state, so it is necessary to give priority to the calculation of the equilibrium state while testing. And due to the stable consideration, we recommand you to test the equilibrium state of **vasp** before other tests. - - The second part is the computational settings for vasp and lammps. According to your actual needs, you can choose to add the paths of specific INCAR or use the simplified INCAR by setting `vasp_params`. The priority of specified INCAR is higher than using `vasp_params`. The most important setting is to add the folder path `model_dir` of **deepmd** model and supply the corresponding element type map. Besides, `dpgen test` also is able to call common lammps packages, such as **meam**. - ```json - "relax_incar":"somewhere/relax_incar", - "scf_incar":"somewhere/scf_incar", - "vasp_params": { - "ecut": 650, - "ediff": 1e-6, - "kspacing": 0.1, - "kgamma": false, - "npar": 1, - "kpar": 1, - "_comment": " that's all " +It is worth noting that the subsequent tasks need to rely on the calculation results of the equilibrium state, so it is necessary to give priority to the calculation of the equilibrium state while testing. And due to the stable consideration, we recommand you to test the equilibrium state of **vasp** before other tests. + +The second part is the computational settings for vasp and lammps. According to your actual needs, you can choose to add the paths of specific INCAR or use the simplified INCAR by setting `vasp_params`. The priority of specified INCAR is higher than using `vasp_params`. 
The most important setting is to add the folder path `model_dir` of **deepmd** model and supply the corresponding element type map. Besides, `dpgen test` is also able to call common lammps packages, such as **meam**.
+```json
+"relax_incar":"somewhere/relax_incar",
+"scf_incar":"somewhere/scf_incar",
+"vasp_params": {
+    "ecut": 650,
+    "ediff": 1e-6,
+    "kspacing": 0.1,
+    "kgamma": false,
+    "npar": 1,
+    "kpar": 1,
+    "_comment": " that's all "
 },
 "lammps_params": {
-"model_dir":"somewhere/example/Al_model",
-"type_map":["Al"],
-"model_name":false,
-"model_param_type":false
+    "model_dir":"somewhere/example/Al_model",
+    "type_map":["Al"],
+    "model_name":false,
+    "model_param_type":false
 },
- ```
- The last part is the optional settings for various tasks mentioned above. You can change the parameters according to actual needs.
- ```json
+```
+The last part is the optional settings for various tasks mentioned above. You can change the parameters according to actual needs.
+```json
    "_comment":"00.equi",
    "alloy_shift":false,
- ```
+```
+ `alloy_shift`:(boolean) whether to compute the alloy formation energy. If you test an alloy and set 'true', you need to compute the energies of the corresponding elements respectively first of all. Please set 'false' when testing a single element.

- ```json
+```json
    "_comment": "01.eos",
    "vol_start": 12,
    "vol_end": 22,
    "vol_step": 0.5,
- ```
+```
+ `vol_start`, `vol_end` and `vol_step` determine the volumetric range and accuracy of the **eos**.

- ```json
+```json
    "_comment": "02.elastic",
    "norm_deform": 2e-2,
    "shear_deform": 5e-2,
- ```
+```
+ `norm_deform` and `shear_deform` are the scales of material deformation.
- This task uses the stress-strain relationship to calculate the elastic constant.
+This task uses the stress-strain relationship to calculate the elastic constant.

- ```json
+```json
    "_comment":"03.vacancy",
    "supercell":[3,3,3],
- ```
+```
+ `supercell`:(list of integer) the supercell size used to generate the vacancy defect and interstitial defect

- ```json
+```json
    "_comment":"04.interstitial",
    "insert_ele":["Al"],
    "reprod-opt":false,
- ```
+```
+ `insert_ele`:(list of string) the elements used to generate the point interstitial defect
+ `reprod-opt`:(boolean) whether to reproduce trajectories of the interstitial defect

- ```json
+```json
    "_comment": "05.surface",
    "min_slab_size": 10,
    "min_vacuum_size": 11,
@@ -660,7 +643,7 @@
    "max_miller": 2,
    "static-opt":false,
    "relax_box":false,
- ```
+```
+ `min_slab_size` and `min_vacuum_size` are the minimum sizes of the slab thickness and the vacuum width.
+ `pert_xz` is the perturbation through the xz direction used to compute the surface energy.
+ `max_miller` (integer) is the maximum miller index @@ -676,90 +659,90 @@ When switching into a new machine, you may modifying the `MACHINE`, according to An example for `MACHINE` is: ```json { -"train": [ -{ -"machine": { -"machine_type": "slurm", -"hostname": "localhost", -"port": 22, -"username": "Angus", -"work_path": "....../work" -}, -"resources": { -"numb_node": 1, -"numb_gpu": 1, -"task_per_node": 4, -"partition": "AdminGPU", -"exclude_list": [], -"source_list": [ -"....../train_tf112_float.env" -], -"module_list": [], -"time_limit": "23:0:0", -"qos": "data" -}, -"deepmd_path": "....../tf1120-lowprec" -} -], -"model_devi": [ -{ -"machine": { -"machine_type": "slurm", -"hostname": "localhost", -"port": 22, -"username": "Angus", -"work_path": "....../work" -}, -"resources": { -"numb_node": 1, -"numb_gpu": 1, -"task_per_node": 2, -"partition": "AdminGPU", -"exclude_list": [], -"source_list": [ -"......./lmp_tf112_float.env" -], -"module_list": [], -"time_limit": "23:0:0", -"qos": "data" -}, -"command": "lmp_serial", -"group_size": 1 -} -], -"fp": [ -{ -"machine": { -"machine_type": "slurm", -"hostname": "localhost", -"port": 22, -"username": "Angus", -"work_path": "....../work" -}, -"resources": { -"task_per_node": 4, -"numb_gpu": 1, -"exclude_list": [], -"with_mpi": false, -"source_list": [], -"module_list": [ -"mpich/3.2.1-intel-2017.1", -"vasp/5.4.4-intel-2017.1", -"cuda/10.1" -], -"time_limit": "120:0:0", -"partition": "AdminGPU", -"_comment": "that's All" -}, -"command": "vasp_gpu", -"group_size": 1 -} -] + "train": [ + { + "machine": { + "machine_type": "slurm", + "hostname": "localhost", + "port": 22, + "username": "Angus", + "work_path": "....../work" + }, + "resources": { + "numb_node": 1, + "numb_gpu": 1, + "task_per_node": 4, + "partition": "AdminGPU", + "exclude_list": [], + "source_list": [ + "....../train_tf112_float.env" + ], + "module_list": [], + "time_limit": "23:0:0", + "qos": "data" + }, + "deepmd_path": "....../tf1120-lowprec" + } + ], + "model_devi": [ + { + "machine": { + "machine_type": "slurm", + "hostname": "localhost", + "port": 22, + "username": "Angus", + "work_path": "....../work" + }, + "resources": { + "numb_node": 1, + "numb_gpu": 1, + "task_per_node": 2, + "partition": "AdminGPU", + "exclude_list": [], + "source_list": [ + "......./lmp_tf112_float.env" + ], + "module_list": [], + "time_limit": "23:0:0", + "qos": "data" + }, + "command": "lmp_serial", + "group_size": 1 + } + ], + "fp": [ + { + "machine": { + "machine_type": "slurm", + "hostname": "localhost", + "port": 22, + "username": "Angus", + "work_path": "....../work" + }, + "resources": { + "task_per_node": 4, + "numb_gpu": 1, + "exclude_list": [], + "with_mpi": false, + "source_list": [], + "module_list": [ + "mpich/3.2.1-intel-2017.1", + "vasp/5.4.4-intel-2017.1", + "cuda/10.1" + ], + "time_limit": "120:0:0", + "partition": "AdminGPU", + "_comment": "that's All" + }, + "command": "vasp_gpu", + "group_size": 1 + } + ] } ``` Following table illustrates which key is needed for three types of machine: `train`,`model_devi` and `fp`. Each of them is a list of dicts. Each dict can be considered as an independent environmnet for calculation. 
-Key | `train` | `model_devi` | `fp` |
+ Key | `train` | `model_devi` | `fp` |
 | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------|
 | machine | NEED | NEED | NEED
 | resources | NEED | NEED | NEED
@@ -770,7 +753,7 @@

 The following table gives explicit descriptions on keys in param.json.

-Key | Type | Example | Discription |
+ Key | Type | Example | Description |
 | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------|
 |deepmd_path | String |"......tf1120-lowprec" | Installed directory of DeepMD-Kit 0.x, which should contain `bin lib include`.
 | python_path | String | "....../python3.6/bin/python" | Python path for DeePMD-kit 1.x installed. This option should not be used with `deepmd_path` together.
@@ -801,9 +784,9 @@ mem_limit | Interger | 16 | Maximal memory permitted to apply for the job.
 - Size of `sel_a` and actual types of atoms in your system.
 - Index of `sys_configs` and `sys_idx`
- 2. Please verify the directories of `sys_configs`. If there isnt's any POSCAR for `01.model_devi` in one iteration, it may happen that you write the false path of `sys_configs`.
- 3. Correct format of JSON file.
- 4. In `02.fp`, total cores you require through `task_per_node` should be devided by `npar` times `kpar`.
- 5. The frames of one system should be larger than `batch_size` and `numb_test` in `default_training_param`. It happens that one iteration adds only a few structures and causes error in next iteration's training. In this condition, you may let `fp_task_min` be larger than `numb_test`.
+2. Please verify the directories of `sys_configs`. If there isn't any POSCAR for `01.model_devi` in one iteration, you may have written a wrong path in `sys_configs`.
+3. Correct format of the JSON file.
+4. In `02.fp`, the total number of cores you require through `task_per_node` should be divisible by `npar` times `kpar`.
+5. The number of frames of one system should be larger than `batch_size` and `numb_test` in `default_training_param`. It may happen that one iteration adds only a few structures and causes an error in the next iteration's training. In this case, you may let `fp_task_min` be larger than `numb_test`.

 ## License
 The project dpgen is licensed under [GNU LGPLv3.0](./LICENSE).

From b5ecc63ac57af54c56cddeb9539bd1c725533a8f Mon Sep 17 00:00:00 2001
From: Yongbin Zhuang <38876805+robinzyb@users.noreply.github.com>
Date: Wed, 27 Nov 2019 17:02:38 +0100
Subject: [PATCH 044/109] Update README.md

---
 README.md | 50 +++++++++++++++++++++++++++++++++-----------------
 1 file changed, 33 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 6896b9ddd..8206c1065 100644
--- a/README.md
+++ b/README.md
@@ -20,9 +20,6 @@

 ## About DP-GEN

-[![GitHub release](https://img.shields.io/github/release/deepmodeling/dpgen.svg?maxAge=86400)](https://github.com/deepmodeling/dpgen/releases/)
-[![arxiv:1910.12690](http://img.shields.io/badge/arXiv-1910.12690-B31B1B.svg?maxAge=86400)](https://arxiv.org/abs/1910.12690)
-
 DP-GEN (Deep Generator) is a software written in Python, delicately designed to generate a deep learning based model of interatomic potential energy and force field. DP-GEN is depedent on DeepMD-kit (https://github.com/deepmodeling/deepmd-kit/blob/master/README.md).
With highly scalable interface with common softwares for molecular simulation, DP-GEN is capable to automatically prepare scripts and maintain job queues on HPC machines (High Performance Cluster) and analyze results
 ### Highlighted features
 + **Accurate and efficient**: DP-GEN is capable to sample more than tens of million structures and select only a few for first principles calculation. DP-GEN will finally obtain a uniformly accurate model.
@@ -511,20 +508,39 @@ The bold notation of key (such aas **type_map**) means that it's a necessary key
 |**fp_params["mixingweight"]** | Float| 0.05 | Proportion a of output Density Matrix to be used for the input Density Matrix of next SCF cycle (linear mixing).
 |**fp_params["NumberPulay"]** | Integer| 5 | Controls the Pulay convergence accelerator.
 | *fp_style == cp2k*
-| **fp_params** | Dict | | Parameters for cp2k calculation. find detail in manual.cp2k.org. if it is not remarked with "optional", the parameter must be set. we assume that you have basic knowledge for cp2k input.
-|**fp_params["cutoff"]**| String | 400 |
-|**fp_params["rel_cutoff"]**| String | 50 |
-|**fp_params["functional"]**| String | PBE |
-|**fp_params["max_scf"]**| String | 50 |
-|**fp_params["pair_potential_type"]**| String | DFTD3 | This is optional.
-|**fp_params["pair_potential_path"]**| String | "./cp2k_basis_pp_file/dftd3.dat" | must be set if you set the "pair_potential_type"
-|**fp_params["pair_ref_functional"]**| String | PBE | must be set if you set the "pair_potential_type"
-|**fp_params["basis_path"]**| String | "./cp2k_basis_pp_file/BASIS_MOLOPT" |
-|**fp_params["pp_path"]**| String | "./cp2k_basis_pp_file/GTH_POTENTIALS" |
-|**fp_params["element_list"]**| List | ["H","C","N"] |
-|**fp_params["basis_list"]**| List | ["DZVP_MOLOPT_GTH","DZVP_MOLOPT_GTH","DZVP_MOLOPT_GTH"] | Must be same order with element_list
-|**fp_params["pp_list"]**| List | ["GTH-PBE-q1","GTH-PBE-q4","GTH-PBE-q5"] | Must be same order with element_list
-
+| **fp_params** | Dict | | Parameters for the cp2k calculation. Find details at manual.cp2k.org. Only the KIND section must be set before use. We assume that you have basic knowledge of cp2k input.
+
+
+#### Rules for cp2k input in dictionary form
+Converting a cp2k input file into the dictionary used by dpgen is simple. You just need to follow some simple rules:
+- the KIND section parameters must be provided
+- replace a `keyword` in cp2k with a `keyword` in the dict.
+- replace a `keyword parameter` in cp2k with a `value` in the dict.
+- replace a `section name` in cp2k with a `keyword` in the dict. The corresponding value is a `dict`.
+- replace a `section parameter` in cp2k with the `value` of the dict keyword `"_"`.
+- a `repeat section` in cp2k needs to be written only once, with the repeated parameters given as lists.
+Here are examples for setting:
+```python
+
+# minimal information you should provide for the input;
+# we have set the other parameters in the code. If you want to
+# use your own parameters, just write the corresponding dictionary.
+"user_fp_params": {
+    "FORCE_EVAL":{
+        "DFT":{
+            "BASIS_SET_FILE_NAME": "path",
+            "POTENTIAL_FILE_NAME": "path"
+        },
+        "SUBSYS":{
+            "KIND":{
+                "_": ["N","C","H"],
+                "POTENTIAL": ["GTH-PBE-q5","GTH-PBE-q4", "GTH-PBE-q1"],
+                "BASIS_SET": ["DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH","DZVP-MOLOPT-GTH"]
+            }
+        }
+    }
+}
+```

From 7e3415d08f9f73c73e117858971f9ea5f9fcb602 Mon Sep 17 00:00:00 2001
From: Yongbin Zhuang <38876805+robinzyb@users.noreply.github.com>
Date: Wed, 27 Nov 2019 17:07:45 +0100
Subject: [PATCH 045/109] Update README.md

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index 8206c1065..31a72fbf6 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,9 @@

 ## About DP-GEN

+[![GitHub release](https://img.shields.io/github/release/deepmodeling/dpgen.svg?maxAge=86400)](https://github.com/deepmodeling/dpgen/releases/)
+[![arxiv:1910.12690](http://img.shields.io/badge/arXiv-1910.12690-B31B1B.svg?maxAge=86400)](https://arxiv.org/abs/1910.12690)
+
 DP-GEN (Deep Generator) is a software written in Python, delicately designed to generate a deep learning based model of interatomic potential energy and force field. DP-GEN is depedent on DeepMD-kit (https://github.com/deepmodeling/deepmd-kit/blob/master/README.md). With highly scalable interface with common softwares for molecular simulation, DP-GEN is capable to automatically prepare scripts and maintain job queues on HPC machines (High Performance Cluster) and analyze results
 ### Highlighted features
 + **Accurate and efficient**: DP-GEN is capable to sample more than tens of million structures and select only a few for first principles calculation. DP-GEN will finally obtain a uniformly accurate model.
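To make the conversion rules above concrete, here is a minimal, illustrative sketch of a renderer that turns such a nested dict into CP2K input text. This is not dpgen's actual implementation (see `dpgen/generator/lib/cp2k.py` for that); the function name `render` and the two-space indentation are assumptions made only for illustration.

```python
def render(name, body, indent=0):
    """Render one CP2K section given as a dict, following the rules above.

    `body` maps CP2K keywords to values and sub-section names to dicts.
    The special key "_" holds the section parameter; if it is a list,
    the section is repeated once per entry, and list-valued keywords
    are indexed in the same order.
    """
    pad = '  ' * indent
    param = body.get('_', '')
    if isinstance(param, list):
        # repeated section: emit one copy of the section per parameter
        copies = []
        for i, p in enumerate(param):
            sub = {k: (v[i] if isinstance(v, list) else v)
                   for k, v in body.items() if k != '_'}
            sub['_'] = p
            copies.append(render(name, sub, indent))
        return '\n'.join(copies)
    lines = [('%s&%s %s' % (pad, name, param)).rstrip()]
    for key, value in body.items():
        if key == '_':
            continue
        if isinstance(value, dict):
            # a dict value means `key` is a (sub-)section name
            lines.append(render(key, value, indent + 1))
        else:
            lines.append('%s  %s %s' % (pad, key, value))
    lines.append('%s&END %s' % (pad, name))
    return '\n'.join(lines)

# usage with the "user_fp_params" dict from the example above:
# print('\n'.join(render(k, v) for k, v in user_fp_params.items()))
```

For the `KIND` dict in the example, this sketch would emit three sections (`&KIND N`, `&KIND C`, `&KIND H`), each carrying the `POTENTIAL` and `BASIS_SET` entry at the matching list index.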
From ae360ce197dcdf36a79e637eb6ab9a1f828011ba Mon Sep 17 00:00:00 2001 From: BaozCWJ Date: Thu, 28 Nov 2019 10:19:42 +0800 Subject: [PATCH 046/109] fix the bug when not using the vasp_params --- dpgen/auto_test/cmpt_02_elastic.py | 2 +- dpgen/auto_test/cmpt_04_interstitial.py | 2 +- dpgen/auto_test/gen_03_vacancy.py | 38 ++++++----- dpgen/auto_test/gen_04_interstitial.py | 83 ++++++++++++++----------- dpgen/auto_test/gen_05_surf.py | 3 +- 5 files changed, 69 insertions(+), 59 deletions(-) diff --git a/dpgen/auto_test/cmpt_02_elastic.py b/dpgen/auto_test/cmpt_02_elastic.py index fc3480110..b1a0c9229 100755 --- a/dpgen/auto_test/cmpt_02_elastic.py +++ b/dpgen/auto_test/cmpt_02_elastic.py @@ -104,7 +104,7 @@ def cmpt_deepmd_lammps(jdata, conf_dir, task_name) : # et = -et / 1e4 print_et(et) result = os.path.join(task_path,'result') - result_et(et,conf_dir,task_path) + result_et(et,conf_dir,result) if 'upload_username' in jdata.keys() and task_name=='deepmd': upload_username=jdata['upload_username'] util.insert_data('elastic','deepmd',upload_username,result) diff --git a/dpgen/auto_test/cmpt_04_interstitial.py b/dpgen/auto_test/cmpt_04_interstitial.py index 6b029909c..e85a3d4ff 100755 --- a/dpgen/auto_test/cmpt_04_interstitial.py +++ b/dpgen/auto_test/cmpt_04_interstitial.py @@ -69,7 +69,7 @@ def _cmpt_deepmd_reprod_traj(jdata, conf_dir, supercell, insert_ele, task_name) conf_path = os.path.abspath(conf_dir) task_path = re.sub('confs', global_task_name, conf_path) vasp_path = os.path.join(task_path, vasp_str) - lmps_path = os.path.join(task_path, task_name + vasp_str.replace('vasp','')) + lmps_path = os.path.join(task_path, task_name + vasp_str.replace('vasp','-reprod')) copy_str = "%sx%sx%s" % (supercell[0], supercell[1], supercell[2]) struct_widecard = os.path.join(vasp_path, 'struct-%s-%s-*' % (insert_ele,copy_str)) vasp_struct = glob.glob(struct_widecard) diff --git a/dpgen/auto_test/gen_03_vacancy.py b/dpgen/auto_test/gen_03_vacancy.py index 56bb10b51..93b061ea9 100755 --- a/dpgen/auto_test/gen_03_vacancy.py +++ b/dpgen/auto_test/gen_03_vacancy.py @@ -73,7 +73,7 @@ def make_vasp(jdata, conf_dir, supercell = [1,1,1]) : for fname in potcar_list: with open(fname) as infile: outfile.write(infile.read()) - # gen tasks + # gen tasks copy_str = "%sx%sx%s" % (supercell[0], supercell[1], supercell[2]) cwd = os.getcwd() for ii in range(len(dss)) : @@ -94,11 +94,10 @@ def make_vasp(jdata, conf_dir, supercell = [1,1,1]) : os.chdir(cwd) def make_lammps(jdata, conf_dir, task_type, supercell) : - fp_params = jdata['vasp_params'] - kspacing = fp_params['kspacing'] + fp_params = jdata['lammps_params'] model_dir = fp_params['model_dir'] - type_map = fp_params['type_map'] + type_map = fp_params['type_map'] model_dir = os.path.abspath(model_dir) model_name =fp_params['model_name'] if not model_name and task_type =='deepmd': @@ -110,7 +109,7 @@ def make_lammps(jdata, conf_dir, task_type, supercell) : model_param = {'model_name' : fp_params['model_name'], 'param_type': fp_params['model_param_type']} - + ntypes = len(type_map) conf_path = os.path.abspath(conf_dir) @@ -120,7 +119,8 @@ def make_lammps(jdata, conf_dir, task_type, supercell) : if 'relax_incar' in jdata.keys(): vasp_str='vasp-relax_incar' else: - vasp_str='vasp-k%.2f' % kspacing + kspacing = jdata['vasp_params']['kspacing'] + vasp_str='vasp-k%.2f' % kspacing equi_path = os.path.join(equi_path, vasp_str) equi_contcar = os.path.join(equi_path, 'CONTCAR') assert os.path.exists(equi_contcar),"Please compute the equilibrium state using vasp first" @@ 
-146,28 +146,28 @@ def make_lammps(jdata, conf_dir, task_type, supercell) : dss = [] for jj in vds : dss.append(jj.generate_defect_structure(supercell)) - # gen tasks + # gen tasks cwd = os.getcwd() # make lammps.in, relax at 0 bar (scale = 1) if task_type=='deepmd': - fc = lammps.make_lammps_press_relax('conf.lmp', - ntypes, - 1, + fc = lammps.make_lammps_press_relax('conf.lmp', + ntypes, + 1, lammps.inter_deepmd, model_name) elif task_type =='meam': - fc = lammps.make_lammps_press_relax('conf.lmp', - ntypes, - 1, + fc = lammps.make_lammps_press_relax('conf.lmp', + ntypes, + 1, lammps.inter_meam, model_param) f_lammps_in = os.path.join(task_path, 'lammps.in') with open(f_lammps_in, 'w') as fp : fp.write(fc) - # gen tasks + # gen tasks copy_str = "%sx%sx%s" % (supercell[0], supercell[1], supercell[2]) cwd = os.getcwd() - + os.chdir(task_path) for ii in model_name : if os.path.exists(ii) : @@ -188,7 +188,7 @@ def make_lammps(jdata, conf_dir, task_type, supercell) : dss[ii].to('POSCAR', 'POSCAR') lammps.cvt_lammps_conf('POSCAR', 'conf.lmp') ptypes = vasp.get_poscar_types('POSCAR') - lammps.apply_type_map('conf.lmp', type_map, ptypes) + lammps.apply_type_map('conf.lmp', type_map, ptypes) # link lammps.in os.symlink(os.path.relpath(f_lammps_in), 'lammps.in') # link models @@ -197,7 +197,7 @@ def make_lammps(jdata, conf_dir, task_type, supercell) : # save supercell np.savetxt('supercell.out', supercell, fmt='%d') os.chdir(cwd) - + def _main() : parser = argparse.ArgumentParser( description="gen 03.vacancy") @@ -221,8 +221,6 @@ def _main() : make_lammps(jdata, args.CONF, args.TASK, args.COPY) else : raise RuntimeError("unknow task ", args.TASK) - + if __name__ == '__main__' : _main() - - diff --git a/dpgen/auto_test/gen_04_interstitial.py b/dpgen/auto_test/gen_04_interstitial.py index f423cbfb6..84f56070c 100755 --- a/dpgen/auto_test/gen_04_interstitial.py +++ b/dpgen/auto_test/gen_04_interstitial.py @@ -23,7 +23,7 @@ def _make_vasp(jdata, conf_dir, supercell, insert_ele) : # get equi poscar if 'relax_incar' in jdata.keys(): vasp_str='vasp-relax_incar' - else: + else: kspacing = jdata['vasp_params']['kspacing'] vasp_str='vasp-k%.2f' % (kspacing) equi_path = re.sub('confs', global_equi_name, conf_path) @@ -65,7 +65,7 @@ def _make_vasp(jdata, conf_dir, supercell, insert_ele) : fc = vasp.make_vasp_relax_incar(ecut, ediff, True, True, True, npar=npar,kpar=kpar, kspacing = kspacing, kgamma = kgamma) with open(os.path.join(task_path, 'INCAR'), 'w') as fp : fp.write(fc) - # gen tasks + # gen tasks copy_str = "%sx%sx%s" % (supercell[0], supercell[1], supercell[2]) cwd = os.getcwd() for ii in range(len(dss)) : @@ -82,7 +82,7 @@ def _make_vasp(jdata, conf_dir, supercell, insert_ele) : with open('POSCAR','r') as fp : lines = fp.read().split('\n') ele_list = lines[5].split() - + os.chdir(cwd) potcar_map = jdata['potcar_map'] potcar_list = [] @@ -90,12 +90,12 @@ def _make_vasp(jdata, conf_dir, supercell, insert_ele) : assert os.path.exists(os.path.abspath(potcar_map[ii])),"No POTCAR in the potcar_map of %s"%(ii) potcar_list.append(os.path.abspath(potcar_map[ii])) os.chdir(struct_path) - + with open('POTCAR', 'w') as outfile: for fname in potcar_list: with open(fname) as infile: outfile.write(infile.read()) - + # link incar os.symlink(os.path.relpath(os.path.join(task_path, 'INCAR')), 'INCAR') # save supercell @@ -103,15 +103,15 @@ def _make_vasp(jdata, conf_dir, supercell, insert_ele) : os.chdir(cwd) -def make_reprod_traj(jdata, conf_dir, supercell, insert_ele, task_type) : +def make_reprod_traj(jdata, 
conf_dir, supercell, insert_ele, task_type) : for ii in insert_ele : _make_reprod_traj(jdata, conf_dir, supercell, ii, task_type) -def _make_reprod_traj(jdata, conf_dir, supercell, insert_ele, task_type) : - kspacing = jdata['vasp_params']['kspacing'] +def _make_reprod_traj(jdata, conf_dir, supercell, insert_ele, task_type) : + fp_params = jdata['lammps_params'] model_dir = fp_params['model_dir'] - type_map = fp_params['type_map'] + type_map = fp_params['type_map'] model_dir = os.path.abspath(model_dir) model_name =fp_params['model_name'] if not model_name and task_type=='deepmd': @@ -128,24 +128,33 @@ def _make_reprod_traj(jdata, conf_dir, supercell, insert_ele, task_type) : conf_path = os.path.abspath(conf_dir) task_path = re.sub('confs', global_task_name, conf_path) - vasp_path = os.path.join(task_path, 'vasp-k%.2f' % kspacing) - lmps_path = os.path.join(task_path, task_type + '-reprod-k%.2f' % kspacing) + if 'relax_incar' in jdata.keys(): + vasp_str='vasp-relax_incar' + lmps_str= task_type + '-reprod-relax_incar' + else: + kspacing = jdata['vasp_params']['kspacing'] + vasp_str = 'vasp-k%.2f' % (kspacing) + lmps_str = task_type + '-reprod-k%.2f' % (kspacing) + + vasp_path = os.path.join(task_path, vasp_str) + lmps_path = os.path.join(task_path, lmps_str) + os.makedirs(lmps_path, exist_ok = True) copy_str = "%sx%sx%s" % (supercell[0], supercell[1], supercell[2]) struct_widecard = os.path.join(vasp_path, 'struct-%s-%s-*' % (insert_ele,copy_str)) vasp_struct = glob.glob(struct_widecard) vasp_struct.sort() cwd=os.getcwd() - + # make lammps.in if task_type =='deepmd': - fc = lammps.make_lammps_eval('conf.lmp', - ntypes, + fc = lammps.make_lammps_eval('conf.lmp', + ntypes, lammps.inter_deepmd, model_name) elif task_type =='meam': - fc = lammps.make_lammps_eval('conf.lmp', - ntypes, + fc = lammps.make_lammps_eval('conf.lmp', + ntypes, lammps.inter_meam, model_param) f_lammps_in = os.path.join(lmps_path, 'lammps.in') @@ -188,7 +197,7 @@ def _make_reprod_traj(jdata, conf_dir, supercell, insert_ele, task_type) : for (ii,jj) in zip(models, model_name) : os.symlink(os.path.relpath(ii), jj) share_models = [os.path.join(ls,ii) for ii in model_name] - + # loop over frames for ii in range(xdat_nframes) : frame_path = 'frame.%06d' % ii @@ -197,10 +206,10 @@ def _make_reprod_traj(jdata, conf_dir, supercell, insert_ele, task_type) : # clear dir for jj in ['conf.lmp'] : if os.path.isfile(jj): - os.remove(jj) + os.remove(jj) for jj in ['lammps.in'] + model_name : if os.path.islink(jj): - os.unlink(jj) + os.unlink(jj) # link lammps in os.symlink(os.path.relpath('../lammps.in'), 'lammps.in') # make conf @@ -222,11 +231,10 @@ def make_lammps(jdata, conf_dir, supercell, insert_ele, task_type) : _make_lammps(jdata, conf_dir, supercell, ii, task_type) def _make_lammps(jdata, conf_dir, supercell, insert_ele, task_type) : - fp_params = jdata['vasp_params'] - kspacing = fp_params['kspacing'] + fp_params = jdata['lammps_params'] model_dir = fp_params['model_dir'] - type_map = fp_params['type_map'] + type_map = fp_params['type_map'] model_dir = os.path.abspath(model_dir) model_name =fp_params['model_name'] if not model_name and task_type=='deepmd': @@ -245,7 +253,12 @@ def _make_lammps(jdata, conf_dir, supercell, insert_ele, task_type) : conf_poscar = os.path.join(conf_path, 'POSCAR') # get equi poscar equi_path = re.sub('confs', global_equi_name, conf_path) - equi_path = os.path.join(equi_path, 'vasp-k%.2f' % kspacing) + if 'relax_incar' in jdata.keys(): + vasp_str='vasp-relax_incar' + else: + kspacing = 
jdata['vasp_params']['kspacing'] + vasp_str='vasp-k%.2f' % (kspacing) + equi_path = os.path.join(equi_path, vasp_str) equi_contcar = os.path.join(equi_path, 'CONTCAR') #equi_path = os.path.join(equi_path, task_type) #equi_dump = os.path.join(equi_path, 'dump.relax') @@ -269,25 +282,25 @@ def _make_lammps(jdata, conf_dir, supercell, insert_ele, task_type) : dss = [] for jj in vds : dss.append(jj.generate_defect_structure(supercell)) - # gen tasks + # gen tasks cwd = os.getcwd() # make lammps.in, relax at 0 bar (scale = 1) if task_type=='deepmd': - fc = lammps.make_lammps_press_relax('conf.lmp', - ntypes, + fc = lammps.make_lammps_press_relax('conf.lmp', + ntypes, 1, lammps.inter_deepmd, model_name) elif task_type =='meam': - fc = lammps.make_lammps_press_relax('conf.lmp', - ntypes, - 1, + fc = lammps.make_lammps_press_relax('conf.lmp', + ntypes, + 1, lammps.inter_meam, model_param) f_lammps_in = os.path.join(task_path, 'lammps.in') with open(f_lammps_in, 'w') as fp : fp.write(fc) - # gen tasks + # gen tasks copy_str = "%sx%sx%s" % (supercell[0], supercell[1], supercell[2]) cwd = os.getcwd() @@ -298,7 +311,7 @@ def _make_lammps(jdata, conf_dir, supercell, insert_ele, task_type) : for (ii,jj) in zip(models, model_name) : os.symlink(os.path.relpath(ii), jj) share_models = [os.path.join(task_path,ii) for ii in model_name] - + for ii in range(len(dss)) : struct_path = os.path.join(task_path, 'struct-%s-%s-%03d' % (insert_ele,copy_str,ii)) print('# generate %s' % (struct_path)) @@ -311,7 +324,7 @@ def _make_lammps(jdata, conf_dir, supercell, insert_ele, task_type) : dss[ii].to('POSCAR', 'POSCAR') lammps.cvt_lammps_conf('POSCAR', 'conf.lmp') ptypes = vasp.get_poscar_types('POSCAR') - lammps.apply_type_map('conf.lmp', type_map, ptypes) + lammps.apply_type_map('conf.lmp', type_map, ptypes) # link lammps.in os.symlink(os.path.relpath(f_lammps_in), 'lammps.in') # link models @@ -321,7 +334,7 @@ def _make_lammps(jdata, conf_dir, supercell, insert_ele, task_type) : np.savetxt('supercell.out', supercell, fmt='%d') os.chdir(cwd) - + def _main() : parser = argparse.ArgumentParser( description="gen 04.interstitial") @@ -350,8 +363,6 @@ def _main() : make_reprod_traj(jdata, args.CONF, args.COPY, args.ELEMENT, args.TASK) else : raise RuntimeError("unknow task ", args.TASK) - + if __name__ == '__main__' : _main() - - diff --git a/dpgen/auto_test/gen_05_surf.py b/dpgen/auto_test/gen_05_surf.py index a11303427..56a275630 100755 --- a/dpgen/auto_test/gen_05_surf.py +++ b/dpgen/auto_test/gen_05_surf.py @@ -125,7 +125,7 @@ def make_vasp(jdata, conf_dir, max_miller = 2, relax_box = False, static = False cwd = os.getcwd() def make_lammps(jdata, conf_dir, max_miller = 2, static = False, relax_box = False, task_type = 'wrong-task') : - kspacing = jdata['vasp_params']['kspacing'] + fp_params = jdata['lammps_params'] model_dir = fp_params['model_dir'] type_map = fp_params['type_map'] @@ -152,6 +152,7 @@ def make_lammps(jdata, conf_dir, max_miller = 2, static = False, relax_box = Fal if 'relax_incar' in jdata.keys(): vasp_str='vasp-relax_incar' else: + kspacing = jdata['vasp_params']['kspacing'] vasp_str='vasp-k%.2f' % (kspacing) equi_path = re.sub('confs', global_equi_name, conf_dir) From 5c2265ff932c1cf3de1f0b7ca1bf15878a8fa818 Mon Sep 17 00:00:00 2001 From: BaozCWJ Date: Thu, 28 Nov 2019 14:26:44 +0800 Subject: [PATCH 047/109] fix the bug when use the vasp_params from incar --- dpgen/auto_test/cmpt_04_interstitial.py | 2 +- dpgen/auto_test/gen_04_interstitial.py | 1 + .../test/{vasp_poscar_param.json => 
vasp_param_from_incar.json}                  |   2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)
 rename examples/test/{vasp_poscar_param.json => vasp_param_from_incar.json} (99%)

diff --git a/dpgen/auto_test/cmpt_04_interstitial.py b/dpgen/auto_test/cmpt_04_interstitial.py
index e85a3d4ff..6b029909c 100755
--- a/dpgen/auto_test/cmpt_04_interstitial.py
+++ b/dpgen/auto_test/cmpt_04_interstitial.py
@@ -69,7 +69,7 @@ def _cmpt_deepmd_reprod_traj(jdata, conf_dir, supercell, insert_ele, task_name)
     conf_path = os.path.abspath(conf_dir)
     task_path = re.sub('confs', global_task_name, conf_path)
     vasp_path = os.path.join(task_path, vasp_str)
-    lmps_path = os.path.join(task_path, task_name + vasp_str.replace('vasp','-reprod'))
+    lmps_path = os.path.join(task_path, task_name + vasp_str.replace('vasp',''))
     copy_str = "%sx%sx%s" % (supercell[0], supercell[1], supercell[2])
     struct_widecard = os.path.join(vasp_path, 'struct-%s-%s-*' % (insert_ele,copy_str))
     vasp_struct = glob.glob(struct_widecard)
diff --git a/dpgen/auto_test/gen_04_interstitial.py b/dpgen/auto_test/gen_04_interstitial.py
index 84f56070c..376be45c6 100755
--- a/dpgen/auto_test/gen_04_interstitial.py
+++ b/dpgen/auto_test/gen_04_interstitial.py
@@ -143,6 +143,7 @@ def _make_reprod_traj(jdata, conf_dir, supercell, insert_ele, task_type) :
     copy_str = "%sx%sx%s" % (supercell[0], supercell[1], supercell[2])
     struct_widecard = os.path.join(vasp_path, 'struct-%s-%s-*' % (insert_ele,copy_str))
     vasp_struct = glob.glob(struct_widecard)
+    assert len(vasp_struct)>0 ,"Please compute the interstitial defect using vasp first"
     vasp_struct.sort()
     cwd=os.getcwd()
diff --git a/examples/test/vasp_poscar_param.json b/examples/test/vasp_param_from_incar.json
similarity index 99%
rename from examples/test/vasp_poscar_param.json
rename to examples/test/vasp_param_from_incar.json
index 7165e1dd1..d89230d65 100644
--- a/examples/test/vasp_poscar_param.json
+++ b/examples/test/vasp_param_from_incar.json
@@ -7,7 +7,7 @@
     "key_id": "key id of Material project",
     "task_type": "vasp",
     "task": "all",
-    
+
     "relax_incar": "somewhere/relax_incar",
     "scf_incar": "somewhere/scf_incar",

From 71ed759ab059a334143c065ff051852f1431968f Mon Sep 17 00:00:00 2001
From: Yuan Fengbo
Date: Sat, 30 Nov 2019 13:52:35 +0800
Subject: [PATCH 048/109] update doc for auto_test

---
 README.md | 653 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 653 insertions(+)

diff --git a/README.md b/README.md
index ee2ede818..bcd0aeaa6 100644
--- a/README.md
+++ b/README.md
@@ -646,6 +646,659 @@ This task uses the stress-strain relationship to calculate the elastic constant.
 + `static-opt`:(boolean) whether to use atomic relaxation to compute surface energy. if false, the structure will be relaxed.
 + `relax_box`:(boolean) set true if the box is relaxed, otherwise only relax atom positions.
+## Test: The content of the auto_test
+This section describes the atom configurations to be tested and the content of each auto_test task.
+### param.json
+|Key | Type | Example | Description |
+| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------|
+| potcar_map | dict | {"Al": "example/POTCAR"} | a dict like { "element" : "position of POTCAR" } |
+| conf_dir | path_like | confs/Al/std-fcc | the directory which contains vasp's POSCAR |
+
+
+### 00.equi
+
+equi will test the equilibrium state and get the following results.
+#### 1.test results
+
+Field | Type | Example | Description |
+| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------|
+| EpA(eV) | real number | -3.7468 | the potential energy per atom|
+| VpA(A^3)| real number | 16.511| the equilibrium volume per atom |
+
+#### 2.param.json
+vasp
+|Key | Type | Example | Description |
+| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------|
+| ecut | real number | 650 | the plane wave cutoff for the grid |
+| ediff | real number | 1e-6 | tolerance of the density matrix |
+| kspacing | real number | 0.1 | sample factor in Brillouin zones |
+| kgamma | boolean | false | whether to generate a Gamma centered grid |
+| npar | positive integer | 1 | the number of bands that are to be treated in parallel |
+| kpar | positive integer | 1 | the number of k-points that are treated in parallel |
+
+lammps
+|key | Type | Example| Description|
+| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------|
+| store_stable | boolean | true | whether to store the stable energy and volume|
+
+
+#### 3.atom configuration
+The atom configuration is specified by param['conf_dir']/POSCAR.
+
+The box is periodic, so that particles interact across the boundary, and they can exit one end of the box and re-enter the other end.
+
+Here are the configuration files used by lammps and vasp; note that the following two configurations are **equal**.
+
+lammps atom configuration example:
+```
+# Al/std-fcc/conf.lmp
+
+1 atoms
+1 atom types
+ 0.0000000000 2.8637824638 xlo xhi
+ 0.0000000000 2.4801083646 ylo yhi
+ 0.0000000000 2.3382685902 zlo zhi
+ 1.4318912319 1.4318912319 0.8267027882 xy xz yz
+
+Atoms # atomic
+
+1 1 0.0000000000 0.0000000000 0.0000000000
+```
+vasp atom configuration example (Al/std-fcc/POSCAR):
+```
+Al1
+1.0
+0.000000 2.025000 2.025000
+2.025000 0.000000 2.025000
+2.025000 2.025000 0.000000
+Al
+1
+direct
+0.000000 0.000000 0.000000 Al
+```
+
+#### 4.lammps
+##### 4.1 input file
+lammps performs energy minimizations of the system, and uses the results of the energy minimizations as the test result.
+
+```
+# lammps.in
+dimension 3
+boundary p p p
+atom_style atomic
+box tilt large
+read_data conf.lmp
+
+......
+
+min_style cg
+fix 1 all box/relax iso 0.0
+minimize 1.000000e-12 1.000000e-06 5000 500000
+fix 1 all box/relax aniso 0.0
+minimize 1.000000e-12 1.000000e-06 5000 500000
+......
+```
+`minimize` performs an energy minimization of the system by iteratively adjusting atom coordinates. Iterations are terminated when one of the stopping criteria is satisfied.
+
+`fix` applies an external pressure or stress tensor to the simulation box during an energy minimization. This allows the box size and shape to vary during the iterations of the minimizer so that the final configuration will be both an energy minimum for the potential energy of the atoms, and the system pressure tensor will be close to the specified external tensor.
+
+`iso/aniso` The keyword iso means couple all 3 diagonal components together when pressure is computed (hydrostatic pressure), and dilate/contract the dimensions together.
+The keyword aniso means x, y, and z dimensions are controlled independently using the Pxx, Pyy, and Pzz components of the stress tensor as the driving forces.
+
+
+##### 4.2 output file
+After energy minimization, dpgen will use the PotEng of the last step (-3.7467628) as `EpA(eV)`,
+and the Volume of the last step (16.510567) as `VpA(A^3)`.
+```
+Step PotEng Pxx Pyy Pzz Pxy Pxz Pyz Lx Ly Lz Volume c_mype
+ 0 -3.7465689 -5490.0435 -5489.99 -5489.9875 8.0059506e-05 -0.00015863904 0.00072809883 2.8637825 2.4801084 2.3382686 16.607531 -3.7465689
+ 21 -3.7467629 -829.55143 -829.56516 -829.55757 -1.7925939e-05 -9.5713646e-05 0.0038858896 2.8581981 2.4752722 2.333709 16.510567 -3.7467629
+ 22 -3.7467628 -829.56697 -829.63547 -829.55631 0.00051858009 1.8511104e-07 -0.0017081319 2.8581981 2.4752722 2.333709 16.510567 -3.7467628
+```
+
+#### 5.VASP
+##### 5.1 INCAR
+Some fields of the INCAR will be changed according to param.json:
+```
+PREC=A
+ENCUT=650 # will use param.json's
+# ISYM=0
+ALGO=fast
+EDIFF=1.000000e-06 # will use param.json's
+EDIFFG=-0.01
+LREAL=A
+NPAR=1 # will use param.json's
+KPAR=1 # will use param.json's
+
+ISMEAR=1
+SIGMA=0.220000
+
+ISTART=0
+ICHARG=2
+NELM=100
+NELMIN=6
+ISIF=6
+IBRION=2
+
+NSW=50
+
+LWAVE=F
+LCHARG=F
+PSTRESS=0
+
+KSPACING=0.100000 # will use param.json's
+KGAMMA=F # will use param.json's
+```
+
+### 01.eos
+eos will calculate the equation of state and get the following results.
+
+Auto_test 01.eos will calculate the potential energy of a single atom in a range of volumes.
+
+You may then use the test results to draw the equation of state curve.
+
+#### 1.test results
+
+Field | Type | Example | Description |
+| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------|
+| EpA(eV) | list of real number | [-3.7306, -3.7429, -3.746762, -3.7430] | the potential energy per atom at each volume|
+| VpA(A^3)| list of real number | [15.5,16.0,16.5,17.0] | the volume per atom |
+
+#### 2.param.json
+
+vol_start, vol_end and vol_step determine the volumetric range and accuracy of the eos.
+|Key | Type | Example | Description |
+| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------|
+| vol_start |real number | 12 | the start volume |
+| vol_end | real number | 22 | the end volume |
+| vol_step | real number | 0.5 | the interval between volumes |
+
+The parameters above define the list of target volumes below (note that Python's built-in range() does not accept a float step, hence numpy.arange);
+each volume in volume_list will become a test target:
+
+```python
+import numpy as np
+volume_list = np.arange(vol_start, vol_end, vol_step)
+```
+
+#### 3.atom configuration
+
+The initial atom configuration will be the equilibrium state atom configuration (the configuration of the last step of 00.equi).
+
+The box will contain only one atom.
+
+The box is periodic, so that particles interact across the boundary, and they can exit one end of the box and re-enter the other end.
+
+
+The box and atom position will deform proportionally in the x, y, z directions to reach the target volume.
+
+For example, the file below will generate a box with a single atom.
+The box volume is 2.857588\*2.474744\*2.333211==16.50 A^(3).
+```
+# vol-16.50/conf.lmp
+
+1 atoms
+1 atom types
+ 0.0000000000 2.8575880000 xlo xhi
+ 0.0000000000 2.4747440000 ylo yhi
+ 0.0000000000 2.3332110000 zlo zhi
+ 1.4287940000 1.4287940000 0.8249150000 xy xz yz
+
+Atoms # atomic
+
+1 1 2.8547961365 3.2972419997 2.3309314529
+```
+The images below are the atom configurations at volumes 12.0, 16.5 and 21.5.
+![](https://i.imgur.com/R1mXWrM.png)![](https://i.imgur.com/IsQZW8n.png)![](https://i.imgur.com/wTl375d.png)
+
+
+
+
+#### 4.lammps
+Lammps will create a series of folders, and each folder will contain a different conf.lmp file.
+
+Each conf.lmp will contain only one atom; the box sizes of these conf.lmp files will be different.
+
+##### 4.1 input file
+
+lammps performs an energy minimization of the system, and uses the results of the energy minimization as the test result.
+
+Notice that lammps does not apply `fix ... all box/relax ... 0.0`; that means lammps will **not** try to keep the stress of the box at 0.0 (this is different from 00.equi).
+```
+# vol-16.50 lammps.in
+
+units metal
+dimension 3
+boundary p p p
+atom_style atomic
+box tilt large
+read_data conf.lmp
+
+......
+
+min_style cg
+minimize 1.000000e-12 1.000000e-06 5000 500000
+......
+```
+
+##### 4.2 output file
+
+
+After the energy minimization (in this example, the energy minimization ran only one step before it stopped),
+dpgen will use the PotEng of the last step, -3.7467628, as `EpA(eV)` where `VpA(A^3)==16.50`.
+
+```
+# vol-16.50/log.lammps
+
+Step PotEng Pxx Pyy Pzz Pxy Pxz Pyz Lx Ly Lz Volume c_mype
+ 0 -3.7467629 -315.56544 -315.60474 -315.65689 -0.00081929052 -0.0015852434 -0.038537753 2.857588 2.474744 2.333211 16.499999 -3.7467629
+ 1 -3.7467629 -315.56544 -315.60474 -315.65689 -0.00081929052 -0.0015852434 -0.038537753 2.857588 2.474744 2.333211 16.499999 -3.746762
+```
+#### 5.VASP
+VASP will calculate the potential energy of the atom configuration. The VASP INCAR is the same as 00.equi's.
+
+
+### 02.elastic
+
+Calculate the elastic moduli, bulk modulus, shear modulus, Young's modulus and Poisson ratio.
+
+#### 1.test results
+
+Field | Type | Example | Description |
+| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------|
+| elastic moduli(GPa)| 6*6 matrix of real number| [[130.50 57.45 54.45 4.24 0.00 0.00] [57.61 130.31 54.45 -4.29 -0.00 -0.00] [54.48 54.48 133.32 -0.00 -0.00 -0.00] [4.49 -4.02 -0.89 33.78 0.00 -0.00] [-0.00 -0.00 -0.00 -0.00 33.77 4.29] [0.00 -0.00 -0.00 -0.00 4.62 36.86]]| Voigt-notation elastic moduli; the sequence of rows and columns is (xx, yy, zz, yz, zx, xy)|
+| bulk modulus(GPa) | real number | 80.78 | bulk modulus |
+| shear modulus(GPa) | real number | 36.07 | shear modulus |
+| Young's modulus(GPa) | real number | 94.19 | Young's modulus|
+| Poisson ratio | real number | 0.31 | Poisson ratio |
+
+#### 2.param.json
+
+norm_deform and shear_deform are the scales of material deformation. This task uses the stress-strain relationship to calculate the elastic constants.
+|Key | Type | Example | Description |
+| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------|
+| norm_deform | real number | 0.02 | uniaxial deformation range |
+| shear_deform | real number | 0.05| shear deformation range |
+
+
+#### 3.atom configuration
+##### 3.1 uniaxial deformation
+norm_deform=0.02 will become a list, and then this list will become a list of 3*3 matrices, which will change the configuration of the simulation box.
+```python
+norm_deform=0.02
+norm_strains = [-norm_deform, -0.5*norm_deform, 0.5*norm_deform, norm_deform]
+
+# the result of the command above
+norm_strains = [-0.02, -0.01, 0.01, 0.02]
+
+```
+For X-axis uniaxial deformation, the list above will become the matrix list below.
+$norm\_x\_matrix\_list=
+[\begin{pmatrix}
+0.98 & 0 & 0\\
+0 & 1 & 0\\
+0 & 0 & 1
+\end{pmatrix} , \begin{pmatrix}
+0.99 & 0 & 0\\
+0 & 1 & 0\\
+0 & 0 & 1
+\end{pmatrix},
+\begin{pmatrix}
+1.01 & 0 & 0\\
+0 & 1 & 0\\
+0 & 0 & 1
+\end{pmatrix}, \begin{pmatrix}
+1.02 & 0 & 0\\
+0 & 1 & 0\\
+0 & 0 & 1
+\end{pmatrix}]$
+
+
+
+The initial atom configuration will be the equilibrium state atom configuration (the configuration of the last step of 00.equi).
+
+For each matrix in norm_matrix_list, the atom position and box size of the initial atom configuration will deform accordingly.
+
+For example, the initial box size will be a lower triangular matrix
+$box\_size=\begin{pmatrix}
+xx & & \\
+xy & yy & \\
+xz & yz & zz
+\end{pmatrix} =\begin{pmatrix}
+2.858198 & 0.0 & 0.0\\
+1.429099 & 2.475272 & 0\\
+1.429099 & 0.825091 & 2.333709
+\end{pmatrix}$
+
+The atom position in the box is
+(note that the atom may be out of the simulation box, but the box is periodic, so the atom will be moved into the box automatically)
+$atom\_position = \begin{pmatrix}
+x & y & z
+\end{pmatrix} = \begin{pmatrix}
+2.855406 & 3.297945 & 2.331429
+\end{pmatrix}$
+
+The box_size and atom_position will deform according to the matrices in norm_matrix_list.
+For example,
+
+$box\_size\_x\_axis\_0.98=\begin{pmatrix}
+2.858198 & 0.0 & 0.0\\
+1.429099 & 2.475272 & 0\\
+1.429099 & 0.825091 & 2.333709
+\end{pmatrix} \begin{pmatrix}
+0.98 & 0 & 0\\
+0 & 1 & 0\\
+0 & 0 & 1
+\end{pmatrix}=\begin{pmatrix}
+2.800451 & 0 & 0\\
+1.400225 & 2.475272 & 0\\
+1.400225 & 0.825091 & 2.333709
+\end{pmatrix}$
+
+$atom\_position\_x\_axis\_0.98 = \begin{pmatrix}
+2.855406 & 3.297945 & 2.331429
+\end{pmatrix} \begin{pmatrix}
+0.98 & 0 & 0\\
+0 & 1 & 0\\
+0 & 0 & 1
+\end{pmatrix}$
+
+##### 3.2 shear deformation
+
+shear_deform=0.05 will become a list, and then this list will become a list of 3*3 matrices, which will change the configuration of the simulation box.
+```python
+shear_deform=0.05
+shear_strains = [-shear_deform, -0.5*shear_deform, 0.5*shear_deform, shear_deform]
+
+# the result of the command above
+shear_strains = [-0.05, -0.025, 0.025, 0.05]
+```
+
+For yz shear deformation, the list above will become the matrix list below.
+
+$shear\_yz\_matrix\_list=
+[\begin{pmatrix}
+1 & 0 & 0\\
+0 & 1 & -0.05\\
+0 & -0.05 & 1
+\end{pmatrix} , \begin{pmatrix}
+1 & 0 & 0\\
+0 & 1 & -0.025\\
+0 & -0.025 & 1
+\end{pmatrix},
+\begin{pmatrix}
+1 & 0 & 0\\
+0 & 1 & 0.025\\
+0 & 0.025 & 1
+\end{pmatrix}, \begin{pmatrix}
+1 & 0 & 0\\
+0 & 1 & 0.05\\
+0 & 0.05 & 1
+\end{pmatrix}]$
+
+and then the box_size matrix and atom_position vector will change according to the matrices above.
+
+#### 4.lammps
+##### 4.1 input file
+
+lammps performs an energy minimization of the system, and uses the results of the energy minimization as the test result.
+
+Notice that lammps does not apply `fix ... all box/relax ... 0.0`; that means lammps will **not** try to keep the stress of the box at 0.0 (this is different from 00.equi).
+```
+# dfm-000/lammps , x-axis deform -0.02
+
+min_style cg
+minimize 1.000000e-12 1.000000e-06 5000 500000
+```
+##### 4.2 output file
+
+After the energy minimization (in this example, the energy minimization ran only one step before it stopped):
+
+```
+Step PotEng Pxx Pyy Pzz Pxy Pxz Pyz Lx Ly Lz Volume c_mype
+ 0 -3.7440621 27804.084 11040.856 10437.716 0.082776521 0.063238993 852.90808 2.800451 2.475272 2.333709 16.176986 -3.7440621
+ 1 -3.7440621 27804.102 11040.877 10437.739 0.081399331 0.061721109 852.90771 2.800451 2.475272 2.333709 16.176986 -3.7440621
+```
+
+dpgen will use the results Pxx Pyy Pzz Pxy Pxz Pyz of the last MD step as the stress tensor,
+that is,
+
+$stress\_x\_axis\_0.98=\begin{pmatrix}
+Pxx & Pxy & Pxz\\
+Pyx & Pyy & Pyz\\
+Pzx & Pzy & Pzz
+\end{pmatrix}=\begin{pmatrix}
+27804.1 & 0.0813993 & 0.0617211\\
+0.0813993 & 11040.9 & 852.908\\
+0.0617211 & 852.908 & 10437.7
+\end{pmatrix}$
+
+#### 5.VASP
+The atom configuration is the same as lammps's.
+
+VASP will calculate the potential energy of the atom configuration. The VASP INCAR is the same as 00.equi's.
+
+#### 6.data analysis
+The strain matrices and corresponding stress matrices will be used to calculate the elastic tensor with a least-squares fit.
+```python3
+# lst_strain: list of strain objects to fit
+# lst_stress: list of stress objects to use in fit
+# eq_stress: the stress of the equilibrium state
+from pymatgen.analysis.elasticity.elastic import ElasticTensor
+
+ElasticTensor.from_independent_strains(lst_strain,
+                                       lst_stress,
+                                       eq_stress = equi_stress)
+
+```
+This will return the elastic moduli, bulk modulus, shear modulus, Young's modulus and Poisson ratio.
+
+### 03.vacancy
+Calculate the vacancy formation energy.
+
+#### 1.test results
+The results will be a table; here we only show one row of the table.
+
+Field | Type | Example | Description |
+| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------|
+|Structure| list of string |['struct-3x3x3-000'] | structure name|
+| Vac_E(eV) | real number |0.723 | the vacancy formation energy |
+| E(eV) | real number | -96.684 | potential energy of the vacancy configuration |
+| equi_E(eV) | real number |-97.407 | potential energy of the equilibrium state|
+
+
+#### 2.param.json
+
+`supercell` sets the size of the supercell used to generate the vacancy configuration.
+|Key | Type | Example | Description |
+| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------|
+| supercell | list of integer | [3,3,3] | the supercell size used to generate the vacancy defect and interstitial defect |
+
+#### 3.atom configuration
+Auto_test will use the 00.equi result atom configuration CONTCAR,
+and create a supercell according to param.json's supercell.
+
+The atom configuration will be auto generated by pymatgen's class
+`pymatgen.analysis.defects.generators.VacancyGenerator`.
+
+Atom configurations for vacancies will be based on periodically equivalent sites.
+
+```python3
+from pymatgen import Structure
+from pymatgen.analysis.defects.generators import VacancyGenerator
+VacancyGenerator(Structure.from_file(task_poscar))
+```
+VASP CONTCAR of 00.equi:
+```
+Al1
+ 1.00000000000000
+ 0.0000000000000000 2.0213150252059031 2.0213150252059031
+ 2.0213150252059031 0.0000000000000000 2.0213150252059031
+ 2.0213150252059031 2.0213150252059031 -0.0000000000000000
+ Al
+ 1
+Direct
+ 0.0000000000000000 0.0000000000000000 0.0000000000000000
+
+ 0.00000000E+00 0.00000000E+00 0.00000000E+00
+```
+
+vasp POSCAR to test the vacancy formation energy:
+```
+ Al26
+1.0
+0.000000 6.063945 6.063945
+6.063945 0.000000 6.063945
+6.063945 6.063945 0.000000
+Al
+26
+direct
+0.000000 0.000000 0.333333 Al
+0.000000 0.000000 0.666667 Al
+0.000000 0.333333 0.000000 Al
+0.000000 0.333333 0.333333 Al
+1.000000 0.333333 0.666667 Al
+0.000000 0.666667 0.000000 Al
+1.000000 0.666667 0.333333 Al
+0.000000 0.666667 0.666667 Al
+0.333333 0.000000 0.000000 Al
+0.333333 0.000000 0.333333 Al
+0.333333 1.000000 0.666667 Al
+0.333333 0.333333 0.000000 Al
+0.333333 0.333333 0.333333 Al
+0.333333 0.333333 0.666667 Al
+0.333333 0.666667 0.000000 Al
+0.333333 0.666667 0.333333 Al
+0.333333 0.666667 0.666667 Al
+0.666667 0.000000 0.000000 Al
+0.666667 1.000000 0.333333 Al
+0.666667 0.000000 0.666667 Al
+0.666667 0.333333 0.000000 Al
+0.666667 0.333333 0.333333 Al
+0.666667 0.333333 0.666667 Al
+0.666667 0.666667 0.000000 Al
+0.666667 0.666667 0.333333 Al
+0.666667 0.666667 0.666667 Al
+```
+
+
+#### 4.lammps
+lammps performs energy minimizations of the system, and uses the resulting potential energies as the result for the vacancy configuration.
+
+The formation energy will be the difference between the vacancy configuration and the equilibrium configuration.
+```
+min_style cg
+fix 1 all box/relax iso ${Px}
+minimize 1.000000e-12 1.000000e-06 5000 500000
+fix 1 all box/relax aniso ${Px}
+minimize 1.000000e-12 1.000000e-06 5000 500000
+```
+#### 5.VASP
+VASP will calculate the potential energies of the vacancy configuration and the equilibrium configuration. The formation energy will be the difference between them.
+
+### 04.interstitial
+Calculate the interstitial formation energy.
+
+#### 1.test results
+The results will be in a table; here we only show one row of the table.
+
+Field | Type | Example | Description |
+| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------|
+|Structure| string |'struct-Al-3x3x3-000' | structure name|
+| Inter_E(eV) | real number |0.723 | the interstitial formation energy |
+| E(eV) | real number | -96.684 | potential energy of the interstitial configuration |
+| equi_E(eV) | real number |-97.407 | potential energy of the equilibrium state|
+
+
+#### 2.param.json
+
+|Key | Type | Example | Description |
+| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------|
+| insert_ele | list of string | ["Al"] | the elements used to generate the point interstitial defect |
+| reprod-opt | boolean | false | whether to reproduce trajectories of the interstitial defect|
+
+#### 3.atom configuration
+
+Auto_test will use the 00.equi result atom configuration CONTCAR,
+and create a supercell according to param.json's supercell.
+
+The atom configuration will be auto generated by pymatgen's class
+`pymatgen.analysis.defects.generators.InterstitialGenerator`.
+
+Atom configurations for interstitials will be based on a simple Voronoi analysis.
+```python3
+from pymatgen import Structure
+from pymatgen.analysis.defects.generators import InterstitialGenerator
+InterstitialGenerator(Structure.from_file(task_poscar), insert_ele)
+```
+#### 4.lammps
+##### 4.1 input file
+lammps performs energy minimizations of the system, and uses the resulting potential energies as the result for the interstitial configuration.
+
+The formation energy will be the difference between the interstitial configuration and the equilibrium configuration.
+```
+min_style cg
+fix 1 all box/relax iso 0.0
+minimize 1.000000e-12 1.000000e-06 5000 500000
+fix 1 all box/relax aniso 0.0
+minimize 1.000000e-12 1.000000e-06 5000 500000
+```
+#### 5.VASP
+VASP will calculate the potential energies of the interstitial configuration and the equilibrium configuration. The formation energy will be the difference between them.
+
+### 05.surface
+Calculate the surface formation energy.
+
+#### 1.test results
+The results will be in a table; here we only show one row of the table.
+
+Field | Type | Example | Description |
+| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------|
+|Miller_Indices| string | struct-000-m1.1.1m | Miller indices|
+|Surf_E(J/m^2)| real number | 0.673 | the surface formation energy |
+| EpA(eV) | real number | -3.628 | potential energy per atom of the surface configuration |
+| equi_EpA | real number | -3.747 | potential energy per atom of the equilibrium state|
+
+
+#### 2.param.json
+
+These parameters control how the slab configurations are generated.
+|Key | Type | Example | Description |
+| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------|
+| min_slab_size| real number| 10 | the minimum size of the slab thickness |
+|min_vacuum_size | real number| 11 | the minimum size of the vacuum width |
+|pert_xz | real number| 0.01 | the perturbation through the xz direction used to compute the surface energy |
+|max_miller | integer| 2 | the maximum miller index |
+|static-opt|boolean| false | whether to use atomic relaxation to compute the surface energy. If false, the structure will be relaxed.
|
+|relax_box | boolean | false | set true if the box is relaxed, otherwise only relax atom positions |
+
+#### 3.atom configuration
+The atom configuration will be auto generated by pymatgen's
+`pymatgen.core.structure.Structure` and `pymatgen.core.surface.generate_all_slabs`, using the equilibrium state atom structure, `max_miller`, `min_slab_size` and `min_vacuum_size`:
+
+```python
+from pymatgen import Structure
+from pymatgen.core.surface import generate_all_slabs
+all_slabs = generate_all_slabs(Structure.from_file(poscar), max_miller, min_slab_size, min_vacuum_size)
+```
+
+The images below are slab configurations.
+![](https://i.imgur.com/WxU8K0s.png)![](https://i.imgur.com/0h9IZ6L.png)
+
+
+
+#### 4.lammps
+lammps will calculate the potential energy of the configuration and divide it by the number of atoms to get the energy per atom of the surface configuration.
+
+The surface energy will be derived from the difference between the energy per atom of the surface configuration and the potential energy per atom of the equilibrium configuration.
+
+##### 4.1 input file
+```
+min_style cg
+minimize 1.000000e-12 1.000000e-06 5000 500000
+```
+
+#### 5.VASP
+VASP will calculate the potential energy of the configuration and divide it by the number of atoms to get the energy per atom. The VASP INCAR is the same as 00.equi's.

From d15e892e18975e86623a425e528552e5599743cb Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng
Date: Sat, 30 Nov 2019 15:34:20 -0500
Subject: [PATCH 049/109] add a "hard cutoff" for clusters

Drop atoms that are out of the hard cutoff anyway even if the bond is cut.
---
 dpgen/generator/lib/gaussian.py | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/dpgen/generator/lib/gaussian.py b/dpgen/generator/lib/gaussian.py
index 2669e5f56..4731bfd06 100644
--- a/dpgen/generator/lib/gaussian.py
+++ b/dpgen/generator/lib/gaussian.py
@@ -176,6 +176,7 @@ def make_gaussian_input(sys_data, fp_params):

 def take_cluster(old_conf_name, type_map, idx, jdata):
     cutoff = jdata['cluster_cutoff']
+    cutoff_hard = jdata.get('cluster_cutoff_hard', None)
     sys = dpdata.System(old_conf_name, fmt = 'lammps/dump', type_map = type_map)
     atom_names = sys['atom_names']
     atom_types = sys['atom_types']
@@ -190,27 +191,42 @@ def take_cluster(old_conf_name, type_map, idx, jdata):
     distances = all_atoms.get_distances(idx, range(len(all_atoms)), mic=True)
     distancescutoff = distances < cutoff
     cutoff_atoms_idx = np.where(distancescutoff)[0]
+    if cutoff_hard is not None:
+        distancescutoff_hard = distances < cutoff_hard
+        cutoff_atoms_idx_hard = np.where(distancescutoff_hard)[0]
     # make cutoff atoms in molecules
     taken_atoms_idx = []
     added = []
     for ii in range(frag_numb):
         frag_atoms_idx = np.where(frag_index == ii)[0]
+        if cutoff_hard is not None:
+            # drop atoms out of the hard cutoff anyway
+            frag_atoms_idx = np.intersect1d(frag_atoms_idx, cutoff_atoms_idx_hard)
         if np.any(np.isin(frag_atoms_idx, cutoff_atoms_idx)):
             if 'cluster_minify' in jdata and jdata['cluster_minify']:
-                # currently support C, H
+                # support for organic species
                 take_frag_idx=[]
                 for aa in frag_atoms_idx:
                     if np.any(np.isin(aa, cutoff_atoms_idx)):
+                        # atom is in the soft cutoff
+                        # pick up anyway
                         take_frag_idx.append(aa)
                     elif np.count_nonzero(np.logical_and(distancescutoff, graph.toarray()[aa]==1)):
+                        # atom is between the hard cutoff and the soft cutoff
+                        # and has a single bond with the atom inside
                        if all_atoms[aa].symbol == 'H':
+                            # for atom H: just add it
                            take_frag_idx.append(aa)
-                        elif all_atoms[aa].symbol == 'C':
+                        else:
+                            # for other atoms (C, O, etc.): replace it with a ghost H atom
                            near_atom_idx = np.nonzero(np.logical_and(distancescutoff, graph.toarray()[aa]>0))[0][0]
+                            vector =
all_atoms[aa].position - all_atoms[near_atom_idx].position new_position = all_atoms[near_atom_idx].position + vector / np.linalg.norm(vector) * 1.09 added.append(Atom('H', new_position)) elif np.count_nonzero(np.logical_and(distancescutoff, graph.toarray()[aa]>1)): + # if that atom has a double bond with the atom inside + # just pick up the whole fragment (within the hard cutoff) + # TODO: use a more fantastic method take_frag_idx=frag_atoms_idx break else: From a5857fd20bcabef5b46849f4ad0977caf164d951 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 1 Dec 2019 11:17:09 -0500 Subject: [PATCH 050/109] use GetBondOrder as GetBO has been removed --- dpgen/generator/lib/gaussian.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpgen/generator/lib/gaussian.py b/dpgen/generator/lib/gaussian.py index 4731bfd06..27c2a1bd6 100644 --- a/dpgen/generator/lib/gaussian.py +++ b/dpgen/generator/lib/gaussian.py @@ -48,7 +48,7 @@ def _crd2frag(symbols, crds, pbc=False, cell=None, return_bonds=False): bond = mol.GetBond(ii) a = bond.GetBeginAtom().GetId() b = bond.GetEndAtom().GetId() - bo = bond.GetBO() + bo = bond.GetBondOrder() if a >= atomnumber and b >= atomnumber: # duplicated continue From ba4a8a87ab4ae07bdab096ad1933323157b92993 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 1 Dec 2019 14:24:00 -0500 Subject: [PATCH 051/109] fix bug for openbabel 3.0 --- dpgen/generator/lib/gaussian.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dpgen/generator/lib/gaussian.py b/dpgen/generator/lib/gaussian.py index 27c2a1bd6..79a420e27 100644 --- a/dpgen/generator/lib/gaussian.py +++ b/dpgen/generator/lib/gaussian.py @@ -76,7 +76,11 @@ def _crd2mul(symbols, crds): mol = openbabel.OBMol() conv.ReadString(mol, xyzstring) gjfstring = conv.WriteString(mol) - mul = int(gjfstring.split('\n')[4].split()[1]) + try: + mul = int(gjfstring.split('\n')[4].split()[1]) + except IndexError: + # openbabel 3.0 + mul = int(gjfstring.split('\n')[5].split()[1]) return mul From 087755978f88dc5d6f3216ddc02a980de48d6d6c Mon Sep 17 00:00:00 2001 From: Han Wang Date: Mon, 2 Dec 2019 12:40:05 +0800 Subject: [PATCH 052/109] control gpu multiplicity --- dpgen/dispatcher/Batch.py | 6 +++++- tests/dispatcher/shell/test_shell_local.py | 7 +++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/dpgen/dispatcher/Batch.py b/dpgen/dispatcher/Batch.py index 4da2d196b..3f9629171 100644 --- a/dpgen/dispatcher/Batch.py +++ b/dpgen/dispatcher/Batch.py @@ -77,6 +77,10 @@ def sub_script(self, self.manual_gpu = res['manual_cuda_devices'] except: self.manual_gpu = 0 + try: + self.manual_gpu_multiplicity = res['manual_cuda_multiplicity'] + except: + self.manual_gpu_multiplicity = 1 for ii in range(len(cmd)): # for one command ret += self._sub_script_inner(job_dirs, @@ -151,7 +155,7 @@ def _sub_script_inner(self, self.cmd_cnt += 1 ret += 'cd %s\n' % self.context.remote_root ret += 'test $? 
-ne 0 && exit\n'
-        if self.manual_gpu > 0 and self.cmd_cnt % self.manual_gpu == 0:
+        if self.manual_gpu > 0 and self.cmd_cnt % (self.manual_gpu * self.manual_gpu_multiplicity) == 0:
             ret += '\nwait\n\n'
         ret += '\nwait\n\n'
         return ret
diff --git a/tests/dispatcher/shell/test_shell_local.py b/tests/dispatcher/shell/test_shell_local.py
index 36a1eb119..8610d9bd5 100644
--- a/tests/dispatcher/shell/test_shell_local.py
+++ b/tests/dispatcher/shell/test_shell_local.py
@@ -40,6 +40,13 @@ def test_manual_gpu(self):
         with open('run.sub.gpu', 'w') as fp:
             fp.write(ret)
 
+    def test_manual_gpu_multi(self):
+        job_dirs = ['task0', 'task1', 'task2', 'task3']
+        res = {'manual_cuda_devices': 2, 'manual_cuda_multiplicity': 2}
+        ret = self.shell.sub_script(job_dirs, ['touch test1', 'touch test2'], res = res)
+        with open('run.sub.gpu.multi', 'w') as fp:
+            fp.write(ret)
+
     def test_gen_sub_script(self):
         job_dirs = ['task0', 'task1']
         self.shell.context.upload(job_dirs, ['test0'])
From 500490aba7a7c5ab03547ff66ef48badac1bff60 Mon Sep 17 00:00:00 2001
From: Yuan Fengbo
Date: Mon, 2 Dec 2019 16:48:35 +0800
Subject: [PATCH 053/109] update auto_test doc, delete the details in auto_test

---
 README.md | 747 ++++++++++--------------------------------------------
 1 file changed, 131 insertions(+), 616 deletions(-)

diff --git a/README.md b/README.md
index 76c5f2cd9..9291c1811 100644
--- a/README.md
+++ b/README.md
@@ -548,6 +548,7 @@ Here are examples for setting:
 
 ## Test: Auto-test for Deep Generator
 
+### configure and param.json
At this step, we assume that you have prepared some graph files like `graph.*.pb` and the particular pseudopotential `POTCAR`.

The main code of this step is
@@ -569,6 +570,8 @@ The whole program contains a series of tasks shown as follows. In each task, the

+ `05.surf`: the surface formation energy

+Dpgen auto_test will automatically make a dir for each task it tests; the dir name is the same as the task name. The test results are written to a plain text file named `result`, e.g. `cat ./01.eos/Al/std-fcc/deepmd/result`.
+
We take Al as an example to show the parameter settings of `param.json`.
The first part is the fundamental setting for particular alloy system.
@@ -582,6 +585,7 @@ The first part is the fundamental setting for particular alloy system.
    "task_type":"deepmd",
    "task":"eos",
```
+
You need to add the specified paths of necessary `POTCAR` files in "potcar_map". The different `POTCAR` paths are separated by commas. Then you also need to add the folder path of particular configuration, which contains `POSCAR` file.
```
"conf_dir":"confs/Al/std-fcc"
```
@@ -618,78 +622,21 @@ The second part is the computational settings for vasp and lammps. According to
 },
 ```
 The last part is the optional settings for various tasks mentioned above. You can change the parameters according to actual needs.
-```json
-    "_comment":"00.equi",
-    "alloy_shift":false,
-```
-+ `alloy_shift`:(boolean) whether to compute the alloy formation energy. If you test alloy and set 'true', you need to compute the energies of corresponding elements respectively first of all. Please set 'false' when test single element.
-
-```json
-    "_comment": "01.eos",
-    "vol_start":	12,
-    "vol_end":	22,
-    "vol_step":	0.5,
-```
-+ `vol_start`, `vol_end` and `vol_step` determine the volumetric range and accuracy of the **eos**.
-
-```json
-    "_comment": "02.elastic",
-    "norm_deform": 2e-2,
-    "shear_deform": 5e-2,
-```
-+ `norm_deform` and `shear_deform` are the scales of material deformation.
-This task uses the stress-strain relationship to calculate the elastic constant. 
- -```json - "_comment":"03.vacancy", - "supercell":[3,3,3], -``` -+ `supercell`:(list of integer) the supercell size used to generate vacancy defect and interstitial defect -```json - "_comment":"04.interstitial", - "insert_ele":["Al"], - "reprod-opt":false, -``` -+ `insert_ele`:(list of string) the elements used to generate point interstitial defect -+ `repord-opt`:(boolean) whether to reproduce trajectories of interstitial defect -```json - "_comment": "05.surface", - "min_slab_size": 10, - "min_vacuum_size": 11, - "_comment": "pert xz to work around vasp bug...", - "pert_xz": 0.01, - "max_miller": 2, - "static-opt":false, - "relax_box":false, -``` -+ `min_slab_size` and `min_vacuum_size` are the minimum size of slab thickness and the vacuume width. -+ `pert_xz` is the perturbation through xz direction used to compute surface energy. -+ `max_miller` (integer) is the maximum miller index -+ `static-opt`:(boolean) whether to use atomic relaxation to compute surface energy. if false, the structure will be relaxed. -+ `relax_box`:(boolean) set true if the box is relaxed, otherwise only relax atom positions. -## Test: The content of the auto_test -The atom configuration file to be testes. -### param.json +A dictionary |Key | Type | Example | Discription | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| | potcar_map | dict | {"Al": "example/POTCAR"} |a dict like { "element" : "position of POTCAR" } | -| conf_dir | path_like | confs/Al/std-fcc | the dir which contains vasp's POSCAR | +| conf_dir | path like string | "confs/Al/std-fcc" | the dir which contains vasp's POSCAR | +| key_id | string| "DZIwdXCXg1fiXXXXXX" |the API key of Material project| +| task_type | string | "vasp" | task type, one of deepmd vasp meam | +| task | string | "equi" | task, one of equi, eos, elastic, vacancy, interstitial, surf or all | +| vasp_params| dict | seeing below | params relating to vasp INCAR| +| lammps_params | dict| seeing below| params relating to lammps | +the keys in param["vasp_params"] -### 00.equi - -equi will test the the equilibrium state and get the following results. -#### 1.test results - -Field | Type | Example | Discription | -| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| -| EpA(eV) | real number | -3.7468 | the potential energy of a atom| -| VpA(A^3)| real number | 16.511| theEquilibrium volume of a atom | - -#### 2.param.json -vasp |Key | Type | Example | Discription | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| | ecut | real number | 650 | the plane wave cutoff for grid. 
| @@ -699,240 +646,95 @@ vasp | npar | positive integer | 1 | the number of k-points that are to be treated in parallel | | kpar | positive integer | 1 | the number of bands that are treated in parallel | -lammps -|key | Type | Example| Discription| +the keys in param["lammps_params"] +|Key | Type | Example | Discription | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| -| store_stable | boolean | true |whether to store the stable energy and volume| - - -#### 3.atom configuration -The atom configuration is specified by param['conf_dir']/POSCAR - -The box is periodic, so that particles interact across the boundary, and they can exit one end of the box and re-enter the other end - -Here are the configuration file used by lammps and vasp , note that the following 2 configuration are **equal**. - - lammps atom configuration example -``` -# Al/std-fcc/conf.lmp +| model_dir | path like string | "example/Al_model" | the model dir which contains .pb file | +| type_map | list of string | ["Al"] | a list contains the element, usually useful for multiple element situation | +| model_name | boolean | false | | +| model_param_type | boolean | false | | -1 atoms -1 atom types - 0.0000000000 2.8637824638 xlo xhi - 0.0000000000 2.4801083646 ylo yhi - 0.0000000000 2.3382685902 zlo zhi - 1.4318912319 1.4318912319 0.8267027882 xy xz yz - -Atoms # atomic - -1 1 0.0000000000 0.0000000000 0.0000000000 -``` -vasp Al/std-fcc/POSCAR -``` -Al1 -1.0 -0.000000 2.025000 2.025000 -2.025000 0.000000 2.025000 -2.025000 2.025000 0.000000 -Al -1 -direct -0.000000 0.000000 0.000000 Al +### auto_test tasks +#### 00.equi +```json + "_comment":"00.equi", + "store_stable":true, ``` ++ `store_stable`:(boolean) whether to store the stable energy and volume -vasp atom configuration example - - - -#### 4.lammps -##### 4.1 input file -lammps perform energy minimizations of the system,and use the results of energy minimizations as the test result. +param.json +| Field | Type | Example | Discription | +| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| +| EpA(eV) | real number | -3.7468 | the potential energy of a atom| +| VpA(A^3)| real number | 16.511| theEquilibrium volume of a atom | +test results ``` -# lammps.in -dimension 3 -boundary p p p -atom_style atomic -box tilt large -read_data conf.lmp - -...... - -min_style cg -fix 1 all box/relax iso 0.0 # -minimize 1.000000e-12 1.000000e-06 5000 500000 -fix 1 all box/relax aniso 0.0 -minimize 1.000000e-12 1.000000e-06 5000 500000 -...... +conf_dir: EpA(eV) VpA(A^3) +confs/Al/std-fcc -3.7468 16.511 ``` -`minimize` perform an energy minimization of the system, by iteratively adjusting atom coordinates. Iterations are terminated when one of the stopping criteria is satisfied. -`fix` Apply an external pressure or stress tensor to the simulation box during an energy minimization. This allows the box size and shape to vary during the iterations of the minimizer so that the final configuration will be both an energy minimum for the potential energy of the atoms, and the system pressure tensor will be close to the specified external tensor. - -`iso/aniso` The keyword iso means couple all 3 diagonal components together when pressure is computed (hydrostatic pressure), and dilate/contract the dimensions together. 
-The keyword aniso means x, y, and z dimensions are controlled independently using the Pxx, Pyy, and Pzz components of the stress tensor as the driving forces +| Field | Type | Example | Discription | +| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| +| EpA(eV) | real number | -3.7468 | the potential energy of a atom| +| VpA(A^3)| real number | 16.511| theEquilibrium volume of a atom | -#### 4.2 output file -after energy minimization, -dpgen will use the PotEng of the last step -3.7467628 as `EpA(eV)` -dpgen will use the Volume of the last step 16.510567as `VpA(A^3)` -``` -Step PotEng Pxx Pyy Pzz Pxy Pxz Pyz Lx Ly Lz Volume c_mype - 0 -3.7465689 -5490.0435 -5489.99 -5489.9875 8.0059506e-05 -0.00015863904 0.00072809883 2.8637825 2.4801084 2.3382686 16.607531 -3.7465689 - 21 -3.7467629 -829.55143 -829.56516 -829.55757 -1.7925939e-05 -9.5713646e-05 0.0038858896 2.8581981 2.4752722 2.333709 16.510567 -3.7467629 - 22 -3.7467628 -829.56697 -829.63547 -829.55631 0.00051858009 1.8511104e-07 -0.0017081319 2.8581981 2.4752722 2.333709 16.510567 -3.7467628 +#### 01.eos +```json + "_comment": "01.eos", + "vol_start": 12, + "vol_end": 22, + "vol_step": 0.5, ``` ++ `vol_start`, `vol_end` and `vol_step` determine the volumetric range and accuracy of the **eos**. -#### 5 vasp -##### 5.1 INCAR -some field of the INCAR will be changed according to +test results ``` -PREC=A -ENCUT=650 # will use param.json's -# ISYM=0 -ALGO=fast -EDIFF=1.000000e-06 # will use param.json's -EDIFFG=-0.01 -LREAL=A -NPAR=1 # will use param.json's -KPAR=1 # will use param.json's - -ISMEAR=1 -SIGMA=0.220000 - -ISTART=0 -ICHARG=2 -NELM=100 -NELMIN=6 -ISIF=6 -IBRION=2 - -NSW=50 - -LWAVE=F -LCHARG=F -PSTRESS=0 - -KSPACING=0.100000 # will use param.json's -KGAMMA=F # will use param.json's +conf_dir:confs/Al/std-fcc +VpA(A^3) EpA(eV) +15.500 -3.7306 +16.000 -3.7429 +16.500 -3.7468 +17.000 -3.7430 ``` -### 01.eos -eos will calculate the equation of state and get the following results. -Auto_test 01.eos will calculate the potential energy of single atom in a range of volumes. - -You may then use the test results to draw the equation of state curve. - -#### 1.test results - -Field | Type | Example | Discription | +| Field | Type| Example| Discription | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| | EpA(eV) | list of real number | [15.5,16.0,16.5,17.0] | the potential energy of a atom in quilibrium state| | VpA(A^3)| list of real number |[-3.7306, -3.7429, -3.746762, -3.7430] | the equilibrium volume of a atom | -#### 2.param.json +#### 02.elastic +```json + "_comment": "02.elastic", + "norm_deform": 2e-2, + "shear_deform": 5e-2, +``` ++ `norm_deform` and `shear_deform` are the scales of material deformation. +This task uses the stress-strain relationship to calculate the elastic constant. -vol_start, vol_end and vol_step determine the volumetric range and accuracy of the eos. |Key | Type | Example | Discription | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| -| vol_start |real number | 12 | the start volume | -| vol_end | real number | 22 | the end volume | -| vol_step | real number | 0.5 | the intervel to change volume | - -The param.json above will become a list below. 
-each volume in volume_list will become the target - -```python -volume_list = list(range(vol_start,vol_end,vol_step)) -``` - -#### 3.atom configuration - -The initial atom configuration will be the equilibrium state atom configuration(configuration of the last step of 00.equi) - -The box will contain only one atom. - -The box is periodic, so that particles interact across the boundary, and they can exit one end of the box and re-enter the other end. - - -And the box and atom position will deform proportionally in x,y,z direction to reach the target volume. - -For, examle, the file below will generate a box with single atom. -the box volume is 2.857588\*2.474744\*2.333211==16.50 A^(3) -``` -# vol-16.50/conf.lmp - -1 atoms -1 atom types - 0.0000000000 2.8575880000 xlo xhi - 0.0000000000 2.4747440000 ylo yhi - 0.0000000000 2.3332110000 zlo zhi - 1.4287940000 1.4287940000 0.8249150000 xy xz yz - -Atoms # atomic - -1 1 2.8547961365 3.2972419997 2.3309314529 -``` -the images below are atom configuration of volume 12.0, 16.5, 21.5 -![](https://i.imgur.com/R1mXWrM.png)![](https://i.imgur.com/IsQZW8n.png)![](https://i.imgur.com/wTl375d.png) - - - - -#### 4.lammps -Lammps will create a serials of folders and each of the folder will contain different a conf.lmp file. - -All of the conf.lmp will contain only one atom, the box size of these conf.lmp will be different - -##### 4.1 input file - -lammps perform an energy minimizations of the system,and use the results of energy minimizations as the test result. - -notice that lammps will not fix all box/relax 0.0, that means lammps will **not** try to keep the stress of box to 0.0 (this is different from 00.equi) -``` -# vol-16.50 lammps.in - -units metal -dimension 3 -boundary p p p -atom_style atomic -box tilt large -read_data conf.lmp - -...... - -min_style cg -minimize 1.000000e-12 1.000000e-06 5000 500000 -...... -``` - -##### 4.2 output file - - -After the energy minimization(in this example, the energy minimization had run only one step before it stopped). - -dpgen will use the PotEng of the last step -3.7467628 as `EpA(eV)` where `VpA(A^3)==16.50` +| norm_deform | real number | 0.02 | uniaxial deformation range | +| shear_deform | real number | 0.05| shear deformation range | +test results ``` -# vol-16.50/log.lammps - -Step PotEng Pxx Pyy Pzz Pxy Pxz Pyz Lx Ly Lz Volume c_mype - 0 -3.7467629 -315.56544 -315.60474 -315.65689 -0.00081929052 -0.0015852434 -0.038537753 2.857588 2.474744 2.333211 16.499999 -3.7467629 - 1 -3.7467629 -315.56544 -315.60474 -315.65689 -0.00081929052 -0.0015852434 -0.038537753 2.857588 2.474744 2.333211 16.499999 -3.746762 +conf_dir:confs/Al/std-fcc +130.50 57.45 54.45 4.24 0.00 0.00 +57.61 130.31 54.45 -4.29 -0.00 -0.00 +54.48 54.48 133.32 -0.00 -0.00 -0.00 +4.49 -4.02 -0.89 33.78 0.00 -0.00 +-0.00 -0.00 -0.00 -0.00 33.77 4.29 +0.00 -0.00 -0.00 -0.00 4.62 36.86 +# Bulk Modulus BV = 80.78 GPa +# Shear Modulus GV = 36.07 GPa +# Youngs Modulus EV = 94.19 GPa +# Poission Ratio uV = 0.31 ``` -#### 5.VASP -VASP will calculate the potential energy of the atom configuration. The VASP INCAR is the same as 00.equi's. - -### 02.elasitc - -Calculate the elastic module, bulk modulus, shear modulus, Youngs Modulus, Poission Ratio. 
- -#### 1.test results - -Field | Type | Example | Discription | +| Field | Type | Example | Discription | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| | elastic module(GPa)| 6*6 matrix of real number| [[130.50 57.45 54.45 4.24 0.00 0.00] [57.61 130.31 54.45 -4.29 -0.00 -0.00] [54.48 54.48 133.32 -0.00 -0.00 -0.00] [4.49 -4.02 -0.89 33.78 0.00 -0.00] [-0.00 -0.00 -0.00 -0.00 33.77 4.29] [0.00 -0.00 -0.00 -0.00 4.62 36.86]]| Voigt-notation elastic module;sequence of row and column is (xx, yy, zz, yz, zx, xy)| | bulk modulus(GPa) | real number | 80.78 | bulk modulus | @@ -940,353 +742,78 @@ Field | Type | Example | Youngs Modulus(GPa) | real number | 94.19 | Youngs Modulus| | Poission Ratio | real number | 0.31 | Poission Ratio | -#### 2.param.json - -norm_deform and shear_deform are the scales of material deformation. This task uses the stress-strain relationship to calculate the elastic constant. -|Key | Type | Example | Discription | -| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| -| norm_deform | real number | 0.02 | uniaxial deformation range | -| shear_deform | real number | 0.05| shear deformation range | - - -#### 3.atom configuration -##### 3.1 uniaxial deformation -norm_deform=0.02 will become a list ,and then this list will become a list of 3*3 matrix,which will change the configuration of the simulation box. -```python -norm_deform=0.02 -norm_strains = [-norm_def, -0.5*norm_deform, 0.5*norm_deform, norm_def] -# the result of the command above -norm_strains = [-0.02, -0.01, 0.01, 0.02] +#### 03.vacancy +```json + "_comment":"03.vacancy", + "supercell":[3,3,3], ``` -#For X-axis uniaxial deformation, the list above will become the matrix list below. -$norm\_x\_matrix\_list= -[\begin{pmatrix} -0.98 & 0 & 0\\ -0 & 1 & 0\\ -0 & 0 & 1 -\end{pmatrix} , \begin{pmatrix} -0.99 & 0 & 0\\ -0 & 1 & 0\\ -0 & 0 & 1 -\end{pmatrix}, -\begin{pmatrix} -1.01 & 0 & 0\\ -0 & 1 & 0\\ -0 & 0 & 1 -\end{pmatrix}, \begin{pmatrix} -1.02 & 0 & 0\\ -0 & 1 & 0\\ -0 & 0 & 1 -\end{pmatrix}]$ - - - -The initial atom configuration will be the equilibrium state atom configuration(configuration of the last step of 00.equi) - -for each matrix in norm_matrix_list, the atom position and box size of the initial atom configuration will deform accordingly. - -For example, the initial box size will be a Lower triangular matrix -$box\_size=\begin{pmatrix} -xx & & \\ -xy & yy & \\ -xz & yz & zz -\end{pmatrix} =\begin{pmatrix} -2.858198 & 0.0 & 0.0\\ -1.429099 & 2.475272 & 0\\ -1.429099 & 0.825091 & 2.333709 -\end{pmatrix}$ - -The atom position in the box is -(note that the atom may be out of the simulation box, but the box is periodic, the atom will be move into the box automatically) -$atom\_position = \begin{pmatrix} -x & y & z -\end{pmatrix} = \begin{pmatrix} -2.855406 & 3.297945 & 2.331429 -\end{pmatrix}$ - -The box_size and atom_position will according to the matrix in norm_matrix_list. 
-for example, - -$box\_size\_x\_axis\_0.98=\begin{pmatrix} -2.858198 & 0.0 & 0.0\\ -1.429099 & 2.475272 & 0\\ -1.429099 & 0.825091 & 2.333709 -\end{pmatrix} \begin{pmatrix} -0.98 & 0 & 0\\ -0 & 1 & 0\\ -0 & 0 & 1 -\end{pmatrix}=\begin{pmatrix} -2.800451 & 0 & 0\\ -1.400225 & 2.475272 & 0\\ -1.400225 & 0.825091 & 2.333709 -\end{pmatrix}$ - -$atom\_position\_x\_axis\_0.98 = \begin{pmatrix} -2.855406 & 3.297945 & 2.331429 -\end{pmatrix} \begin{pmatrix} -0.98 & 0 & 0\\ -0 & 1 & 0\\ -0 & 0 & 1 -\end{pmatrix}$ - -##### 3.2 shear deform - -shear_deform=0.05 will become a list ,and then this list will become a list of 3*3 matrix,which will change the configuration of the simulation box. -```python -shear_deform=0.05 -shear_strains = [-shear_deform, -0.5*shear_deform, 0.5*shear_deform, shear_deform] - -# the result of the command above -shear_strains = [-0.05, -0.025, 0.025, 0.05] -``` - -#For yz shear deformation, the list above will become the matrix list below. - -$shear\_yz\_matrix\_list= -[\begin{pmatrix} -1 & 0 & 0\\ -0 & 1 & -0.05\\ -0 & -0.05 & 1 -\end{pmatrix} , \begin{pmatrix} -1 & 0 & 0\\ -0 & 1 & -0.025\\ -0 & -0.025 & 1 -\end{pmatrix}, -\begin{pmatrix} -1 & 0 & 0\\ -0 & 1 & 0.025\\ -0 & 0.025 & 1 -\end{pmatrix}, \begin{pmatrix} -1 & 0 & 0\\ -0 & 1 & 0.05\\ -0 & 0.05 & 1 -\end{pmatrix}]$ - -and then box_size matrix and atom_position vector will change according to the matrix above - -#### 4.lammps -##### 4.1 input file - -lammps perform an energy minimizations of the system,and use the results of energy minimizations as the test result. - -notice that lammps will not fix all box/relax 0.0, that means lammps will **not** try to keep the stress of box to 0.0 (this is different from 00.equi) -``` -# dfm-000/lammps , x-axis deform -0.02 - -min_style cg -minimize 1.000000e-12 1.000000e-06 5000 500000 -``` -##### 4.2 output file ++ `supercell`:(list of integer) the supercell size used to generate vacancy defect and interstitial defect -After the energy minimization(in this example, the energy minimization had run only one step before it stopped). +|Key | Type | Example | Discription | +| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| +| supercell | list of integer | [3,3,3] | the supercell size used to generate vacancy defect and interstitial defect | +test result ``` -Step PotEng Pxx Pyy Pzz Pxy Pxz Pyz Lx Ly Lz Volume c_mype - 0 -3.7440621 27804.084 11040.856 10437.716 0.082776521 0.063238993 852.90808 2.800451 2.475272 2.333709 16.176986 -3.7440621 - 1 -3.7440621 27804.102 11040.877 10437.739 0.081399331 0.061721109 852.90771 2.800451 2.475272 2.333709 16.176986 -3.7440621 +conf_dir:confs/Al/std-fcc +Structure: Vac_E(eV) E(eV) equi_E(eV) +struct-3x3x3-000: 0.859 -96.557 -97.416 ``` - -will use the result Pxx Pyy Pzz Pxy Pxz Pyz of the last MD step as the stress tensor. -that is, - -$stress\_x\_axis\_0.98=\begin{pmatrix} -Pxx & Pxy & Pxz\\ -Pyx & Pyy & Pyz\\ -Pzx & Pzy & Pzz -\end{pmatrix}=\begin{pmatrix} -27804.1 & 0.0813993 & 0.0617211\\ -0.0813993 & 11040.9 & 852.908\\ -0.0617211 & 852.908 & 10437.7 -\end{pmatrix}$ - -#### 5.vasp -The atom configuration is the same as lammps's. - -VASP will calculate the potential energy of the atom configuration. The VASP INCAR is the same as 00.equi's. - -#### 6.data analyze -the strain_matrix and corresponding stress_matrix will be used to calculate the elastic tensor using least-squares fit. 
-```python3 -# lst_strains: list of strain objects to fit -# lst_stresses: list of stress objects to use in fit -# eq_stress: the stress of equilibrium state - -pymatgen.analysis.elasticity.elastic.from_independent_strains(lst_strain, - lst_stress, - eq_stress = equi_stress) - -``` -This will return elastic module, bulk modulus, shear modulus, Youngs modulus and Poission ratio. - -### 03.vacancy -Calculate the vacancy formation energy - -#### 1.test results -the results will be a table,here only show one row of the table. - -Field | Type | Example | Discription | +| Field | Type | Example | Discription | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| |Structure| list of string |['struct-3x3x3-000'] | structure name| | Vac_E(eV) | real number |0.723 | the vacancy formation energy | | E(eV) | real number | -96.684 | potential energy of the vacancy configuration | | equi_E(eV) | real number |-97.407 | potential energy of the equilibrium state| +#### 04.interstitial +```json + "_comment":"04.interstitial", + "insert_ele":["Al"], + "reprod-opt":false, +``` ++ `insert_ele`:(list of string) the elements used to generate point interstitial defect ++ `repord-opt`:(boolean) whether to reproduce trajectories of interstitial defect -#### 2.param.json - -norm_deform and shear_deform are the scales of material deformation. This task uses the stress-strain relationship to calculate the elastic constant. |Key | Type | Example | Discription | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| -| supercell | list of integer | [3,3,3] | the supercell size used to generate vacancy defect and interstitial defect | - -#### 3.atom configuration -Auto_test will use the 00.equi result atom configuration CONTCAR. -and create a supercell with param.json's supercell. - -The atom configuration will be auto generated by pymatgen's module -Class pymatgen.analysis.defects.generators.VacancyGenerator. - -Atom configuration for vacancies will be based on periodically equivalent sites. 
- -```python3 -pymatgen.analysis.defects.generators.VacancyGenerator(Structure.from_file(task_poscar)) -``` -VASP CONTCAR of 00.equi -``` -Al1 - 1.00000000000000 - 0.0000000000000000 2.0213150252059031 2.0213150252059031 - 2.0213150252059031 0.0000000000000000 2.0213150252059031 - 2.0213150252059031 2.0213150252059031 -0.0000000000000000 - Al - 1 -Direct - 0.0000000000000000 0.0000000000000000 0.0000000000000000 - - 0.00000000E+00 0.00000000E+00 0.00000000E+00 -``` - -vasp POSCAR to test vacancy formation energy -``` - Al26 -1.0 -0.000000 6.063945 6.063945 -6.063945 0.000000 6.063945 -6.063945 6.063945 0.000000 -Al -26 -direct -0.000000 0.000000 0.333333 Al -0.000000 0.000000 0.666667 Al -0.000000 0.333333 0.000000 Al -0.000000 0.333333 0.333333 Al -1.000000 0.333333 0.666667 Al -0.000000 0.666667 0.000000 Al -1.000000 0.666667 0.333333 Al -0.000000 0.666667 0.666667 Al -0.333333 0.000000 0.000000 Al -0.333333 0.000000 0.333333 Al -0.333333 1.000000 0.666667 Al -0.333333 0.333333 0.000000 Al -0.333333 0.333333 0.333333 Al -0.333333 0.333333 0.666667 Al -0.333333 0.666667 0.000000 Al -0.333333 0.666667 0.333333 Al -0.333333 0.666667 0.666667 Al -0.666667 0.000000 0.000000 Al -0.666667 1.000000 0.333333 Al -0.666667 0.000000 0.666667 Al -0.666667 0.333333 0.000000 Al -0.666667 0.333333 0.333333 Al -0.666667 0.333333 0.666667 Al -0.666667 0.666667 0.000000 Al -0.666667 0.666667 0.333333 Al -0.666667 0.666667 0.666667 Al -``` - - -#### 4.lammps -lammps perform energy minimizations of the system,a nd use the potential energy results of energy minimizations as the vacancy formation atom configuration result. +| insert_ele | list of string | ["Al"] | the elements used to generate point interstitial defect | +| reprod-opt | boolean | false | whether to reproduce trajectories of interstitial defect| -The formation energy will be the different between vacancy configuration and equilibrium configuration. +test result ``` -min_style cg -fix 1 all box/relax iso ${Px} -minimize 1.000000e-12 1.000000e-06 5000 500000 -fix 1 all box/relax aniso ${Px} -minimize 1.000000e-12 1.000000e-06 5000 500000 +conf_dir:confs/Al/std-fcc +Insert_ele-Struct: Inter_E(eV) E(eV) equi_E(eV) +struct-Al-3x3x3-000: 3.919 -100.991 -104.909 +struct-Al-3x3x3-001: 2.681 -102.229 -104.909 ``` -#### 5.VASP -VASP will calculate the potential energy of vacancy configuration and equilibrium configuration. The formation energy will be the different between them. - -### 04.interstitial -Calculate the interstitial formation energy. - -#### 1.test results -the results will be in a table, here only show one row of the table. 
-

| Field | Type | Example | Description |
| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------|
|Structure| string |'struct-Al-3x3x3-000' | structure name|
| Inter_E(eV) | real number |0.723 | the interstitial formation energy |
| E(eV) | real number | -96.684 | potential energy of the interstitial configuration |
| equi_E(eV) | real number |-97.407 | potential energy of the equilibrium state|

#### 05.surface

```json
    "_comment": "05.surface",
    "min_slab_size":	10,
    "min_vacuum_size":	11,
    "_comment": "pert xz to work around vasp bug...",
    "pert_xz":	0.01,
    "max_miller": 2,
    "static-opt":false,
    "relax_box":false,
```

+ `min_slab_size` and `min_vacuum_size` are the minimum sizes of the slab thickness and the vacuum width.
+ `pert_xz` is the perturbation through the xz direction used to compute the surface energy.
+ `max_miller` (integer) is the maximum miller index
+ `static-opt`:(boolean) whether to use a static (no relaxation) calculation for the surface energy; if false, the structure will be relaxed.
+ `relax_box`:(boolean) set true if the box is relaxed, otherwise only relax atom positions.

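For reference, the `Surf_E(J/m^2)` reported below can be recovered from the per-atom energies. A hedged sketch (`n_atoms` and `area` are hypothetical slab parameters, not dpgen inputs; 1 eV/A^2 = 16.0218 J/m^2):

```python
EV_PER_A2_TO_J_PER_M2 = 16.0218  # unit conversion factor

def surface_energy(epa_surf, epa_equi, n_atoms, area):
    # total excess energy of the slab, shared by its two free surfaces
    excess = n_atoms * (epa_surf - epa_equi)
    return excess / (2.0 * area) * EV_PER_A2_TO_J_PER_M2
```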
|Key | Type | Example | Description |
| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------|
| min_slab_size| real number| 10 | the minimum size of slab thickness |
|min_vacuum_size | real number| 11 | the minimum size of the vacuum width |
|pert_xz | real number| 0.01 | the perturbation through xz direction used to compute surface energy |
|max_miller | integer| 2 | the maximum miller index |
|static-opt|boolean| false | whether to use a static (no relaxation) calculation for the surface energy; if false, the structure will be relaxed |
|relax_box | boolean | false | set true if the box is relaxed, otherwise only relax atom positions |

test results
```
conf_dir:confs/Al/std-fcc
Miller_Indices: Surf_E(J/m^2) EpA(eV) equi_EpA(eV)
struct-000-m1.1.1m: 0.673 -3.628 -3.747
struct-001-m2.2.1m: 0.917 -3.592 -3.747
```

| Field | Type | Example| Description|
| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------|
|Miller_Indices| string | struct-000-m1.1.1m | Miller indices|
|Surf_E(J/m^2)| real number | 0.673 | the surface formation energy |
| EpA(eV) | real number | -3.628 | potential energy per atom of the surface configuration |
| equi_EpA | real number | -3.747 | potential energy per atom of the equilibrium state|

### The content of the auto_test
To see what dpgen autotest will actually do, including the lammps and vasp scripts and the input and atom configuration files it generates, please refer to https://hackmd.io/@yeql5ephQLaGJGgFgpvIDw/rJY1FO92B
## Set up machine
When switching to a new machine, you may modify the `MACHINE` file according to the actual circumstances. Once you have finished, the `MACHINE` can be re-used for any DP-GEN tasks without any extra effort. 
From 08c864fe1784d6e41f039c88d0e834708cdf1dcf Mon Sep 17 00:00:00 2001 From: AnguseZhang <529133328@qq.con> Date: Mon, 2 Dec 2019 22:26:54 +0800 Subject: [PATCH 054/109] Set default kit-version to 1.0 and support 'dp train' --- dpgen/generator/run.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index 8855c5187..24257084c 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -356,7 +356,8 @@ def run_train (iter_index, deepmd_path = mdata['deepmd_path'] else: # 1.x - python_path = mdata['python_path'] + python_path = mdata.get('python_path', None) + train_command = mdata.get('train_command', 'dp') train_resources = mdata['train_resources'] # paths @@ -379,12 +380,21 @@ def run_train (iter_index, commands.append(command) command = os.path.join(deepmd_path, 'bin/dp_frz') commands.append(command) - else: + elif python_path: # 1.x command = '%s -m deepmd train %s' % (python_path, train_input_file) commands.append(command) command = '%s -m deepmd freeze' % python_path commands.append(command) + else: + ## Commands are like `dp train` and `dp freeze` + ## train_command should not be None + assert(train_command) + command = '%s train %s' % (train_command, train_input_file) + commands.append(command) + command = '%s freeze' % train_command + commands.append(command) + #_tasks = [os.path.basename(ii) for ii in all_task] # run_tasks = [] @@ -1635,15 +1645,19 @@ def set_version(mdata): deepmd_version = '0.1' elif 'python_path' in mdata: deepmd_version = '1' + elif mdata['train_command'] == 'dp': + deepmd_version = '1' elif 'train' in mdata: if 'deepmd_path' in mdata['train'][0]: deepmd_version = '0.1' elif 'python_path' in mdata['train'][0]: deepmd_version = '1' + elif mdata['train'][0]['command'] == 'dp' : + deepmd_version = '1' else: deepmd_version = '0.1' else: - deepmd_version = '0.1' + deepmd_version = '1' # set mdata['deepmd_version'] = deepmd_version return mdata From 19b96704bdc8f5edf11329f1cb182ea9afb5aaa1 Mon Sep 17 00:00:00 2001 From: AnguseZhang <529133328@qq.con> Date: Mon, 2 Dec 2019 22:29:35 +0800 Subject: [PATCH 055/109] Fix bug in shell.py --- dpgen/dispatcher/Shell.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpgen/dispatcher/Shell.py b/dpgen/dispatcher/Shell.py index dd285c229..81bff523c 100644 --- a/dpgen/dispatcher/Shell.py +++ b/dpgen/dispatcher/Shell.py @@ -69,7 +69,7 @@ def sub_script_head(self, resources) : ret += ('module load %s\n' % ii) ret += ('\n') for ii in source_list : - ret += 'source %s\n' + ret += ('source %s\n' % ii) ret += ('\n') return ret From 495460a181c34cdfc61d3786bfb9f56213e5cf58 Mon Sep 17 00:00:00 2001 From: AnguseZhang <529133328@qq.con> Date: Mon, 2 Dec 2019 22:49:29 +0800 Subject: [PATCH 056/109] Set default kit-version to 1.0 and support 'dp train' --- dpgen/generator/run.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index 24257084c..2262fd42d 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -1643,19 +1643,19 @@ def post_fp (iter_index, def set_version(mdata): if 'deepmd_path' in mdata: deepmd_version = '0.1' - elif 'python_path' in mdata: - deepmd_version = '1' - elif mdata['train_command'] == 'dp': - deepmd_version = '1' + #elif 'python_path' in mdata: + # deepmd_version = '1' + #elif 'train_command' in mdata: + # deepmd_version = '1' elif 'train' in mdata: if 'deepmd_path' in mdata['train'][0]: deepmd_version = '0.1' 
- elif 'python_path' in mdata['train'][0]: - deepmd_version = '1' - elif mdata['train'][0]['command'] == 'dp' : - deepmd_version = '1' else: - deepmd_version = '0.1' + deepmd_version = '1' + # elif 'python_path' in mdata['train'][0]: + # deepmd_version = '1' + # elif 'command' in mdata['train']: + # deepmd_version = '1' else: deepmd_version = '1' # set From 8bbc2cf5e88bf09bac2a95c5cb109a4793bb9345 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 2 Dec 2019 18:44:53 -0500 Subject: [PATCH 057/109] add simplify workflow --- dpgen/main.py | 13 ++ dpgen/simpilify/__init__.py | 0 dpgen/simpilify/simplify.py | 397 ++++++++++++++++++++++++++++++++++++ 3 files changed, 410 insertions(+) create mode 100644 dpgen/simpilify/__init__.py create mode 100644 dpgen/simpilify/simplify.py diff --git a/dpgen/main.py b/dpgen/main.py index be4e08b9f..90b5d9f48 100644 --- a/dpgen/main.py +++ b/dpgen/main.py @@ -10,6 +10,7 @@ from dpgen.data.gen import gen_init_bulk from dpgen.data.surf import gen_init_surf from dpgen.data.reaction import gen_init_reaction +from dpgen.simpilify.simplify import gen_simpilify from dpgen.auto_test.run import gen_test from dpgen.database.run import db_run from dpgen.tools.run_report import run_report @@ -114,6 +115,18 @@ def main(): help="being loud") parser_rr.set_defaults(func=run_report) + # simpilify + parser_run = subparsers.add_parser( + "simpilify", + help="Simpilify data.") + parser_run.add_argument('PARAM', type=str, + help="parameter file, json/yaml format") + parser_run.add_argument('MACHINE', type=str, + help="machine file, json/yaml format") + parser_run.add_argument('-d','--debug', action='store_true', + help="log debug info") + parser_run.set_defaults(func=simpilify_run) + # test parser_test = subparsers.add_parser("test", help="Auto-test for Deep Potential.") parser_test.add_argument('PARAM', type=str, diff --git a/dpgen/simpilify/__init__.py b/dpgen/simpilify/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/dpgen/simpilify/simplify.py b/dpgen/simpilify/simplify.py new file mode 100644 index 000000000..50655c03c --- /dev/null +++ b/dpgen/simpilify/simplify.py @@ -0,0 +1,397 @@ +"""Simplify dataset (minimize the dataset size). 
+
Init:
pick up init data from dataset randomly

Iter:
00: train models (same as generator)
01: calculate model deviations of the rest dataset, pick up data with proper model deviation
02: fp (optional, if the original dataset does not have fp data, same as generator)
"""
import logging
import logging.handlers
import queue
import os
import json
import argparse
import pickle
import glob
import warnings

import dpdata
import numpy as np

from dpgen import dlog
from dpgen import SHORT_CMD
from dpgen.util import sepline
from dpgen.remote.decide_machine import decide_train_machine
from dpgen.dispatcher.Dispatcher import Dispatcher, make_dispatcher
from dpgen.generator.run import make_train, run_train, post_train, run_fp, post_fp, fp_name, model_devi_name, train_name
# TODO: maybe the following functions can be moved to dpgen.util
from dpgen.generator.lib.utils import log_iter, make_iter_name, create_path, record_iter
from dpgen.remote.decide_machine import decide_train_machine, decide_fp_machine, decide_model_devi_machine
from dpgen.generator.lib.gaussian import make_gaussian_input


picked_data_name = "data.picked"
rest_data_name = "data.rest"
accurate_data_name = "data.accurate"
detail_file_name_prefix = "details"


def init_pick(iter_index, jdata, mdata):
    """pick up init data from dataset randomly"""
    pick_data = jdata['pick_data']
    init_pick_number = jdata['init_pick_number']
    # use MultiSystems with System
    # TODO: support System and LabeledSystem
    # TODO: support MultiSystems with LabeledSystem
    # TODO: support other format
    systems = dpdata.MultiSystems(
        *[dpdata.System(os.path.join(pick_data, s), fmt='deepmd/npy')
          for s in os.listdir(pick_data)])
    # label the system
    labels = []
    for key, system in systems.systems.items():
        labels.extend([(key, j) for j in range(len(system))])
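    # NB: `labels` flattens the MultiSystems into (system_name, frame_index)
    # pairs, so a single shuffled index array below can address every frame
    # across all systems.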

    # random pick
    iter_name = make_iter_name(iter_index)
    create_path(iter_name)
    work_path = os.path.join(iter_name, model_devi_name)
    create_path(work_path)
    idx = np.arange(len(labels))
    np.random.shuffle(idx)
    pick_idx = idx[:init_pick_number]
    rest_idx = idx[init_pick_number:]

    # dump the init data
    picked_systems = dpdata.MultiSystems()
    for j in pick_idx:
        sys_name, sys_id = labels[j]
        picked_systems.append(systems[sys_name][sys_id])
    sys_data_path = os.path.join(work_path, picked_data_name)

    picked_systems.to_deepmd_raw(sys_data_path)
    picked_systems.to_deepmd_npy(sys_data_path, set_size=init_pick_number)

    # dump the rest data
    rest_systems = dpdata.MultiSystems()
    for j in rest_idx:
        sys_name, sys_id = labels[j]
        rest_systems.append(systems[sys_name][sys_id])
    sys_data_path = os.path.join(work_path, rest_data_name)
    rest_systems.to_deepmd_raw(sys_data_path)
    rest_systems.to_deepmd_npy(sys_data_path, set_size=rest_idx.size)


def make_model_devi(iter_index, jdata, mdata):
    """calculate the model deviation of the rest idx"""
    iter_name = make_iter_name(iter_index)
    work_path = os.path.join(iter_name, model_devi_name)
    create_path(work_path)
    # link the model
    train_path = os.path.join(iter_name, train_name)
    train_path = os.path.abspath(train_path)
    models = glob.glob(os.path.join(train_path, "graph*pb"))
    for mm in models:
        model_name = os.path.basename(mm)
        os.symlink(mm, os.path.join(work_path, model_name))
    # link the last rest data
    last_iter_name = make_iter_name(iter_index-1)
    rest_data_path = os.path.join(last_iter_name, model_devi_name, rest_data_name)
    if not os.path.exists(rest_data_path):
        return False
    for jj, subsystem in enumerate(os.listdir(rest_data_path)):
        task_name = "%03d.%06d" % (iter_index, jj)
        task_path = os.path.join(work_path, task_name)
        create_path(task_path)
        os.symlink(os.path.join(rest_data_path, subsystem),
                os.path.join(task_path, rest_data_name))
    return True


def run_model_devi(iter_index, jdata, mdata, dispatcher):
    """submit dp test tasks"""
    iter_name = make_iter_name(iter_index)
    work_path = os.path.join(iter_name, model_devi_name)
    # generate command
    commands = []
    # task dirs are named like 000.000000 by make_model_devi
    tasks = glob.glob(os.path.join(work_path, "[0-9]*.[0-9]*"))
    run_tasks = [os.path.basename(ii) for ii in tasks]
    # get models
    models = glob.glob(os.path.join(work_path, "graph*pb"))
    model_names = [os.path.basename(ii) for ii in models]
    task_model_list = []
    for ii in model_names:
        task_model_list.append(os.path.join('..', ii))
    # get max data size
    data_size = max([len(dpdata.System(os.path.join(
        task, rest_data_name), fmt="deepmd/npy")) for task in tasks])
    # models
    commands = []
    detail_file_names = []
    for ii, mm in enumerate(task_model_list):
        detail_file_name = "{prefix}.{ii}".format(
            prefix=detail_file_name_prefix,
            ii=ii,
        )
        # TODO: support 0.x?
        command = "{python} -m deepmd test -m {model} -s {system} -n {numb_test} -d {detail_file}".format(
            python=mdata['python_path'],
            model=mm,
            system=rest_data_name,
            numb_test=data_size,
            detail_file=detail_file_name,
        )
        commands.append(command)
        detail_file_names.append(detail_file_name)
    # submit
    try:
        model_devi_group_size = mdata['model_devi_group_size']
    except:
        model_devi_group_size = 1

    forward_files = [rest_data_name]
    backward_files = sum([[pf+".e.out", pf+".f.out", pf+".v.out"] for pf in detail_file_names], [])

    dispatcher.run_jobs(mdata['model_devi_resources'],
                        commands,
                        work_path,
                        run_tasks,
                        model_devi_group_size,
                        model_names,
                        forward_files,
                        backward_files,
                        outlog='model_devi.log',
                        errlog='model_devi.log')


def post_model_devi(iter_index, jdata, mdata):
    """calculate the model deviation"""
    iter_name = make_iter_name(iter_index)
    work_path = os.path.join(iter_name, model_devi_name)
    tasks = glob.glob(os.path.join(work_path, "[0-9]*.[0-9]*"))

    e_trust_lo = jdata['e_trust_lo']
    e_trust_hi = jdata['e_trust_hi']
    f_trust_lo = jdata['f_trust_lo']
    f_trust_hi = jdata['f_trust_hi']

    sys_accurate = dpdata.MultiSystems()
    sys_candinate = dpdata.MultiSystems()
    sys_failed = dpdata.MultiSystems()

    for task in tasks:
        # e.out
        details_e = glob.glob(os.path.join(task, "{}.*.e.out".format(detail_file_name_prefix)))
        e_all = np.array([np.loadtxt(detail_e, ndmin=2)[:, 1] for detail_e in details_e])
        e_std = np.std(e_all, axis=0)
        n_frame = e_std.size

        # f.out
        details_f = glob.glob(os.path.join(task, "{}.*.f.out".format(detail_file_name_prefix)))
        f_all = np.array([np.loadtxt(detail_f, ndmin=2)[:, 3:6].reshape((n_frame, -1, 3)) for detail_f in details_f])
        # (n_model, n_frame, n_atom, 3)
        f_std = np.std(f_all, axis=0)
        # (n_frame, n_atom, 3)
        f_std = np.linalg.norm(f_std, axis=2)
        # (n_frame, n_atom)
        f_std = np.max(f_std, axis=1)
        # (n_frame,)

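        # classify each frame against the trust intervals:
        #   candidate: the energy or force deviation falls inside [trust_lo, trust_hi)
        #   failed   : otherwise, a deviation reaches trust_hi
        #   accurate : both deviations stay below trust_lo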
        for subsys, e_devi, f_devi in zip(dpdata.System(os.path.join(task, rest_data_name), fmt='deepmd/npy'), e_std, f_std):
            if (e_devi < e_trust_hi and e_devi >= e_trust_lo) or (f_devi < f_trust_hi and f_devi >= f_trust_lo) :
                sys_candinate.append(subsys)
            elif (e_devi >= e_trust_hi ) or (f_devi >= f_trust_hi ):
                sys_failed.append(subsys)
            elif (e_devi < e_trust_lo and f_devi < f_trust_lo ):
                sys_accurate.append(subsys)
    counter = {"candidate": sys_candinate.get_nframes(), "accurate": sys_accurate.get_nframes(), "failed": sys_failed.get_nframes()}
    fp_sum = sum(counter.values())
    for cc_key, cc_value in counter.items():
        dlog.info("system {0:9s} : {1:6d} in {2:6d} {3:6.2f} %".format(cc_key, cc_value, fp_sum, cc_value/fp_sum*100))

    # label the candidate system
    labels = []
    for key, system in sys_candinate.systems.items():
        labels.extend([(key, j) for j in range(len(system))])
    # candinate: pick up randomly
    iter_pick_number = jdata['iter_pick_number']
    idx = np.arange(counter['candidate'])
    np.random.shuffle(idx)
    pick_idx = idx[:iter_pick_number]
    rest_idx = idx[iter_pick_number:]

    # dump the picked candinate data
    picked_systems = dpdata.MultiSystems()
    for j in pick_idx:
        sys_name, sys_id = labels[j]
        picked_systems.append(sys_candinate[sys_name][sys_id])
    sys_data_path = os.path.join(work_path, picked_data_name)

    picked_systems.to_deepmd_raw(sys_data_path)
    picked_systems.to_deepmd_npy(sys_data_path, set_size=iter_pick_number)

    # dump the rest data (not picked candinate data and failed data)
    rest_systems = dpdata.MultiSystems()
    for j in rest_idx:
        sys_name, sys_id = labels[j]
        rest_systems.append(sys_candinate[sys_name][sys_id])
    rest_systems += sys_failed
    sys_data_path = os.path.join(work_path, rest_data_name)
    rest_systems.to_deepmd_raw(sys_data_path)
    rest_systems.to_deepmd_npy(sys_data_path, set_size=rest_idx.size)

    # dump the accurate data -- to another directory
    sys_data_path = os.path.join(work_path, accurate_data_name)
    sys_accurate.to_deepmd_raw(sys_data_path)
    sys_accurate.to_deepmd_npy(sys_data_path, set_size=rest_idx.size)


def make_fp(iter_index, jdata, mdata):
    iter_name = make_iter_name(iter_index)
    work_path = os.path.join(iter_name, fp_name)
    create_path(work_path)
    picked_data_path = os.path.join(iter_name, model_devi_name, picked_data_name)
    systems = dpdata.MultiSystems(
        *[dpdata.System(os.path.join(picked_data_path, s), fmt='deepmd/npy')
          for s in os.listdir(picked_data_path)])
    fp_style = jdata['fp_style']
    if 'user_fp_params' in jdata.keys() :
        fp_params = jdata['user_fp_params']
    else:
        fp_params = jdata['fp_params']
    jj = 0
    for system in systems:
        for subsys in system:
            sys_data = subsys.data
            # use the "task." prefix expected by the generator's run_fp/post_fp
            task_name = "task.%03d.%06d" % (iter_index, jj)
            task_path = os.path.join(work_path, task_name)
            create_path(task_path)
            if fp_style == "gaussian" :
                ret = make_gaussian_input(sys_data, fp_params)
                with open(os.path.join(task_path, 'input'), 'w') as fp:
                    fp.write(ret)
            else :
                # TODO: support other formats
                raise RuntimeError ("unsupported fp style")
            jj += 1


def run_iter(param_file, machine_file):
    """ init (iter 0): init_pick

    tasks (iter > 0):
    00 make_train (same as generator)
    01 run_train (same as generator)
    02 post_train (same as generator)
    03 make_model_devi
    04 run_model_devi
    05 post_model_devi
    06 make_fp
    07 run_fp (same as generator)
    08 post_fp (same as generator)
    """
    # TODO: function of handling input json should be combined as one function
    try:
        import ruamel
        from monty.serialization import loadfn, dumpfn
        warnings.simplefilter(
            'ignore', ruamel.yaml.error.MantissaNoDotYAML1_1Warning)
        jdata = loadfn(param_file)
        mdata = loadfn(machine_file)
    except:
        with open(param_file, 'r') as fp:
            jdata = json.load(fp)
        with open(machine_file, 'r') as fp:
            mdata = json.load(fp)

    if jdata.get('pretty_print', False):
        fparam = SHORT_CMD+'_' + \
            
param_file.split('.')[0]+'.'+jdata.get('pretty_format', 'json') + dumpfn(jdata, fparam, indent=4) + fmachine = SHORT_CMD+'_' + \ + machine_file.split('.')[0]+'.'+jdata.get('pretty_format', 'json') + dumpfn(mdata, fmachine, indent=4) + + if mdata.get('handlers', None): + if mdata['handlers'].get('smtp', None): + que = queue.Queue(-1) + queue_handler = logging.handlers.QueueHandler(que) + smtp_handler = logging.handlers.SMTPHandler( + **mdata['handlers']['smtp']) + listener = logging.handlers.QueueListener(que, smtp_handler) + dlog.addHandler(queue_handler) + listener.start() + + max_tasks = 10000 + numb_task = 9 + record = "record.dpgen" + iter_rec = [0, -1] + if os.path.isfile(record): + with open(record) as frec: + for line in frec: + iter_rec = [int(x) for x in line.split()] + dlog.info("continue from iter %03d task %02d" % + (iter_rec[0], iter_rec[1])) + + cont = True + ii = -1 + while cont: + ii += 1 + iter_name = make_iter_name(ii) + sepline(iter_name, '=') + for jj in range(numb_task): + if ii * max_tasks + jj <= iter_rec[0] * max_tasks + iter_rec[1]: + continue + task_name = "task %02d" % jj + sepline("{} {}".format(iter_name, task_name), '-') + if ii == 0 and jj < 6: + if jj == 0: + log_iter("init_pick", ii, jj) + init_pick(ii, jdata, mdata) + dlog.info("first iter, skip step 1-5") + elif jj == 0: + log_iter("make_train", ii, jj) + make_train(ii, jdata, mdata) + elif jj == 1: + log_iter("run_train", ii, jj) + mdata = decide_train_machine(mdata) + disp = make_dispatcher(mdata['train_machine']) + run_train(ii, jdata, mdata, disp) + elif jj == 2: + log_iter("post_train", ii, jj) + post_train(ii, jdata, mdata) + elif jj == 3: + log_iter("make_model_devi", ii, jj) + cont = make_model_devi(ii, jdata, mdata) + if not cont: + break + elif jj == 4: + log_iter("run_model_devi", ii, jj) + mdata = decide_model_devi_machine(mdata) + disp = make_dispatcher(mdata['pick_machine']) + run_model_devi(ii, jdata, mdata, disp) + elif jj == 5: + log_iter("post_model_devi", ii, jj) + post_model_devi(ii, jdata, mdata) + elif jj == 6: + log_iter("make_fp", ii, jj) + make_fp(ii, jdata, mdata) + elif jj == 7: + log_iter("run_fp", ii, jj) + mdata = decide_fp_machine(mdata) + disp = make_dispatcher(mdata['fp_machine']) + run_fp(ii, jdata, mdata, disp) + elif jj == 8: + log_iter("post_fp", ii, jj) + post_fp(ii, jdata) + else: + raise RuntimeError("unknown task %d, something wrong" % jj) + record_iter(record, ii, jj) + + +def gen_simpilify(args): + if args.PARAM and args.MACHINE: + if args.debug: + dlog.setLevel(logging.DEBUG) + dlog.info("start simplifying") + run_iter(args.PARAM, args.MACHINE) + dlog.info("finished") From 454996c1ff361a1d5a5a33e156f6a98dacde223e Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 2 Dec 2019 19:26:18 -0500 Subject: [PATCH 058/109] use abspath --- dpgen/simpilify/simplify.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dpgen/simpilify/simplify.py b/dpgen/simpilify/simplify.py index 50655c03c..90762bc7a 100644 --- a/dpgen/simpilify/simplify.py +++ b/dpgen/simpilify/simplify.py @@ -103,8 +103,8 @@ def make_model_devi(iter_index, jdata, mdata): task_name = "%03d.%06d" % (iter_index, jj) task_path = os.path.join(work_path, task_name) create_path(task_path) - os.symlink(os.path.join(rest_data_path, subsystem), - os.path.join(task_path, rest_data_name)) + os.symlink(os.path.abspath(os.path.join(rest_data_path, subsystem)), + os.path.abspath(os.path.join(task_path, rest_data_name))) return True From 68f5658ef4a51cadd7dee5cfdc670d1acdf9175f Mon Sep 17 00:00:00 
2001 From: Jinzhe Zeng Date: Mon, 2 Dec 2019 19:37:35 -0500 Subject: [PATCH 059/109] fix main.py --- dpgen/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpgen/main.py b/dpgen/main.py index 90b5d9f48..90b25bbe5 100644 --- a/dpgen/main.py +++ b/dpgen/main.py @@ -125,7 +125,7 @@ def main(): help="machine file, json/yaml format") parser_run.add_argument('-d','--debug', action='store_true', help="log debug info") - parser_run.set_defaults(func=simpilify_run) + parser_run.set_defaults(func=gen_simpilify) # test parser_test = subparsers.add_parser("test", help="Auto-test for Deep Potential.") From eb98e88a4ae72d999a40ba89c8e1d348b76bd48e Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 2 Dec 2019 19:41:32 -0500 Subject: [PATCH 060/109] add to module and fix name --- dpgen/main.py | 2 +- dpgen/{simpilify => simplify}/__init__.py | 0 dpgen/{simpilify => simplify}/simplify.py | 0 setup.py | 3 ++- 4 files changed, 3 insertions(+), 2 deletions(-) rename dpgen/{simpilify => simplify}/__init__.py (100%) rename dpgen/{simpilify => simplify}/simplify.py (100%) diff --git a/dpgen/main.py b/dpgen/main.py index 90b25bbe5..b65883891 100644 --- a/dpgen/main.py +++ b/dpgen/main.py @@ -10,7 +10,7 @@ from dpgen.data.gen import gen_init_bulk from dpgen.data.surf import gen_init_surf from dpgen.data.reaction import gen_init_reaction -from dpgen.simpilify.simplify import gen_simpilify +from dpgen.simplify.simplify import gen_simpilify from dpgen.auto_test.run import gen_test from dpgen.database.run import db_run from dpgen.tools.run_report import run_report diff --git a/dpgen/simpilify/__init__.py b/dpgen/simplify/__init__.py similarity index 100% rename from dpgen/simpilify/__init__.py rename to dpgen/simplify/__init__.py diff --git a/dpgen/simpilify/simplify.py b/dpgen/simplify/simplify.py similarity index 100% rename from dpgen/simpilify/simplify.py rename to dpgen/simplify/simplify.py diff --git a/setup.py b/setup.py index e2fa458bf..cd8680425 100755 --- a/setup.py +++ b/setup.py @@ -40,7 +40,8 @@ 'dpgen/remote', 'dpgen/dispatcher', 'dpgen/database', - 'dpgen/tools' + 'dpgen/tools', + 'dpgen/simplify', ], # data_files = [('dpgen/tools/', ['dpgen/tools/update_time.sh', ])], # package_data={'example':['*.json']}, From 6becae3af09d831b1f16619e08e522a4bfd47eb0 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 2 Dec 2019 19:46:21 -0500 Subject: [PATCH 061/109] correct simplify --- dpgen/main.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dpgen/main.py b/dpgen/main.py index b65883891..f5aff6583 100644 --- a/dpgen/main.py +++ b/dpgen/main.py @@ -10,7 +10,7 @@ from dpgen.data.gen import gen_init_bulk from dpgen.data.surf import gen_init_surf from dpgen.data.reaction import gen_init_reaction -from dpgen.simplify.simplify import gen_simpilify +from dpgen.simplify.simplify import gen_simplify from dpgen.auto_test.run import gen_test from dpgen.database.run import db_run from dpgen.tools.run_report import run_report @@ -115,17 +115,17 @@ def main(): help="being loud") parser_rr.set_defaults(func=run_report) - # simpilify + # simplify parser_run = subparsers.add_parser( - "simpilify", - help="Simpilify data.") + "simplify", + help="Simplify data.") parser_run.add_argument('PARAM', type=str, help="parameter file, json/yaml format") parser_run.add_argument('MACHINE', type=str, help="machine file, json/yaml format") parser_run.add_argument('-d','--debug', action='store_true', help="log debug info") - parser_run.set_defaults(func=gen_simpilify) + 
parser_run.set_defaults(func=gen_simplify) # test parser_test = subparsers.add_parser("test", help="Auto-test for Deep Potential.") From dea390209eb739a9f9086dc3d39c715f8a4a62d0 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 2 Dec 2019 19:47:25 -0500 Subject: [PATCH 062/109] correct words again --- dpgen/simplify/simplify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpgen/simplify/simplify.py b/dpgen/simplify/simplify.py index 90762bc7a..66b7ea938 100644 --- a/dpgen/simplify/simplify.py +++ b/dpgen/simplify/simplify.py @@ -388,7 +388,7 @@ def run_iter(param_file, machine_file): record_iter(record, ii, jj) -def gen_simpilify(args): +def gen_simplify(args): if args.PARAM and args.MACHINE: if args.debug: dlog.setLevel(logging.DEBUG) From 8ce5b9b278a0f3ccf90d6c4e712c2784798686bf Mon Sep 17 00:00:00 2001 From: Yuan Fengbo Date: Tue, 3 Dec 2019 10:00:46 +0800 Subject: [PATCH 063/109] modify auto_test doc table --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 9291c1811..38bdeba91 100644 --- a/README.md +++ b/README.md @@ -625,10 +625,10 @@ The last part is the optional settings for various tasks mentioned above. You ca A dictionary -|Key | Type | Example | Discription | -| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| -| potcar_map | dict | {"Al": "example/POTCAR"} |a dict like { "element" : "position of POTCAR" } | -| conf_dir | path like string | "confs/Al/std-fcc" | the dir which contains vasp's POSCAR | +| Key | Type | Example | Discription | +| :---------------- | :--------------------- | :------------- | :----------------| +| potcar_map | dict | {"Al": "example/POTCAR"} |a dict like { "element" : "position of POTCAR" } | +| conf_dir | path like string | "confs/Al/std-fcc" | the dir which contains vasp's POSCAR | | key_id | string| "DZIwdXCXg1fiXXXXXX" |the API key of Material project| | task_type | string | "vasp" | task type, one of deepmd vasp meam | | task | string | "equi" | task, one of equi, eos, elastic, vacancy, interstitial, surf or all | @@ -637,8 +637,8 @@ A dictionary the keys in param["vasp_params"] -|Key | Type | Example | Discription | -| :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| +| Key | Type | Example | Discription | +| :---------------- | :--------------------- | :---------------- | :----------------| | ecut | real number | 650 | the plane wave cutoff for grid. 
| | ediff | real number | 1e-6 |Tolerance of Density Matrix | | kspacing | real number | 0.1 | Sample factor in Brillouin zones | @@ -647,7 +647,7 @@ the keys in param["vasp_params"] | kpar | positive integer | 1 | the number of bands that are treated in parallel | the keys in param["lammps_params"] -|Key | Type | Example | Discription | +| Key | Type | Example | Discription | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| | model_dir | path like string | "example/Al_model" | the model dir which contains .pb file | | type_map | list of string | ["Al"] | a list contains the element, usually useful for multiple element situation | From 9fd7fd81c6a71438a6c99cb5fc2b16ff5b778d44 Mon Sep 17 00:00:00 2001 From: Yuan Fengbo Date: Tue, 3 Dec 2019 10:06:19 +0800 Subject: [PATCH 064/109] modify auto_test doc table 2 --- README.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 38bdeba91..5bab84c8e 100644 --- a/README.md +++ b/README.md @@ -624,8 +624,9 @@ The second part is the computational settings for vasp and lammps. According to The last part is the optional settings for various tasks mentioned above. You can change the parameters according to actual needs. -A dictionary -| Key | Type | Example | Discription | +param.json in a dictionary. + +| Fields | Type | Example | Discription | | :---------------- | :--------------------- | :------------- | :----------------| | potcar_map | dict | {"Al": "example/POTCAR"} |a dict like { "element" : "position of POTCAR" } | | conf_dir | path like string | "confs/Al/std-fcc" | the dir which contains vasp's POSCAR | @@ -635,9 +636,9 @@ A dictionary | vasp_params| dict | seeing below | params relating to vasp INCAR| | lammps_params | dict| seeing below| params relating to lammps | -the keys in param["vasp_params"] +The keys in param["vasp_params"] is shown below. -| Key | Type | Example | Discription | +| Fields | Type | Example | Discription | | :---------------- | :--------------------- | :---------------- | :----------------| | ecut | real number | 650 | the plane wave cutoff for grid. | | ediff | real number | 1e-6 |Tolerance of Density Matrix | @@ -646,7 +647,8 @@ the keys in param["vasp_params"] | npar | positive integer | 1 | the number of k-points that are to be treated in parallel | | kpar | positive integer | 1 | the number of bands that are treated in parallel | -the keys in param["lammps_params"] +the keys in param["lammps_params"]. + | Key | Type | Example | Discription | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| | model_dir | path like string | "example/Al_model" | the model dir which contains .pb file | @@ -662,7 +664,8 @@ the keys in param["lammps_params"] ``` + `store_stable`:(boolean) whether to store the stable energy and volume -param.json +param.json. 
+ | Field | Type | Example | Discription | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| | EpA(eV) | real number | -3.7468 | the potential energy of a atom| @@ -674,7 +677,6 @@ conf_dir: EpA(eV) VpA(A^3) confs/Al/std-fcc -3.7468 16.511 ``` - | Field | Type | Example | Discription | | :---------------- | :--------------------- | :-------------------------------------- | :-------------------------------------------------------------| | EpA(eV) | real number | -3.7468 | the potential energy of a atom| From 993165ade8bd6992b0d3695ac3d002d2be9ce225 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 2 Dec 2019 22:18:41 -0500 Subject: [PATCH 065/109] fix bugs --- dpgen/simplify/simplify.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/dpgen/simplify/simplify.py b/dpgen/simplify/simplify.py index 66b7ea938..1a809a3c1 100644 --- a/dpgen/simplify/simplify.py +++ b/dpgen/simplify/simplify.py @@ -46,7 +46,7 @@ def init_pick(iter_index, jdata, mdata): # TODO: support MultiSystems with LabeledSystem # TODO: support other format systems = dpdata.MultiSystems( - *[dpdata.System(s, fmt='deepmd/npy') for s in os.listdir(pick_data)]) + *[dpdata.System(os.path.join(pick_data, s), fmt='deepmd/npy') for s in os.listdir(pick_data)]) # label the system labels = [] for key, system in systems.systems.items(): @@ -100,7 +100,7 @@ def make_model_devi(iter_index, jdata, mdata): if not os.path.exists(rest_data_path): return False for jj, subsystem in enumerate(os.listdir(rest_data_path)): - task_name = "%03d.%06d" % (iter_index, jj) + task_name = "task.%03d.%06d" % (iter_index, jj) task_path = os.path.join(work_path, task_name) create_path(task_path) os.symlink(os.path.abspath(os.path.join(rest_data_path, subsystem)), @@ -121,7 +121,7 @@ def run_model_devi(iter_index, jdata, mdata, dispatcher): model_names = [os.path.basename(ii) for ii in models] task_model_list = [] for ii in model_names: - task_model_list.append(os.path.join('..', model_names)) + task_model_list.append(os.path.join('..', ii)) # get max data size data_size = max([len(dpdata.System(os.path.join( task, rest_data_name), fmt="deepmd/npy")) for task in tasks]) @@ -149,7 +149,7 @@ def run_model_devi(iter_index, jdata, mdata, dispatcher): except: model_devi_group_size = 1 - forward_files = rest_data_name + forward_files = [rest_data_name] backward_files = sum([[pf+".e.out", pf+".f.out", pf+".v.out"] for pf in detail_file_names], []) dispatcher.run_jobs(mdata['model_devi_resources'], @@ -207,7 +207,7 @@ def post_model_devi(iter_index, jdata, mdata): counter = {"candidate": sys_candinate.get_nframes(), "accurate": sys_accurate.get_nframes(), "failed": sys_failed.get_nframes()} fp_sum = sum(counter.values()) for cc_key, cc_value in counter.items(): - dlog.info("system {1:9s} : {2:6d} in {3:6d} {4:6.2f} %".format(cc_key, cc_value, fp_sum, cc_value/fp_sum*100)) + dlog.info("{0:9s} : {1:6d} in {2:6d} {3:6.2f} %".format(cc_key, cc_value, fp_sum, cc_value/fp_sum*100)) # label the candidate system labels = [] @@ -252,7 +252,7 @@ def make_fp(iter_index, jdata, mdata): create_path(work_path) picked_data_path = os.path.join(iter_name, model_devi_name, picked_data_name) systems = dpdata.MultiSystems( - *[dpdata.System(s, fmt='deepmd/npy') for s in os.listdir(picked_data_path)]) + *[dpdata.System(os.path.join(picked_data_path, s), fmt='deepmd/npy') for s in os.listdir(picked_data_path)]) fp_style = jdata['fp_style'] if 
'user_fp_params' in jdata.keys() : fp_params = jdata['user_fp_params'] @@ -262,7 +262,7 @@ def make_fp(iter_index, jdata, mdata): for system in systems: for subsys in system: sys_data = subsys.data - task_name = "%03d.%06d" % (iter_index, jj) + task_name = "task.%03d.%06d" % (iter_index, jj) task_path = os.path.join(work_path, task_name) create_path(task_path) if fp_style == "gaussian" : @@ -343,6 +343,7 @@ def run_iter(param_file, machine_file): continue task_name = "task %02d" % jj sepline("{} {}".format(iter_name, task_name), '-') + jdata['model_devi_jobs'] = [{} for _ in range(ii+1)] if ii == 0 and jj < 6: if jj == 0: log_iter("init_pick", ii, jj) @@ -362,12 +363,12 @@ def run_iter(param_file, machine_file): elif jj == 3: log_iter("make_model_devi", ii, jj) cont = make_model_devi(ii, jdata, mdata) - if not cont: + if not cont or ii >= jdata.get("stop_iter", ii+1): break elif jj == 4: log_iter("run_model_devi", ii, jj) mdata = decide_model_devi_machine(mdata) - disp = make_dispatcher(mdata['pick_machine']) + disp = make_dispatcher(mdata['model_devi_machine']) run_model_devi(ii, jdata, mdata, disp) elif jj == 5: log_iter("post_model_devi", ii, jj) From ae349bf96ee34b922d4b5b16b8a63d8eb06acd94 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 2 Dec 2019 22:29:56 -0500 Subject: [PATCH 066/109] fix bugs --- dpgen/simplify/simplify.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dpgen/simplify/simplify.py b/dpgen/simplify/simplify.py index 1a809a3c1..4103f83b6 100644 --- a/dpgen/simplify/simplify.py +++ b/dpgen/simplify/simplify.py @@ -100,7 +100,7 @@ def make_model_devi(iter_index, jdata, mdata): if not os.path.exists(rest_data_path): return False for jj, subsystem in enumerate(os.listdir(rest_data_path)): - task_name = "task.%03d.%06d" % (iter_index, jj) + task_name = "task.%03d.%06d" % (0, jj) task_path = os.path.join(work_path, task_name) create_path(task_path) os.symlink(os.path.abspath(os.path.join(rest_data_path, subsystem)), @@ -262,7 +262,7 @@ def make_fp(iter_index, jdata, mdata): for system in systems: for subsys in system: sys_data = subsys.data - task_name = "task.%03d.%06d" % (iter_index, jj) + task_name = "task.%03d.%06d" % (0, jj) task_path = os.path.join(work_path, task_name) create_path(task_path) if fp_style == "gaussian" : From 102e4cadb70c67b4694424aa0388e86198960e03 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 3 Dec 2019 17:32:14 -0500 Subject: [PATCH 067/109] add qm7.json to examples folder --- examples/simplify/qm7.json | 113 +++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 examples/simplify/qm7.json diff --git a/examples/simplify/qm7.json b/examples/simplify/qm7.json new file mode 100644 index 000000000..648c589e7 --- /dev/null +++ b/examples/simplify/qm7.json @@ -0,0 +1,113 @@ +{ + "type_map": [ + "C", + "H", + "N", + "O", + "S" + ], + "mass_map": [ + 12.011, + 1.008, + 14.007, + 15.999, + 32.065 + ], + "pick_data": "/scratch/jz748/simplify/qm7", + "init_data_prefix": "", + "init_data_sys": [], + "sys_batch_size": [ + "auto" + ], + "numb_models": 4, + "train_param": "input.json", + "default_training_param": { + "model": { + "type_map": [ + "C", + "H", + "N", + "O", + "S" + ], + "descriptor": { + "type": "se_a", + "sel": [ + 7, + 16, + 3, + 3, + 1 + ], + "rcut_smth": 1.00, + "rcut": 6.00, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 12 + }, + "fitting_net": { + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true + } + }, + "learning_rate": { + "type": 
"exp", + "start_lr": 0.001, + "decay_steps": 10, + "decay_rate": 0.99 + }, + "loss": { + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + "start_pref_pf": 0, + "limit_pref_pf": 0 + }, + "training": { + "set_prefix": "set", + "stop_batch": 10000, + "disp_file": "lcurve.out", + "disp_freq": 1000, + "numb_test": 1, + "save_freq": 1000, + "save_ckpt": "model.ckpt", + "load_ckpt": "model.ckpt", + "disp_training": true, + "time_training": true, + "profiling": false, + "profiling_file": "timeline.json" + }, + "_comment": "that's all" + }, + "use_clusters": true, + "fp_style": "gaussian", + "shuffle_poscar": false, + "fp_task_max": 1000, + "fp_task_min": 10, + "fp_pp_path": "/home/jzzeng/", + "fp_pp_files": [], + "fp_params": { + "keywords": "mn15/6-31g** force nosymm scf(maxcyc=512)", + "nproc": 28, + "multiplicity": 1, + "_comment": " that's all " + }, + "init_pick_number":100, + "iter_pick_number":100, + "e_trust_lo":1e10, + "e_trust_hi":1e10, + "f_trust_lo":0.25, + "f_trust_hi":0.45, + "_comment": " that's all " +} From 507af832402b11ef15be28e53e2ab4c1e849e01a Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 3 Dec 2019 17:50:18 -0500 Subject: [PATCH 068/109] add docs for dpgen simplify --- README.md | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/README.md b/README.md index 31a72fbf6..26f3a81e1 100644 --- a/README.md +++ b/README.md @@ -670,7 +670,132 @@ This task uses the stress-strain relationship to calculate the elastic constant. + `relax_box`:(boolean) set true if the box is relaxed, otherwise only relax atom positions. +## Simplify +When you have a dataset containing lots of repeated data, this step will help you simplify your dataset. The workflow contains three stages: train, model_devi, and fp. The train stage and the fp stage are as the same as the run step, and the model_devi stage will calculate model deviations of the rest data that has not been confirmed accurate. Data with small model deviations will be confirmed accurate, while the program will pick data from those with large model deviations to the new dataset. 
+Use the following script to start the workflow:
+```bash
+dpgen simplify param.json machine.json
+```
+
+Here is an example of `param.json` for the QM7 dataset:
+```
+{
+    "type_map": [
+        "C",
+        "H",
+        "N",
+        "O",
+        "S"
+    ],
+    "mass_map": [
+        12.011,
+        1.008,
+        14.007,
+        15.999,
+        32.065
+    ],
+    "pick_data": "/scratch/jz748/simplify/qm7",
+    "init_data_prefix": "",
+    "init_data_sys": [],
+    "sys_batch_size": [
+        "auto"
+    ],
+    "numb_models": 4,
+    "train_param": "input.json",
+    "default_training_param": {
+        "model": {
+            "type_map": [
+                "C",
+                "H",
+                "N",
+                "O",
+                "S"
+            ],
+            "descriptor": {
+                "type": "se_a",
+                "sel": [
+                    7,
+                    16,
+                    3,
+                    3,
+                    1
+                ],
+                "rcut_smth": 1.00,
+                "rcut": 6.00,
+                "neuron": [
+                    25,
+                    50,
+                    100
+                ],
+                "resnet_dt": false,
+                "axis_neuron": 12
+            },
+            "fitting_net": {
+                "neuron": [
+                    240,
+                    240,
+                    240
+                ],
+                "resnet_dt": true
+            }
+        },
+        "learning_rate": {
+            "type": "exp",
+            "start_lr": 0.001,
+            "decay_steps": 10,
+            "decay_rate": 0.99
+        },
+        "loss": {
+            "start_pref_e": 0.02,
+            "limit_pref_e": 1,
+            "start_pref_f": 1000,
+            "limit_pref_f": 1,
+            "start_pref_v": 0,
+            "limit_pref_v": 0,
+            "start_pref_pf": 0,
+            "limit_pref_pf": 0
+        },
+        "training": {
+            "set_prefix": "set",
+            "stop_batch": 10000,
+            "disp_file": "lcurve.out",
+            "disp_freq": 1000,
+            "numb_test": 1,
+            "save_freq": 1000,
+            "save_ckpt": "model.ckpt",
+            "load_ckpt": "model.ckpt",
+            "disp_training": true,
+            "time_training": true,
+            "profiling": false,
+            "profiling_file": "timeline.json"
+        },
+        "_comment": "that's all"
+    },
+    "use_clusters": true,
+    "fp_style": "gaussian",
+    "shuffle_poscar": false,
+    "fp_task_max": 1000,
+    "fp_task_min": 10,
+    "fp_pp_path": "/home/jzzeng/",
+    "fp_pp_files": [],
+    "fp_params": {
+        "keywords": "mn15/6-31g** force nosymm scf(maxcyc=512)",
+        "nproc": 28,
+        "multiplicity": 1,
+        "_comment": " that's all "
+    },
+    "init_pick_number":100,
+    "iter_pick_number":100,
+    "e_trust_lo":1e10,
+    "e_trust_hi":1e10,
+    "f_trust_lo":0.25,
+    "f_trust_hi":0.45,
+    "_comment": " that's all "
+}
+```
+
+Here `pick_data` is the data to simplify; currently only a `MultiSystems` containing `System`s in the `deepmd/npy` format is supported, and `use_clusters` should always be `true`. `init_pick_number` and `iter_pick_number` are the numbers of picked frames. `e_trust_lo` and `e_trust_hi` bound the deviation of the frame energy, while `f_trust_lo` and `f_trust_hi` bound the maximum deviation of the atomic forces in a frame. `fp_style` can only be `gaussian` currently. Other parameters are the same as those of the generator.

## Set up machine

When switching into a new machine, you may modify the `MACHINE` file according to the actual circumstances. Once you have finished, the `MACHINE` can be re-used for any DP-GEN tasks without any extra effort.
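As a worked illustration of the `f_trust_lo` / `f_trust_hi` screening described in the Simplify section above (a sketch with hypothetical values and a hypothetical helper name, not the verbatim dpgen code; the real logic lives in `post_model_devi`), frames are binned by their maximum atomic force deviation:

```python
import numpy as np

f_trust_lo, f_trust_hi = 0.25, 0.45  # thresholds from the qm7 example above

def classify(max_f_devi):
    """Split per-frame max force deviations into accurate/candidate/failed.

    Frames below f_trust_lo are trusted as-is, frames at or above f_trust_hi
    are discarded, and the band in between becomes the fp candidates."""
    accurate = max_f_devi < f_trust_lo
    failed = max_f_devi >= f_trust_hi
    candidate = ~accurate & ~failed
    return accurate, candidate, failed

acc, cand, fail = classify(np.array([0.10, 0.30, 0.60]))
# frame 0 -> accurate, frame 1 -> candidate, frame 2 -> failed
```

With both `e_trust` bounds set to `1e10`, as in the example, the energy criterion effectively never fires and the screening is force-only.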
From fc2e43affd451c4d9f2e077c022ad20e8637f041 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng
Date: Tue, 3 Dec 2019 17:53:46 -0500
Subject: [PATCH 069/109] mark the json code block as json in the document

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 70ef2a621..675d5e7f4 100644
--- a/README.md
+++ b/README.md
@@ -851,7 +851,7 @@ dpgen simplify param.json machine.json
 ```

 Here is an example of `param.json` for the QM7 dataset:
-```
+```json
 {
     "type_map": [
         "C",
From 34d952f2a984ac754a429e03d9f8c5a090009fac Mon Sep 17 00:00:00 2001
From: Han Wang
Date: Wed, 4 Dec 2019 08:40:25 +0800
Subject: [PATCH 070/109] update the requirement for dpdata

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index cd8680425..1fe19c133 100755
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,7 @@ with open(path.join('dpgen', '_date.py'), 'w') as fp :
     fp.write('date = \'%s\'' % today)

-install_requires=['numpy>=1.14.3', 'dpdata>=0.1.10', 'pymatgen>=2017.9.1', 'ase', 'monty>2.0.0', 'paramiko', 'custodian']
+install_requires=['numpy>=1.14.3', 'dpdata>=0.1.11', 'pymatgen>=2017.9.1', 'ase', 'monty>2.0.0', 'paramiko', 'custodian']

 setuptools.setup(
     name=NAME,
From 66b17a79e375c23676a39c9e37d8e4edc57a46ec Mon Sep 17 00:00:00 2001
From: Han Wang
Date: Wed, 4 Dec 2019 14:20:10 +0800
Subject: [PATCH 071/109] fix the bug of dpdata

require the dpdata release that fixes the siesta output version
compatibility bug.
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 1fe19c133..325b2b005 100755
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,7 @@ with open(path.join('dpgen', '_date.py'), 'w') as fp :
     fp.write('date = \'%s\'' % today)

-install_requires=['numpy>=1.14.3', 'dpdata>=0.1.11', 'pymatgen>=2017.9.1', 'ase', 'monty>2.0.0', 'paramiko', 'custodian']
+install_requires=['numpy>=1.14.3', 'dpdata>=0.1.12', 'pymatgen>=2017.9.1', 'ase', 'monty>2.0.0', 'paramiko', 'custodian']

 setuptools.setup(
     name=NAME,
From c3dc408a60ac46a14b73d47dd465e7e0cd36dfee Mon Sep 17 00:00:00 2001
From: Han Wang
Date: Wed, 4 Dec 2019 15:51:26 +0800
Subject: [PATCH 072/109] refactor make_model_devi: split the input-making part
 from the conf-making part.
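Editor's sketch of the structure after this split, condensed from the diff below (the real function also converts each POSCAR into a `.lmp` conf and handles `sys_configs_prefix`, both omitted here; the helper body is stubbed):

```python
import glob
import os

def _make_model_devi_inner(iter_index, jdata, mdata, conf_systems):
    """Input-making part: writes the lammps inputs for every conf (see diff)."""
    ...

def make_model_devi(iter_index, jdata, mdata):
    # conf-making part: expand every sys_configs glob into a sorted list of
    # absolute POSCAR paths, one list per system
    conf_systems = []
    for sys_patterns in jdata["sys_configs"]:
        cur = sum([glob.glob(p) for p in sys_patterns], [])
        cur.sort()
        conf_systems.append([os.path.abspath(p) for p in cur])
    # input-making part, now delegated to the helper split out by this commit
    _make_model_devi_inner(iter_index, jdata, mdata, conf_systems)
    return True
```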
--- dpgen/generator/run.py | 66 +++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index 8855c5187..69a4dd994 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -494,34 +494,14 @@ def parse_cur_job(cur_job) : dt = None return ensemble, nsteps, trj_freq, temps, press, pka_e, dt + def make_model_devi (iter_index, jdata, mdata) : - use_ele_temp = jdata.get('use_ele_temp', 0) - model_devi_dt = jdata['model_devi_dt'] - model_devi_neidelay = None - if 'model_devi_neidelay' in jdata : - model_devi_neidelay = jdata['model_devi_neidelay'] - model_devi_taut = 0.1 - if 'model_devi_taut' in jdata : - model_devi_taut = jdata['model_devi_taut'] - model_devi_taup = 0.5 - if 'model_devi_taup' in jdata : - model_devi_taup = jdata['model_devi_taup'] model_devi_jobs = jdata['model_devi_jobs'] if (iter_index >= len(model_devi_jobs)) : return False cur_job = model_devi_jobs[iter_index] - # ensemble = model_devi_jobs['ensemble'] - # nsteps = model_devi_jobs['nsteps'] - # trj_freq = model_devi_jobs['trj_freq'] - # job_names = get_job_names (model_devi_jobs) - # assert (iter_index < len(job_names)) - # cur_job_name = job_names[iter_index] - # cur_job = model_devi_jobs[cur_job_name] - ensemble, nsteps, trj_freq, temps, press, pka_e, dt = parse_cur_job(cur_job) - if dt is not None : - model_devi_dt = dt if "sys_configs_prefix" in jdata: sys_configs = [] for sys_list in jdata["sys_configs"]: @@ -544,15 +524,11 @@ def make_model_devi (iter_index, cur_systems.sort() cur_systems = [os.path.abspath(ii) for ii in cur_systems] conf_systems.append (cur_systems) - mass_map = jdata['mass_map'] iter_name = make_iter_name(iter_index) train_path = os.path.join(iter_name, train_name) train_path = os.path.abspath(train_path) models = glob.glob(os.path.join(train_path, "graph*pb")) - task_model_list = [] - for ii in models: - task_model_list.append(os.path.join('..', os.path.basename(ii))) work_path = os.path.join(iter_name, model_devi_name) create_path(work_path) for mm in models : @@ -586,6 +562,45 @@ def make_model_devi (iter_index, conf_counter += 1 sys_counter += 1 + _make_model_devi_inner(iter_index, jdata, mdata, conf_systems) + + return True + + +def _make_model_devi_inner(iter_index, jdata, mdata, conf_systems): + model_devi_jobs = jdata['model_devi_jobs'] + if (iter_index >= len(model_devi_jobs)) : + return False + cur_job = model_devi_jobs[iter_index] + ensemble, nsteps, trj_freq, temps, press, pka_e, dt = parse_cur_job(cur_job) + if dt is not None : + model_devi_dt = dt + sys_idx = expand_idx(cur_job['sys_idx']) + if (len(sys_idx) != len(list(set(sys_idx)))) : + raise RuntimeError("system index should be uniq") + + use_ele_temp = jdata.get('use_ele_temp', 0) + model_devi_dt = jdata['model_devi_dt'] + model_devi_neidelay = None + if 'model_devi_neidelay' in jdata : + model_devi_neidelay = jdata['model_devi_neidelay'] + model_devi_taut = 0.1 + if 'model_devi_taut' in jdata : + model_devi_taut = jdata['model_devi_taut'] + model_devi_taup = 0.5 + if 'model_devi_taup' in jdata : + model_devi_taup = jdata['model_devi_taup'] + mass_map = jdata['mass_map'] + + iter_name = make_iter_name(iter_index) + train_path = os.path.join(iter_name, train_name) + train_path = os.path.abspath(train_path) + models = glob.glob(os.path.join(train_path, "graph*pb")) + task_model_list = [] + for ii in models: + task_model_list.append(os.path.join('..', os.path.basename(ii))) + work_path = os.path.join(iter_name, 
model_devi_name) + sys_counter = 0 for ss in conf_systems: conf_counter = 0 @@ -666,7 +681,6 @@ def make_model_devi (iter_index, conf_counter += 1 sys_counter += 1 - return True def run_model_devi (iter_index, jdata, From 954723dd9347473a05e83f0d3091fb3f14a12b4f Mon Sep 17 00:00:00 2001 From: Han Wang Date: Wed, 4 Dec 2019 21:27:27 +0800 Subject: [PATCH 073/109] support revising template for model devi jobs --- dpgen/generator/run.py | 176 ++++++++++++++++++++++- tests/generator/test_make_md.py | 241 +++++++++++++++++++++++++++++++- 2 files changed, 413 insertions(+), 4 deletions(-) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index dcd60d019..fcab11136 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -21,6 +21,7 @@ import warnings import shutil import time +import copy import dpdata import numpy as np import subprocess as sp @@ -504,6 +505,86 @@ def parse_cur_job(cur_job) : dt = None return ensemble, nsteps, trj_freq, temps, press, pka_e, dt +def expand_matrix_values(target_list, cur_idx = 0): + nvar = len(target_list) + if cur_idx == nvar : + return [[]] + else : + res = [] + prev = expand_matrix_values(target_list, cur_idx+1) + for ii in target_list[cur_idx]: + tmp = copy.deepcopy(prev) + for jj in tmp: + jj.insert(0, ii) + res.append(jj) + return res + +def parse_cur_job_revmat(cur_job, use_plm = False): + templates = [cur_job['template']['lmp']] + if use_plm : + templates.append(cur_job['template']['plm']) + revise_keys = [] + revise_values = [] + if 'rev_mat' not in cur_job.keys(): + cur_job['rev_mat'] = {} + if 'lmp' not in cur_job['rev_mat'].keys(): + cur_job['rev_mat']['lmp'] = {} + for ii in cur_job['rev_mat']['lmp'].keys(): + revise_keys.append(ii) + revise_values.append(cur_job['rev_mat']['lmp'][ii]) + n_lmp_keys = len(revise_keys) + if use_plm: + if 'plm' not in cur_job['rev_mat'].keys(): + cur_job['rev_mat']['plm'] = {} + for ii in cur_job['rev_mat']['plm'].keys(): + revise_keys.append(ii) + revise_values.append(cur_job['rev_mat']['plm'][ii]) + revise_matrix = expand_matrix_values(revise_values) + return revise_keys, revise_matrix, n_lmp_keys + + +def find_only_one_key(lmp_lines, key): + found = [] + for idx in range(len(lmp_lines)): + words = lmp_lines[idx].split() + nkey = len(key) + if len(words) >= nkey and words[:nkey] == key : + found.append(idx) + if len(found) > 1: + raise RuntimeError('found %d keywords %s' % (len(found), key)) + if len(found) == 0: + raise RuntimeError('failed to find keyword %s' % (key)) + return found[0] + + +def revise_lmp_input_model(lmp_lines, task_model_list, trj_freq, deepmd_version = '1'): + idx = find_only_one_key(lmp_lines, ['pair_style', 'deepmd']) + graph_list = ' '.join(task_model_list) + if LooseVersion(deepmd_version) < LooseVersion('1'): + lmp_lines[idx] = "pair_style deepmd %s %d model_devi.out\n" % (graph_list, trj_freq) + else: + lmp_lines[idx] = "pair_style deepmd %s out_freq %d out_file model_devi.out\n" % (graph_list, trj_freq) + return lmp_lines + + +def revise_lmp_input_dump(lmp_lines, trj_freq): + idx = find_only_one_key(lmp_lines, ['dump', 'dpgen_dump']) + lmp_lines[idx] = "dump dpgen_dump all custom %d traj/*.lammpstrj id type x y z\n" % trj_freq + return lmp_lines + + +def revise_lmp_input_plm(lmp_lines, in_plm, out_plm = 'output.plumed'): + idx = find_only_one_key(lmp_lines, ['fix', 'dpgen_plm']) + lmp_lines[idx] = "fix dpgen_plm all plumed plumedfile %s outfile %s\n" % (in_plm, out_plm) + return lmp_lines + + +def revise_by_keys(lmp_lines, keys, values): + for kk,vv in zip(keys, 
values): + for ii in range(len(lmp_lines)): + lmp_lines[ii] = lmp_lines[ii].replace(kk, str(vv)) + return lmp_lines + def make_model_devi (iter_index, jdata, @@ -572,12 +653,99 @@ def make_model_devi (iter_index, conf_counter += 1 sys_counter += 1 - _make_model_devi_inner(iter_index, jdata, mdata, conf_systems) + input_mode = "native" + if "template" in cur_job: + input_mode = "revise_template" + use_plm = jdata.get('model_devi_plumed', False) + if input_mode == "native": + _make_model_devi_native(iter_index, jdata, mdata, conf_systems) + elif input_mode == "revise_template": + _make_model_devi_revmat(iter_index, jdata, mdata, conf_systems) + else: + raise RuntimeError('unknown model_devi input mode', input_mode) return True -def _make_model_devi_inner(iter_index, jdata, mdata, conf_systems): +def _make_model_devi_revmat(iter_index, jdata, mdata, conf_systems): + model_devi_jobs = jdata['model_devi_jobs'] + if (iter_index >= len(model_devi_jobs)) : + return False + cur_job = model_devi_jobs[iter_index] + sys_idx = expand_idx(cur_job['sys_idx']) + if (len(sys_idx) != len(list(set(sys_idx)))) : + raise RuntimeError("system index should be uniq") + mass_map = jdata['mass_map'] + use_plm = jdata.get('model_devi_plumed', False) + trj_freq = _get_param_alias(cur_job, ['t_freq', 'trj_freq','traj_freq']) + + rev_keys, rev_mat, num_lmp = parse_cur_job_revmat(cur_job, use_plm = use_plm) + lmp_templ = cur_job['template']['lmp'] + lmp_templ = os.path.abspath(lmp_templ) + if use_plm: + plm_templ = cur_job['template']['plm'] + plm_templ = os.path.abspath(plm_templ) + + iter_name = make_iter_name(iter_index) + train_path = os.path.join(iter_name, train_name) + train_path = os.path.abspath(train_path) + models = glob.glob(os.path.join(train_path, "graph*pb")) + task_model_list = [] + for ii in models: + task_model_list.append(os.path.join('..', os.path.basename(ii))) + work_path = os.path.join(iter_name, model_devi_name) + try: + mdata["deepmd_version"] + except: + mdata = set_version(mdata) + deepmd_version = mdata['deepmd_version'] + + sys_counter = 0 + for ss in conf_systems: + conf_counter = 0 + task_counter = 0 + for cc in ss : + for ii in range(len(rev_mat)): + rev_item = rev_mat[ii] + task_name = make_model_devi_task_name(sys_idx[sys_counter], task_counter) + conf_name = make_model_devi_conf_name(sys_idx[sys_counter], conf_counter) + '.lmp' + task_path = os.path.join(work_path, task_name) + # create task path + create_path(task_path) + create_path(os.path.join(task_path, 'traj')) + # link conf + loc_conf_name = 'conf.lmp' + os.symlink(os.path.join(os.path.join('..','confs'), conf_name), + os.path.join(task_path, loc_conf_name) ) + cwd_ = os.getcwd() + # chdir to task path + os.chdir(task_path) + shutil.copyfile(lmp_templ, 'input.lammps') + # revise input of lammps + with open('input.lammps') as fp: + lmp_lines = fp.readlines() + lmp_lines = revise_lmp_input_model(lmp_lines, task_model_list, trj_freq, deepmd_version = deepmd_version) + lmp_lines = revise_lmp_input_dump(lmp_lines, trj_freq) + lmp_lines = revise_by_keys(lmp_lines, rev_keys[:num_lmp], rev_item[:num_lmp]) + # revise input of plumed + if use_plm: + lmp_lines = revise_lmp_input_plm(lmp_lines, 'input.plumed') + shutil.copyfile(plm_templ, 'input.plumed') + with open('input.plumed') as fp: + plm_lines = fp.readlines() + plm_lines = revise_by_keys(plm_lines, rev_keys[num_lmp:], rev_item[num_lmp:]) + with open('input.plumed', 'w') as fp: + fp.write(''.join(plm_lines)) + # dump input of lammps + with open('input.lammps', 'w') as fp: + 
fp.write(''.join(lmp_lines)) + os.chdir(cwd_) + task_counter += 1 + conf_counter += 1 + sys_counter += 1 + + +def _make_model_devi_native(iter_index, jdata, mdata, conf_systems): model_devi_jobs = jdata['model_devi_jobs'] if (iter_index >= len(model_devi_jobs)) : return False @@ -700,6 +868,7 @@ def run_model_devi (iter_index, lmp_exec = mdata['lmp_command'] model_devi_group_size = mdata['model_devi_group_size'] model_devi_resources = mdata['model_devi_resources'] + use_plm = jdata.get('model_devi_plumed', False) iter_name = make_iter_name(iter_index) work_path = os.path.join(iter_name, model_devi_name) @@ -732,6 +901,9 @@ def run_model_devi (iter_index, model_names = [os.path.basename(ii) for ii in all_models] forward_files = ['conf.lmp', 'input.lammps', 'traj'] backward_files = ['model_devi.out', 'model_devi.log', 'traj'] + if use_plm: + forward_files += ['input.plumed'] + backward_files += ['output.plumed'] dispatcher.run_jobs(mdata['model_devi_resources'], commands, diff --git a/tests/generator/test_make_md.py b/tests/generator/test_make_md.py index 6c35b4b30..6d321ba57 100644 --- a/tests/generator/test_make_md.py +++ b/tests/generator/test_make_md.py @@ -1,4 +1,4 @@ -import os,sys,json,glob,shutil +import os,sys,json,glob,shutil,copy import dpdata import numpy as np import unittest @@ -7,10 +7,16 @@ __package__ = 'generator' from .context import make_model_devi from .context import parse_cur_job +from .context import parse_cur_job_revmat from .context import param_file from .context import machine_file from .context import my_file_cmp from .context import setUpModule +from .context import find_only_one_key +from .context import revise_lmp_input_model +from .context import revise_lmp_input_dump +from .context import revise_lmp_input_plm +from .context import revise_by_keys from .comp_sys import test_atom_names from .comp_sys import test_atom_types from .comp_sys import test_coord @@ -53,7 +59,7 @@ def _check_confs(testCase, idx, jdata) : l_conf_file = os.path.basename(os.readlink(conf_file)) poscar_file = poscars[int(l_conf_file.split('.')[0])][int(l_conf_file.split('.')[1])] sys_0 = dpdata.System(conf_file, type_map = jdata['type_map']) - sys_1 = dpdata.System(poscar_file) + sys_1 = dpdata.System(poscar_file, type_map = jdata['type_map']) test_atom_names(testCase, sys_0, sys_1) test_atom_types(testCase, sys_0, sys_1) test_cell(testCase, sys_0, sys_1) @@ -123,6 +129,10 @@ def _check_pt(testCase, idx, jdata) : class TestMakeModelDevi(unittest.TestCase): + def tearDown(self): + if os.path.isdir('iter.000000') : + shutil.rmtree('iter.000000') + def test_make_model_devi (self) : if os.path.isdir('iter.000000') : shutil.rmtree('iter.000000') @@ -137,7 +147,234 @@ def test_make_model_devi (self) : _check_traj_dir(self, 0) _check_pt(self, 0, jdata) shutil.rmtree('iter.000000') + + + +class TestMakeModelDeviRevMat(unittest.TestCase): + def tearDown(self): + if os.path.isdir('iter.000000') : + shutil.rmtree('iter.000000') + + def test_make_model_devi (self) : + if os.path.isdir('iter.000000') : + shutil.rmtree('iter.000000') + jdata = { + "type_map": ["Mg", "Al"], + "mass_map": [24, 27], + "init_data_prefix": "data", + "init_data_sys": ["deepmd"], + "init_batch_size": [16], + "sys_configs_prefix": os.getcwd(), + "sys_configs": [ + ["data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale*/000001/POSCAR"], + ["data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale*/000000/POSCAR"] + ], + "numb_models": 4, + "shuffle_poscar": False, + "model_devi_f_trust_lo": 0.050, + "model_devi_f_trust_hi": 0.150, + 
"model_devi_e_trust_lo": 1e10, + "model_devi_e_trust_hi": 1e10, + "model_devi_plumed": True, + "model_devi_jobs": [ + {"sys_idx": [0, 1], 'traj_freq': 10, "template":{"lmp": "lmp/input.lammps", "plm": "lmp/input.plumed"}, + "rev_mat":{ + "lmp": {"V_NSTEPS": [1000], "V_TEMP": [50, 100], "V_PRES": [1, 10]}, "plm": {"V_DIST0": [3,4], "V_DIST1": [5, 6]} + }} + ] + } + mdata = {'deepmd_version': '1'} + _make_fake_models(0, jdata['numb_models']) + make_model_devi(0, jdata, mdata) + _check_pb(self, 0) + _check_confs(self, 0, jdata) + _check_traj_dir(self, 0) + # check the first task + md_dir = os.path.join('iter.%06d' % 0, '01.model_devi') + tasks = glob.glob(os.path.join(md_dir, 'task.*')) + # 4 accounts for 2 systems each with 2 frames + self.assertEqual(len(tasks), (len(jdata['model_devi_jobs'][0]['rev_mat']['lmp']['V_NSTEPS']) * + len(jdata['model_devi_jobs'][0]['rev_mat']['lmp']['V_TEMP']) * + len(jdata['model_devi_jobs'][0]['rev_mat']['lmp']['V_PRES']) * + len(jdata['model_devi_jobs'][0]['rev_mat']['plm']['V_DIST0']) * + len(jdata['model_devi_jobs'][0]['rev_mat']['plm']['V_DIST1']) * + 4)) + tasks.sort() + cwd_ = os.getcwd() + os.chdir(tasks[0]) + with open('input.lammps') as fp: + lines = fp.readlines() + for ii in lines: + if 'variable' in ii and 'TEMP' in ii: + self.assertEqual('variable TEMP equal 50', + ' '.join(ii.split())) + if 'variable' in ii and 'PRES' in ii: + self.assertEqual('variable PRES equal 1', + ' '.join(ii.split())) + if 'variable' in ii and 'NSTEPS' in ii: + self.assertEqual('variable NSTEPS equal 1000', + ' '.join(ii.split())) + with open('input.plumed') as fp: + lines = fp.readlines() + for ii in lines: + if 'RESTRAINT' in ii: + self.assertEqual('RESTRAINT ARG=d1,d2 AT=3,5 KAPPA=150.0,150.0 LABEL=restraint', + ' '.join(ii.split())) + os.chdir(cwd_) + + + def test_make_model_devi_null (self) : + if os.path.isdir('iter.000000') : + shutil.rmtree('iter.000000') + jdata = { + "type_map": ["Mg", "Al"], + "mass_map": [24, 27], + "init_data_prefix": "data", + "init_data_sys": ["deepmd"], + "init_batch_size": [16], + "sys_configs_prefix": os.getcwd(), + "sys_configs": [ + ["data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale*/000001/POSCAR"], + ["data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale*/000000/POSCAR"] + ], + "numb_models": 4, + "shuffle_poscar": False, + "model_devi_f_trust_lo": 0.050, + "model_devi_f_trust_hi": 0.150, + "model_devi_e_trust_lo": 1e10, + "model_devi_e_trust_hi": 1e10, + "model_devi_plumed": True, + "model_devi_jobs": [ + {"sys_idx": [0, 1], 'traj_freq': 10, "template":{"lmp": "lmp/input.lammps", "plm": "lmp/input.plumed"}, + } + ] + } + mdata = {'deepmd_version': '1'} + _make_fake_models(0, jdata['numb_models']) + make_model_devi(0, jdata, mdata) + _check_pb(self, 0) + _check_confs(self, 0, jdata) + _check_traj_dir(self, 0) + # check the first task + md_dir = os.path.join('iter.%06d' % 0, '01.model_devi') + tasks = glob.glob(os.path.join(md_dir, 'task.*')) + # 4 accounts for 2 systems each with 2 frames + self.assertEqual(len(tasks), (4)) + tasks.sort() + cwd_ = os.getcwd() + os.chdir(tasks[0]) + with open('input.lammps') as fp: + lines = fp.readlines() + for ii in lines: + if 'variable' in ii and 'TEMP' in ii: + self.assertEqual('variable TEMP equal V_TEMP', + ' '.join(ii.split())) + if 'variable' in ii and 'PRES' in ii: + self.assertEqual('variable PRES equal V_PRES', + ' '.join(ii.split())) + if 'variable' in ii and 'NSTEPS' in ii: + self.assertEqual('variable NSTEPS equal V_NSTEPS', + ' '.join(ii.split())) + with open('input.plumed') as fp: + 
lines = fp.readlines() + for ii in lines: + if 'RESTRAINT' in ii: + self.assertEqual('RESTRAINT ARG=d1,d2 AT=V_DIST0,V_DIST1 KAPPA=150.0,150.0 LABEL=restraint', + ' '.join(ii.split())) + os.chdir(cwd_) + + + + +class TestParseCurJobRevMat(unittest.TestCase): + def setUp(self): + self.cur_job = { + "sys_idx": [0, 1], + "template":{"lmp": "lmp/input.lammps", "plm": "lmp/input.plumed"}, + "rev_mat":{ + "lmp": {"V_NSTEPS": [1000], "V_TEMP": [50, 100], "V_PRES": [1, 10]}, "plm": {"V_DIST0": [3,4], "V_DIST1": [5, 6]} + } + } + self.ref_matrix = [] + for i0 in self.cur_job['rev_mat']['lmp']['V_NSTEPS']: + for i1 in self.cur_job['rev_mat']['lmp']['V_TEMP']: + for i2 in self.cur_job['rev_mat']['lmp']['V_PRES']: + for i3 in self.cur_job['rev_mat']['plm']['V_DIST0']: + for i4 in self.cur_job['rev_mat']['plm']['V_DIST1']: + self.ref_matrix.append([i0, i1, i2, i3, i4]) + self.ref_keys = ['V_NSTEPS', 'V_TEMP', 'V_PRES', 'V_DIST0', 'V_DIST1'] + self.ref_nlmp = 3 + def test_parse_cur_job(self): + rk, rm, nl = parse_cur_job_revmat(self.cur_job, use_plm = True) + self.assertEqual(rk, self.ref_keys) + self.assertEqual(nl, self.ref_nlmp) + self.assertEqual(rm, self.ref_matrix) + + +class MakeModelDeviByReviseMatrix(unittest.TestCase): + def test_find_only_one_key_1(self): + lines = ['aaa bbb ccc\n', 'bbb ccc\n', 'ccc bbb ccc\n'] + idx = find_only_one_key(lines, ['bbb', 'ccc']) + self.assertEqual(idx, 1) + + def test_find_only_one_key_0(self): + lines = ['aaa bbb\n', 'bbb aaa\n', 'ccc ddd\n'] + with self.assertRaises(RuntimeError): + idx = find_only_one_key(lines, ['ccc','eee']) + + def test_find_only_one_key_2(self): + lines = ['aaa bbb\n', 'bbb ccc\n', 'bbb ccc\n', 'fff eee\n'] + with self.assertRaises(RuntimeError): + idx = find_only_one_key(lines, ['bbb','ccc']) + + def test_revise_lmp_input_model_0(self): + lines = ['foo\n', 'pair_style deepmd aaa ccc fff\n', 'bar\n', '\n'] + ref_lines = copy.deepcopy(lines) + lines = revise_lmp_input_model(lines, ['model0', 'model1'], 10, '0.1') + for ii in [0, 2, 3] : + self.assertEqual(lines[ii], ref_lines[ii]) + tmp = " ".join(lines[1].split()) + self.assertEqual(tmp, "pair_style deepmd model0 model1 10 model_devi.out") + + def test_revise_lmp_input_model_1(self): + lines = ['foo\n', 'pair_style deepmd aaa ccc fff\n', 'bar\n', '\n'] + ref_lines = copy.deepcopy(lines) + lines = revise_lmp_input_model(lines, ['model0', 'model1'], 10, '1') + for ii in [0, 2, 3] : + self.assertEqual(lines[ii], ref_lines[ii]) + tmp = " ".join(lines[1].split()) + self.assertEqual(tmp, "pair_style deepmd model0 model1 out_freq 10 out_file model_devi.out") + + def test_revise_lmp_input_dump(self): + lines = ['foo\n', 'dump dpgen_dump ccc fff\n', 'bar\n', '\n'] + ref_lines = copy.deepcopy(lines) + lines = revise_lmp_input_dump(lines, 10) + for ii in [0, 2, 3] : + self.assertEqual(lines[ii], ref_lines[ii]) + tmp = " ".join(lines[1].split()) + self.assertEqual(tmp, "dump dpgen_dump all custom 10 traj/*.lammpstrj id type x y z") + + def test_revise_lmp_input_plm(self): + lines = ['foo\n', 'fix dpgen_plm ccc fff\n', 'bar\n', '\n'] + ref_lines = copy.deepcopy(lines) + lines = revise_lmp_input_plm(lines, 'input.plumed') + for ii in [0, 2, 3] : + self.assertEqual(lines[ii], ref_lines[ii]) + tmp = " ".join(lines[1].split()) + self.assertEqual(tmp, "fix dpgen_plm all plumed plumedfile input.plumed outfile output.plumed") + + def test_revise_by_key(self): + lines = ['foo\n', 'aaa\n', 'bar\n', 'bbb\n', '\n'] + ref_lines = copy.deepcopy(lines) + lines = revise_by_keys(lines, ['aaa', 'bbb'], ['ccc','ddd']) 
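+        # lines 0, 2 and 4 contain neither 'aaa' nor 'bbb', so revise_by_keys
+        # must pass them through unchanged; lines 1 and 3 become 'ccc' and 'ddd'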
+ for ii in [0, 2, 4] : + self.assertEqual(lines[ii], ref_lines[ii]) + tmp = " ".join(lines[1].split()) + self.assertEqual(tmp, "ccc") + tmp = " ".join(lines[3].split()) + self.assertEqual(tmp, "ddd") + if __name__ == '__main__': unittest.main() From 3026a1867c1a5a7090804b71bea37a75f8a6e2b9 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Wed, 4 Dec 2019 21:31:22 +0800 Subject: [PATCH 074/109] add missing data --- .../sys-0032/scale-1.040/000000/POSCAR | 40 +++++++++++++++++++ .../sys-0032/scale-1.040/000001/POSCAR | 40 +++++++++++++++++++ .../sys-0032/scale-1.040/000002/POSCAR | 40 +++++++++++++++++++ .../sys-0032/scale-1.060/000000/POSCAR | 40 +++++++++++++++++++ .../sys-0032/scale-1.060/000001/POSCAR | 40 +++++++++++++++++++ .../sys-0032/scale-1.060/000002/POSCAR | 40 +++++++++++++++++++ 6 files changed, 240 insertions(+) create mode 100644 tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.040/000000/POSCAR create mode 100644 tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.040/000001/POSCAR create mode 100644 tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.040/000002/POSCAR create mode 100644 tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.060/000000/POSCAR create mode 100644 tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.060/000001/POSCAR create mode 100644 tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.060/000002/POSCAR diff --git a/tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.040/000000/POSCAR b/tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.040/000000/POSCAR new file mode 100644 index 000000000..c0d25f361 --- /dev/null +++ b/tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.040/000000/POSCAR @@ -0,0 +1,40 @@ +POSCAR file written by OVITO +1.04 + 8.0852460528260064 0.0000000700556454 0.0000000165920926 + 0.0000000700459398 8.0852460812690730 -0.0000000878331783 + 0.0000000165994710 -0.0000000878293815 8.0852460822851544 + Al + 32 +Direct + 0.5000000055222689 0.0000000095334217 0.4999999965645718 + 0.5000000034529869 0.7499999969981677 0.2499999981901819 + 0.5000000083231070 -0.0000000011197416 0.0000000014389411 + -0.0000000004748536 0.0000000005310409 0.0000000063786655 + 0.7499999995103841 0.0000000012827216 0.2499999977659152 + 0.0000000000448991 0.0000000048734801 0.5000000037789456 + 0.2499999999857682 0.5000000028441646 0.7500000018075699 + 0.2500000030261135 0.7499999944555688 0.5000000026313640 + 0.2500000097291175 0.5000000129780301 0.2499999975347162 + 0.5000000035346804 0.7499999971754691 0.7500000063111742 + 0.0000000017515228 0.4999999968593779 0.5000000121259237 + 0.7499999970723630 0.7499999983979078 0.4999999968129619 + 0.7499999910768409 0.0000000040350684 0.7499999999698314 + -0.0000000040407868 0.7499999975356930 0.2499999972984601 + -0.0000000013440370 0.5000000065441171 -0.0000000011351674 + 0.5000000069922398 0.2500000092556521 0.7499999925172053 + 0.2500000074218809 -0.0000000006822529 0.2500000011219505 + 0.7499999954414514 0.2500000084865056 -0.0000000006412232 + 0.7499999967294295 0.4999999938307768 0.7500000077731870 + 0.5000000030383994 0.5000000063795264 -0.0000000001750512 + 0.2500000064022705 0.7499999919020213 0.0000000019913002 + 0.0000000043319078 0.7499999907276940 0.7500000023288846 + 0.2500000021034525 0.0000000026582450 0.7499999979035360 + 0.7500000002384054 0.5000000058460430 0.2499999970004410 + 0.2500000006602736 0.2500000097862060 -0.0000000026740404 + 
-0.0000000102302524 0.2500000011729165 0.2500000007828680 + -0.0000000041345229 0.2500000119032792 0.7499999956587747 + 0.7499999985377536 0.7499999945443449 0.0000000074372283 + 0.5000000022954397 0.4999999989012168 0.5000000041503138 + 0.2500000115215270 0.2500000006209973 0.5000000113478330 + 0.7500000009516035 0.2500000011452493 0.5000000093672294 + 0.5000000100332991 0.2500000001020252 0.2500000061404412 diff --git a/tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.040/000001/POSCAR b/tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.040/000001/POSCAR new file mode 100644 index 000000000..822bfddde --- /dev/null +++ b/tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.040/000001/POSCAR @@ -0,0 +1,40 @@ +Al + 1.0000000000000000 + 8.2797705159728547 0.0000000000000000 0.0000000000000000 + -0.1719719010892178 8.6486456546964234 0.0000000000000000 + -0.0010128078172081 0.1054655382171854 8.6490647580104696 + Al + 32 +Cartesian + 4.1405303605609065 -0.0002311813140235 -0.0001044821347404 + 6.2101270205420587 0.0274025201585026 2.1631618555812433 + 1.9860812664680023 4.3565843685598704 2.1650983474684780 + 4.0078083948451368 6.5615160044379568 6.4876604020180846 + 2.0278390331727807 2.2135513637631408 4.3236944572396503 + 4.0953786649781048 2.2492100294422253 6.4810339435242588 + -0.0875414497269289 4.3743834013208236 4.3201317850229435 + 2.0736042567106190 0.0228671778411013 2.1619287357074879 + 6.1696411956777881 2.2150078661563208 4.3251923877351022 + 2.0231935553242515 2.1675246049264332 -0.0015871536767673 + 2.0697228575465982 0.0796308216693133 6.4881338826802768 + 6.0718728633823593 6.5345829673724909 4.3249524423528563 + 6.1260924754969297 4.3544178531848186 2.1700539999418975 + 6.0794537263845294 6.4852313292416728 0.0015781459271990 + -0.0432175780609881 2.1967100107205435 2.1657427325361875 + -0.1253958469750332 6.5189656427965783 2.1567228526414568 + 0.0011475057041571 0.0057830702773709 0.0037842622675432 + 1.9347714457652205 6.5376281930141227 4.3309171757107432 + -0.1318239919085722 6.5598304611092644 6.4942196366725833 + 6.2126013464109491 0.0748052719901856 6.4830859016113873 + 4.0566814116069088 4.3260615903766206 0.0036768623969541 + 4.0552518198425744 4.3755027511287912 4.3233549827874995 + 4.0931949504606155 2.1918523869765418 2.1567781105403996 + -0.0859138586713617 4.3244750685396607 -0.0001642409340360 + 4.1410642367747599 0.0526367919882268 4.3269464189048445 + 1.9824002179387932 4.4020860732001070 6.4886621332933405 + 6.1694313242655578 2.1596032692396117 0.0024578050259201 + -0.0382210609417227 2.2399140383166798 6.4831900551393771 + 4.0090325775580240 6.5139439803656938 2.1610083538493630 + 1.9422012459502989 6.4887816039836386 -0.0014546717614078 + -0.0024327533265666 0.0493459591650801 4.3216382226840757 + 6.1257798713385236 4.4062854392471484 6.4890048938443261 diff --git a/tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.040/000002/POSCAR b/tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.040/000002/POSCAR new file mode 100644 index 000000000..a32dbcdcf --- /dev/null +++ b/tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.040/000002/POSCAR @@ -0,0 +1,40 @@ +Al + 1.0000000000000000 + 8.2095456552916222 0.0000000000000000 0.0000000000000000 + -0.2286402743603651 8.6043644363763701 0.0000000000000000 + -0.0854312756696513 0.2303641815368059 8.2540862211675634 + Al + 32 +Cartesian + 4.0306771832041433 2.2066722958454528 2.0659499762400642 + 
3.9151098746436204 6.5167215563456802 2.0596964734510785 + 4.1077432130635714 0.0034764787551480 -0.0006236073814073 + 6.1379489191490046 0.0621351322778998 2.0578263563441910 + 3.9920482810774476 4.3011538909470639 0.0006335990343001 + -0.1949340111748481 6.5111390966709761 2.0643396931358104 + -0.0463856323727473 0.1142994955204610 4.1244311196037833 + 6.0952948094811621 0.1729888932314524 6.1926808591164919 + 4.0664344218920485 0.1100139469701655 4.1206812470148089 + 6.0998444929030891 2.1505475346234428 -0.0008449794097100 + 3.9863296520655518 2.3222885078447568 6.1918144614588400 + -0.1209016406533330 2.3228404619168743 6.1906560340571639 + 1.8807466235222732 6.4532438855725953 -0.0001516652152993 + -0.2395163861307614 6.6235798336430829 6.1872455636950026 + 0.0025147768770161 -0.0036452495960412 0.0013932280132470 + -0.0774171953107122 2.2090747731610469 2.0631980492036508 + 1.9926291269205347 0.1692401152050739 6.1955859284074473 + 1.8357205748765402 6.5677154774808173 4.1263622066414323 + 6.0605653449404215 2.2579040246556001 4.1295688716429204 + -0.1145731952354822 4.3025332838938875 0.0002034325056423 + 2.0285609270479301 0.0566514378431752 2.0691391271530861 + 3.9484454855523943 4.4183019897368441 4.1259239183811047 + -0.1562941091523503 4.4148144783033310 4.1218372204552605 + 5.9888446160288860 6.4572474392376282 -0.0053712419664877 + 1.8705640110869806 4.4688581130775136 6.1928385980937639 + 6.0243391462447384 4.3596689965884412 2.0572276103221316 + 1.9548418838285899 2.2603793712918661 4.1249155308753114 + 3.8725040967945219 6.6258873546504624 6.1934468686522104 + 1.9164525698273898 4.3586937734571798 2.0631666976308018 + 5.9422421861362489 6.5697524312290252 4.1250284026673434 + 1.9946789721739846 2.1477356173779114 -0.0011905739695201 + 5.9756996701929497 4.4740331240564739 6.1925671609051642 diff --git a/tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.060/000000/POSCAR b/tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.060/000000/POSCAR new file mode 100644 index 000000000..d2c2f1415 --- /dev/null +++ b/tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.060/000000/POSCAR @@ -0,0 +1,40 @@ +POSCAR file written by OVITO +1.06 + 8.0852460528260064 0.0000000700556454 0.0000000165920926 + 0.0000000700459398 8.0852460812690730 -0.0000000878331783 + 0.0000000165994710 -0.0000000878293815 8.0852460822851544 + Al + 32 +Direct + 0.5000000034529869 0.7499999969981677 0.2499999981901819 + 0.5000000083231070 -0.0000000011197416 0.0000000014389411 + 0.7500000009516035 0.2500000011452493 0.5000000093672294 + 0.7499999954414514 0.2500000084865056 -0.0000000006412232 + 0.5000000069922398 0.2500000092556521 0.7499999925172053 + 0.2500000074218809 -0.0000000006822529 0.2500000011219505 + 0.0000000000448991 0.0000000048734801 0.5000000037789456 + 0.0000000043319078 0.7499999907276940 0.7500000023288846 + 0.7500000002384054 0.5000000058460430 0.2499999970004410 + 0.5000000022954397 0.4999999989012168 0.5000000041503138 + 0.2500000097291175 0.5000000129780301 0.2499999975347162 + 0.0000000017515228 0.4999999968593779 0.5000000121259237 + 0.5000000055222689 0.0000000095334217 0.4999999965645718 + 0.5000000035346804 0.7499999971754691 0.7500000063111742 + 0.2500000006602736 0.2500000097862060 -0.0000000026740404 + -0.0000000041345229 0.2500000119032792 0.7499999956587747 + 0.2499999999857682 0.5000000028441646 0.7500000018075699 + 0.7499999985377536 0.7499999945443449 0.0000000074372283 + 0.7499999995103841 0.0000000012827216 
0.2499999977659152 + 0.7499999967294295 0.4999999938307768 0.7500000077731870 + 0.2500000115215270 0.2500000006209973 0.5000000113478330 + 0.7499999970723630 0.7499999983979078 0.4999999968129619 + -0.0000000004748536 0.0000000005310409 0.0000000063786655 + 0.7499999910768409 0.0000000040350684 0.7499999999698314 + -0.0000000040407868 0.7499999975356930 0.2499999972984601 + -0.0000000102302524 0.2500000011729165 0.2500000007828680 + 0.2500000030261135 0.7499999944555688 0.5000000026313640 + 0.2500000021034525 0.0000000026582450 0.7499999979035360 + 0.5000000100332991 0.2500000001020252 0.2500000061404412 + -0.0000000013440370 0.5000000065441171 -0.0000000011351674 + 0.5000000030383994 0.5000000063795264 -0.0000000001750512 + 0.2500000064022705 0.7499999919020213 0.0000000019913002 diff --git a/tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.060/000001/POSCAR b/tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.060/000001/POSCAR new file mode 100644 index 000000000..36606db02 --- /dev/null +++ b/tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.060/000001/POSCAR @@ -0,0 +1,40 @@ +Al + 1.0000000000000000 + 8.3598251083796260 0.0000000000000000 0.0000000000000000 + -0.1014062409602743 8.7520905745998139 0.0000000000000000 + -0.0687187698129919 0.2297954247305231 8.4431009214511974 + Al + 32 +Cartesian + 6.1683189190564951 4.5502784129692708 6.3296577854868010 + 6.1965525638957679 6.5680765848084750 0.0029863609922990 + 2.0396624544096955 0.1732344670827352 6.3322678633817251 + 6.2130901492377939 2.2989932295758888 4.2302216016831782 + -0.0520945632303641 4.3765674258355460 -0.0019779716557334 + -0.0014490531239594 0.0012499082614180 -0.0010991018816276 + -0.1314129164714025 6.7351442984719725 6.3334773536245867 + 4.0456687581733002 6.7364197513151751 6.3249997121311603 + -0.0937651071923047 6.6208677247742465 2.1123046275329069 + 4.0960677591482924 4.4926744904871390 4.2211873921508687 + 4.0880108686193317 6.6202639448043019 2.1017079440084196 + -0.0795969571128076 2.3607658696959479 6.3291855425176813 + 1.9821746056744098 6.6786660367944020 4.2232289243210728 + 2.0619415753574173 2.1905908271047374 0.0029559804043806 + -0.0423314300948392 2.2481168790599964 2.1152302614428637 + 6.2580765405823202 0.0586103911760227 2.1189007725333151 + 6.2474387239813538 2.1793900662909729 0.0007938903889081 + 2.0147829169000611 6.5645865794055860 -0.0001444989147900 + -0.0258599514919108 0.1117266738246711 4.2242506233803780 + 4.1299576049645594 2.2428788732388525 2.1156576001669882 + 4.1780305116160363 0.0007882753632893 0.0020554992942880 + 6.1980770425407243 4.4325703498605566 2.1177678801435378 + 2.0761042989529921 0.0635168993108198 2.1141164816869815 + 4.1245594764626183 4.3737564400720563 0.0016920716977401 + 4.1049053635275321 2.3584914444124645 6.3317740374727416 + 2.0300731360268154 2.3110137563067226 4.2250342291910101 + -0.0825921356405149 4.5001269110724493 4.2197502279248438 + 6.2200264777784939 0.1690022116720367 6.3297823964633926 + 4.1402157372025750 0.1130411806163994 4.2157293332569230 + 1.9873675974746883 4.5475018134559129 6.3330281685682630 + 2.0218653030525817 4.4335739372995064 2.1107199808299701 + 6.1513189901181322 6.6792290755422474 4.2210350859448864 diff --git a/tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.060/000002/POSCAR b/tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.060/000002/POSCAR new file mode 100644 index 000000000..ab31e5a1a --- /dev/null +++ 
b/tests/generator/data/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.060/000002/POSCAR @@ -0,0 +1,40 @@ +Al + 1.0000000000000000 + 8.5898428958538098 0.0000000000000000 0.0000000000000000 + 0.0309441405711094 8.4546949858918694 0.0000000000000000 + -0.0097566977499483 0.1352429661398475 8.6069624601986590 + Al + 32 +Cartesian + 6.4505233521585650 2.1132720745167752 -0.0002917768226800 + 4.2856914127143968 0.0029095529351454 0.0017848218181846 + 4.3222301043099822 6.3756488928484174 2.1447310524399770 + 2.1717132096933316 6.3408284501098091 0.0012979398509617 + 6.4660412818589155 6.3408284140534965 0.0008257072136749 + 6.4415828204454799 0.0373236972123070 2.1567463274061964 + 4.2931753316114944 0.0669786487775630 4.3027987284844418 + 0.0194443480527756 6.4431701023804635 6.4497730612072282 + 4.3060466719098018 4.2307551072148986 -0.0076141629479681 + 6.4647457206166568 6.4159376018330612 4.3043627733846455 + 6.4543672215286696 4.2616842707592282 2.1501340036161087 + 0.0178633739715773 6.3773805485049309 2.1510337600328335 + 0.0074211862640189 2.1388573002970750 2.1499882226977185 + -0.0090807869046197 0.0663696800456818 4.3063098880206878 + 2.1561472240230288 2.1159006184690967 0.0003640614342978 + 2.1666028329523659 6.4100635002351751 4.3039636538495722 + 4.2938374404072981 2.2154389707323623 6.4518622611638312 + 2.1607276454484490 4.2586788732587717 2.1431275751005945 + 2.1545652702527165 2.1818588369923066 4.2971275692596187 + 0.0154806579451338 4.2247284677183679 0.0023145646149488 + 6.4488405336196610 2.1799745727408162 4.3004273550219212 + -0.0067627517102337 2.2110390808792646 6.4508074158614228 + 2.1450165614049719 0.0337577346617454 2.1516105872138498 + 4.3028135306784758 2.1478273727691706 2.1476429789225437 + -0.0016945267055740 0.0002113575755896 0.0011305540017342 + 2.1361451381142724 0.0977829525904214 6.4504510973337910 + 6.4334091010848260 0.1075410768182249 6.4607377349253454 + 6.4486872853119488 4.3313683286982663 6.4579060261439194 + 4.3048400221154921 4.2973032817718257 4.3031856299503035 + 0.0120088951200463 4.2991197661156040 4.3076853311025882 + 4.3123633082405872 6.4471478938493689 6.4530621833311201 + 2.1555961162651558 4.3296495732327607 6.4545934390565591 From 4bb587b0b3c8890a7002cd406fb37db5dbf2c224 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Wed, 4 Dec 2019 21:34:38 +0800 Subject: [PATCH 075/109] add missing template files --- tests/generator/lmp/input.lammps | 35 ++++++++++++++++++++++++++++++++ tests/generator/lmp/input.plumed | 4 ++++ 2 files changed, 39 insertions(+) create mode 100644 tests/generator/lmp/input.lammps create mode 100644 tests/generator/lmp/input.plumed diff --git a/tests/generator/lmp/input.lammps b/tests/generator/lmp/input.lammps new file mode 100644 index 000000000..946ad05c5 --- /dev/null +++ b/tests/generator/lmp/input.lammps @@ -0,0 +1,35 @@ +variable NSTEPS equal V_NSTEPS +variable THERMO_FREQ equal 10 +variable DUMP_FREQ equal 10 +variable TEMP equal V_TEMP +variable PRES equal V_PRES +variable TAU_T equal 0.100000 +variable TAU_P equal 0.500000 + +units metal +boundary p p p +atom_style atomic + +neighbor 1.0 bin + +box tilt large +read_data conf.lmp +change_box all triclinic +mass 1 27.000000 +mass 2 24.000000 + +pair_style deepmd ../graph.003.pb ../graph.001.pb ../graph.002.pb ../graph.000.pb out_freq ${THERMO_FREQ} out_file model_devi.out +pair_coeff + +fix dpgen_plm + +thermo_style custom step temp pe ke etotal press vol lx ly lz xy xz yz +thermo ${THERMO_FREQ} + +dump dpgen_dump + +velocity all create ${TEMP} 826513 +fix 1 
all npt temp ${TEMP} ${TEMP} ${TAU_T} iso ${PRES} ${PRES} ${TAU_P} + +timestep 0.002000 +run ${NSTEPS} diff --git a/tests/generator/lmp/input.plumed b/tests/generator/lmp/input.plumed new file mode 100644 index 000000000..598b924b1 --- /dev/null +++ b/tests/generator/lmp/input.plumed @@ -0,0 +1,4 @@ +DISTANCE ATOMS=3,5 LABEL=d1 +DISTANCE ATOMS=2,4 LABEL=d2 +RESTRAINT ARG=d1,d2 AT=V_DIST0,V_DIST1 KAPPA=150.0,150.0 LABEL=restraint +PRINT ARG=restraint.bias From 70166909e66471297d7442811cc97422a143960a Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 4 Dec 2019 18:35:35 -0500 Subject: [PATCH 076/109] support simplify LabeledSystem --- dpgen/simplify/simplify.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/dpgen/simplify/simplify.py b/dpgen/simplify/simplify.py index 4103f83b6..ddf3aa2ed 100644 --- a/dpgen/simplify/simplify.py +++ b/dpgen/simplify/simplify.py @@ -37,16 +37,27 @@ detail_file_name_prefix = "details" +def get_system_cls(jdata): + if jdata.get("labeled", False): + return dpdata.LabeledSystem + return dpdata.System + + +def get_systems(path, jdata): + system = get_system_cls(jdata) + systems = dpdata.MultiSystems( + *[system(os.path.join(path, s), fmt='deepmd/npy') for s in os.listdir(path)]) + return systems + + def init_pick(iter_index, jdata, mdata): """pick up init data from dataset randomly""" pick_data = jdata['pick_data'] init_pick_number = jdata['init_pick_number'] # use MultiSystems with System # TODO: support System and LabeledSystem - # TODO: support MultiSystems with LabeledSystem # TODO: support other format - systems = dpdata.MultiSystems( - *[dpdata.System(os.path.join(pick_data, s), fmt='deepmd/npy') for s in os.listdir(pick_data)]) + systems = get_systems(pick_data, jdata) # label the system labels = [] for key, system in systems.systems.items(): @@ -197,7 +208,8 @@ def post_model_devi(iter_index, jdata, mdata): f_std = np.max(f_std, axis=1) # (n_frame,) - for subsys, e_devi, f_devi in zip(dpdata.System(os.path.join(task, rest_data_name), fmt='deepmd/npy'), e_std, f_std): + system_cls = get_system_cls(jdata) + for subsys, e_devi, f_devi in zip(system_cls(os.path.join(task, rest_data_name), fmt='deepmd/npy'), e_std, f_std): if (e_devi < e_trust_hi and e_devi >= e_trust_lo) or (f_devi < f_trust_hi and f_devi >= f_trust_lo) : sys_candinate.append(subsys) elif (e_devi >= e_trust_hi ) or (f_devi >= f_trust_hi ): @@ -251,8 +263,12 @@ def make_fp(iter_index, jdata, mdata): work_path = os.path.join(iter_name, fp_name) create_path(work_path) picked_data_path = os.path.join(iter_name, model_devi_name, picked_data_name) - systems = dpdata.MultiSystems( - *[dpdata.System(os.path.join(picked_data_path, s), fmt='deepmd/npy') for s in os.listdir(picked_data_path)]) + if jdata.get("labeled", False): + dlog.info("already labeled, skip make_fp and link data directly") + os.symlink(os.path.abspath(picked_data_path), os.path.abspath( + os.path.join(work_path, "data.%03d" % 0))) + return + systems = get_systems(picked_data_path, jdata) fp_style = jdata['fp_style'] if 'user_fp_params' in jdata.keys() : fp_params = jdata['user_fp_params'] From 0ba73f3c7cf484fbe5208de20147c3f8e2e3a2ff Mon Sep 17 00:00:00 2001 From: Han Wang Date: Thu, 5 Dec 2019 08:08:24 +0800 Subject: [PATCH 077/109] fix bug of requiring nframe in run model devi --- dpgen/generator/run.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index fcab11136..e95a428e9 100644 --- a/dpgen/generator/run.py +++ 
b/dpgen/generator/run.py @@ -881,8 +881,6 @@ def run_model_devi (iter_index, fp = open (os.path.join(work_path, 'cur_job.json'), 'r') cur_job = json.load (fp) - ensemble, nsteps, trj_freq, temps, press, pka_e, dt = parse_cur_job(cur_job) - nframes = nsteps // trj_freq + 1 run_tasks_ = all_task # for ii in all_task: From daeaa68634787ce49b5e681a7eca5c5a0185b254 Mon Sep 17 00:00:00 2001 From: Yuan Fengbo Date: Thu, 5 Dec 2019 11:08:09 +0800 Subject: [PATCH 078/109] modify var name relating to cuda & gpu --- dpgen/dispatcher/Batch.py | 18 +++++++++--------- tests/dispatcher/shell/test_shell_local.py | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/dpgen/dispatcher/Batch.py b/dpgen/dispatcher/Batch.py index 3f9629171..0b6d16b71 100644 --- a/dpgen/dispatcher/Batch.py +++ b/dpgen/dispatcher/Batch.py @@ -74,13 +74,13 @@ def sub_script(self, # loop over commands self.cmd_cnt = 0 try: - self.manual_gpu = res['manual_cuda_devices'] - except: - self.manual_gpu = 0 + self.manual_cuda_devices = res['manual_cuda_devices'] + except KeyError: + self.manual_cuda_devices = 0 try: - self.manual_gpu_multiplicity = res['manual_cuda_multiplicity'] - except: - self.manual_gpu_multiplicity = 1 + self.manual_cuda_multiplicity = res['manual_cuda_multiplicity'] + except KeyError: + self.manual_cuda_multiplicity = 1 for ii in range(len(cmd)): # for one command ret += self._sub_script_inner(job_dirs, @@ -140,7 +140,7 @@ def _sub_script_inner(self, for ii,jj in zip(job_dirs, args) : ret += 'cd %s\n' % ii ret += 'test $? -ne 0 && exit\n\n' - if self.manual_gpu <= 0: + if self.manual_cuda_devices <= 0: ret += 'if [ ! -f tag_%d_finished ] ;then\n' % idx ret += ' %s 1>> %s 2>> %s \n' % (self.sub_script_cmd(cmd, jj, res), outlog, errlog) if res['allow_failure'] is False: @@ -151,11 +151,11 @@ def _sub_script_inner(self, else : # do not support task-wise restart tmp_cmd = ' %s 1>> %s 2>> %s ' % (self.sub_script_cmd(cmd, jj, res), outlog, errlog) - ret += 'CUDA_VISIBLE_DEVICES=%d %s &\n\n' % ((self.cmd_cnt % self.manual_gpu), tmp_cmd) + ret += 'CUDA_VISIBLE_DEVICES=%d %s &\n\n' % ((self.cmd_cnt % self.manual_cuda_devices), tmp_cmd) self.cmd_cnt += 1 ret += 'cd %s\n' % self.context.remote_root ret += 'test $? 
-ne 0 && exit\n'
-        if self.manual_gpu > 0 and self.cmd_cnt % (self.manual_gpu * self.manual_gpu_multiplicity) == 0:
+        if self.manual_cuda_devices > 0 and self.cmd_cnt % (self.manual_cuda_devices * self.manual_cuda_multiplicity) == 0:
             ret += '\nwait\n\n'
         ret += '\nwait\n\n'
         return ret
diff --git a/tests/dispatcher/shell/test_shell_local.py b/tests/dispatcher/shell/test_shell_local.py
index 8610d9bd5..b4d02ba67 100644
--- a/tests/dispatcher/shell/test_shell_local.py
+++ b/tests/dispatcher/shell/test_shell_local.py
@@ -33,14 +33,14 @@ def tearDown(self):
         if os.path.exists('run.sub.1'):
             os.remove('run.sub.1')

-    def test_manual_gpu(self):
+    def test_manual_cuda_devices(self):
         job_dirs = ['task0', 'task1']
         res = {'manual_cuda_devices': 3}
         ret = self.shell.sub_script(job_dirs, ['touch test1', 'touch test2'], res = res)
         with open('run.sub.gpu', 'w') as fp:
             fp.write(ret)

-    def test_manual_gpu_multi(self):
+    def test_manual_cuda_multiplicity(self):
         job_dirs = ['task0', 'task1', 'task2', 'task3']
         res = {'manual_cuda_devices': 2, 'manual_cuda_multiplicity': 2}
         ret = self.shell.sub_script(job_dirs, ['touch test1', 'touch test2'], res = res)

From 6e29e29f56fb93238e383d60c0539134045923aa Mon Sep 17 00:00:00 2001
From: Yuan Fengbo
Date: Fri, 6 Dec 2019 19:31:11 +0800
Subject: [PATCH 079/109] add doc for manual_cuda_multiplicity &&
 manual_cuda_devices ; add CH4 param.json for deepmd-kit-1.1.0

---
 README.md                                     |   2 +
 .../CH4/param_CH4_deepmd-kit-1.1.0.json       | 138 ++++++++++++++++++
 2 files changed, 140 insertions(+)
 create mode 100644 examples/run/dp-lammps-vasp/CH4/param_CH4_deepmd-kit-1.1.0.json

diff --git a/README.md b/README.md
index 675d5e7f4..3fb0931a8 100644
--- a/README.md
+++ b/README.md
@@ -1080,6 +1080,8 @@ The following table gives explicit descriptions on keys in param.json.
 | numb_node | Integer | 1 | Node count required for the job
 | task_per_node | Integer | 4 | Number of CPU cores required
 | numb_gpu | Integer | 4 | Number of GPUs required
+| manual_cuda_devices | 1 | Used with key "manual_cuda_multiplicity" to specify the number of GPUs to use
+| manual_cuda_multiplicity | 5 | Used in 01.model_devi, together with key "manual_cuda_devices", to specify how many MD programs run on one GPU at the same time; dpgen will automatically allocate MD jobs on different GPUs. This can improve GPU usage for GPUs like V100.
 | node_cpu | Integer | 4 | Only for LSF. The number of CPU cores on each node that should be allocated to the job.
 | source_list | List of string | "....../vasp.env" | Environment needed for certain job. For example, if "env" is in the list, 'source env' will be written in the script.
 | module_list | List of string | [ "Intel/2018", "Anaconda3"] | For example, If "Intel/2018" is in the list, "module load Intel/2018" will be written in the script.
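To connect the new table rows to the dispatcher code they configure, here is a minimal usage sketch in the spirit of the unit tests above. This is illustrative only: the `shell` object is assumed to be a `Shell` batch instance configured as in the `setUp` of `tests/dispatcher/shell/test_shell_local.py`, and the `touch` commands are placeholders.

```python
# Hedged sketch, assuming `shell` is a Shell batch object built as in the
# test fixtures. With manual_cuda_devices=2 and manual_cuda_multiplicity=2,
# the generated submission script pins task i to CUDA_VISIBLE_DEVICES=(i % 2)
# and inserts a shell "wait" barrier after every 2 * 2 = 4 background tasks.
job_dirs = ['task0', 'task1', 'task2', 'task3']
res = {'manual_cuda_devices': 2, 'manual_cuda_multiplicity': 2}
ret = shell.sub_script(job_dirs, ['touch test1', 'touch test2'], res=res)
with open('run.sub.gpu', 'w') as fp:
    fp.write(ret)  # inspect the script to see the GPU pinning and barriers
```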
diff --git a/examples/run/dp-lammps-vasp/CH4/param_CH4_deepmd-kit-1.1.0.json b/examples/run/dp-lammps-vasp/CH4/param_CH4_deepmd-kit-1.1.0.json new file mode 100644 index 000000000..b2bba7be7 --- /dev/null +++ b/examples/run/dp-lammps-vasp/CH4/param_CH4_deepmd-kit-1.1.0.json @@ -0,0 +1,138 @@ +{ + "type_map": [ + "H", + "C" + ], + "mass_map": [ + 1, + 12 + ], + "init_data_prefix": "/data1/yfb222333/2_dpgen_gpu_multi", + "init_data_sys": [ + "CH4.POSCAR.01x01x01/02.md/sys-0004-0001/deepmd" + ], + "sys_configs_prefix": "/data1/yfb222333/2_dpgen_gpu_multi", + "sys_configs": [ + [ + "CH4.POSCAR.01x01x01/01.scale_pert/sys-0004-0001/scale*/00000*/POSCAR" + ], + [ + "CH4.POSCAR.01x01x01/01.scale_pert/sys-0004-0001/scale*/00001*/POSCAR" + ] + ], + "_comment": " that's all ", + "numb_models": 4, + "default_training_param": { + "model": { + "type_map": [ + "H", + "C" + ], + "descriptor": { + "type": "se_a", + "sel": [ + 16, + 4 + ], + "rcut_smth": 0.5, + "rcut": 5, + "neuron": [ + 120, + 120, + 120 + ], + "resnet_dt": true, + "axis_neuron": 12, + "seed": 1 + }, + "fitting_net": { + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "seed": 1 + } + }, + "learning_rate": { + "type": "exp", + "start_lr": 0.001, + "decay_steps": 100, + "decay_rate": 0.95 + }, + "loss": { + "start_pref_e": 0.02, + "limit_pref_e": 2, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0.0, + "limit_pref_v": 0.0 + }, + "training": { + "set_prefix": "set", + "stop_batch": 2000, + "batch_size": 1, + "disp_file": "lcurve.out", + "disp_freq": 1000, + "numb_test": 4, + "save_freq": 1000, + "save_ckpt": "model.ckpt", + "load_ckpt": "model.ckpt", + "disp_training": true, + "time_training": true, + "profiling": false, + "profiling_file": "timeline.json", + "_comment": "that's all" + } + }, + "model_devi_dt": 0.002, + "model_devi_skip": 0, + "model_devi_f_trust_lo": 0.05, + "model_devi_f_trust_hi": 0.15, + "model_devi_e_trust_lo": 10000000000.0, + "model_devi_e_trust_hi": 10000000000.0, + "model_devi_clean_traj": true, + "model_devi_jobs": [ + { + "sys_idx": [ + 0 + ], + "temps": [ + 100 + ], + "press": [ + 1.0 + ], + "trj_freq": 10, + "nsteps": 300, + "ensemble": "nvt", + "_idx": "00" + }, + { + "sys_idx": [ + 1 + ], + "temps": [ + 100 + ], + "press": [ + 1.0 + ], + "trj_freq": 10, + "nsteps": 3000, + "ensemble": "nvt", + "_idx": "01" + } + ], + "fp_style": "vasp", + "shuffle_poscar": false, + "fp_task_max": 20, + "fp_task_min": 5, + "fp_pp_path": "/data1/yfb222333/2_dpgen_gpu_multi", + "fp_pp_files": [ + "H/POTCAR", + "C/POTCAR" + ], + "fp_incar": "/data1/yfb222333/2_dpgen_gpu_multi/INCAR_methane" +} From 39671383a3b825fd04c3f53ce9dfb05cf3091c37 Mon Sep 17 00:00:00 2001 From: Yuan Fengbo Date: Fri, 6 Dec 2019 19:32:59 +0800 Subject: [PATCH 080/109] modify doc --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3fb0931a8..5ee6ac3e1 100644 --- a/README.md +++ b/README.md @@ -1079,8 +1079,8 @@ The following table gives explicit descriptions on keys in param.json. 
| # Followings are keys in resources
 | numb_node | Integer | 1 | Node count required for the job
 | task_per_node | Integer | 4 | Number of CPU cores required
 | numb_gpu | Integer | 4 | Number of GPUs required
-| manual_cuda_devices | 1 | Used with key "manual_cuda_multiplicity" to specify the number of GPUs to use
+| manual_cuda_devices | Integer | 1 | Used with key "manual_cuda_multiplicity" to specify the number of GPUs to use
 | manual_cuda_multiplicity | 5 | Used in 01.model_devi, together with key "manual_cuda_devices", to specify how many MD programs run on one GPU at the same time; dpgen will automatically allocate MD jobs on different GPUs. This can improve GPU usage for GPUs like V100.
 | node_cpu | Integer | 4 | Only for LSF. The number of CPU cores on each node that should be allocated to the job.
 | source_list | List of string | "....../vasp.env" | Environment needed for certain job. For example, if "env" is in the list, 'source env' will be written in the script.
 | module_list | List of string | [ "Intel/2018", "Anaconda3"] | For example, If "Intel/2018" is in the list, "module load Intel/2018" will be written in the script.

From 18450384f9657c67239f3c92ddd1b182eef4b14f Mon Sep 17 00:00:00 2001
From: Yuan Fengbo
Date: Fri, 6 Dec 2019 19:36:20 +0800
Subject: [PATCH 081/109] add param_al_all_gpu-deepmd-kit-1.1.0.json && modify
 doc

---
 README.md                                     |   2 +-
 .../Al/param_al_all_gpu-deepmd-kit-1.1.0.json | 379 ++++++++++++++++++
 2 files changed, 380 insertions(+), 1 deletion(-)
 create mode 100644 examples/run/dp-lammps-vasp/Al/param_al_all_gpu-deepmd-kit-1.1.0.json

diff --git a/README.md b/README.md
index 5ee6ac3e1..d9fed8fe8 100644
--- a/README.md
+++ b/README.md
@@ -1081,7 +1081,7 @@ The following table gives explicit descriptions on keys in param.json.
 | task_per_node | Integer | 4 | Number of CPU cores required
 | numb_gpu | Integer | 4 | Number of GPUs required
 | manual_cuda_devices | Integer | 1 | Used with key "manual_cuda_multiplicity" to specify the number of GPUs to use
-| manual_cuda_multiplicity | 5 | Used in 01.model_devi, together with key "manual_cuda_devices", to specify how many MD programs run on one GPU at the same time; dpgen will automatically allocate MD jobs on different GPUs. This can improve GPU usage for GPUs like V100.
+| manual_cuda_multiplicity | Integer | 5 | Used in 01.model_devi, together with key "manual_cuda_devices", to specify how many MD programs run on one GPU at the same time; dpgen will automatically allocate MD jobs on different GPUs. This can improve GPU usage for GPUs like V100.
 | node_cpu | Integer | 4 | Only for LSF. The number of CPU cores on each node that should be allocated to the job.
 | source_list | List of string | "....../vasp.env" | Environment needed for certain job. For example, if "env" is in the list, 'source env' will be written in the script.
 | module_list | List of string | [ "Intel/2018", "Anaconda3"] | For example, If "Intel/2018" is in the list, "module load Intel/2018" will be written in the script.
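For readers who want the behavior behind these two keys without digging through `Batch._sub_script_inner`, the scheduling rule can be summarized in a standalone sketch. This is illustrative pseudocode distilled from the renamed code in patch 078, not code shipped in the repository; the command strings are placeholders.

```python
# Standalone sketch of the rule in Batch._sub_script_inner: task i is launched
# with CUDA_VISIBLE_DEVICES = (i % manual_cuda_devices), and a shell "wait"
# barrier is emitted after every manual_cuda_devices * manual_cuda_multiplicity
# background jobs, plus a final barrier at the end of the script.
def gpu_round_robin(commands, manual_cuda_devices=1, manual_cuda_multiplicity=5):
    lines = []
    for i, cmd in enumerate(commands):
        lines.append('CUDA_VISIBLE_DEVICES=%d %s &' % (i % manual_cuda_devices, cmd))
        if (i + 1) % (manual_cuda_devices * manual_cuda_multiplicity) == 0:
            lines.append('wait')
    lines.append('wait')
    return '\n'.join(lines)

# With 1 GPU and multiplicity 5 (the table's example values), five MD jobs
# share the GPU before the generated script waits for them to finish.
print(gpu_round_robin(['cmd_%d' % i for i in range(7)]))
```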
diff --git a/examples/run/dp-lammps-vasp/Al/param_al_all_gpu-deepmd-kit-1.1.0.json b/examples/run/dp-lammps-vasp/Al/param_al_all_gpu-deepmd-kit-1.1.0.json new file mode 100644 index 000000000..cfe0b855d --- /dev/null +++ b/examples/run/dp-lammps-vasp/Al/param_al_all_gpu-deepmd-kit-1.1.0.json @@ -0,0 +1,379 @@ +{ + "type_map": ["Al"], + "mass_map": [27], + + "init_data_prefix": "/data1/yfb222333/2_dpgen_gpu_multi/init/", + + "init_data_sys": [ + "al.fcc.02x02x02/02.md/sys-0032/deepmd", + "al.hcp.02x02x02/02.md/sys-0016/deepmd", + "al.bcc.02x02x02/02.md/sys-0016/deepmd" + ], + "init_batch_size": [ + 1, + 2, + 2 + ], + "sys_configs": [ + + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.000/00000[0-4]/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.000/00000[5-9]/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.000/00001*/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.000/00002*/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.000/00003*/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.000/00004*/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.000/00005*/POSCAR", + "/data1/yfb222333/2_dpgen_gpu_multi/init/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.000/00006*/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.000/00007*/POSCAR", + "/data1/yfb222333/2_dpgen_gpu_multi/init/al.fcc.02x02x02/01.scale_pert/sys-0032/scale-1.000/00008*/POSCAR"], + + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.hcp.02x02x02/01.scale_pert/sys-0016/scale-1.000/00000[0-4]/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.hcp.02x02x02/01.scale_pert/sys-0016/scale-1.000/00000[5-9]/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.hcp.02x02x02/01.scale_pert/sys-0016/scale-1.000/00001*/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.hcp.02x02x02/01.scale_pert/sys-0016/scale-1.000/00002*/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.hcp.02x02x02/01.scale_pert/sys-0016/scale-1.000/00003*/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.hcp.02x02x02/01.scale_pert/sys-0016/scale-1.000/00004*/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.hcp.02x02x02/01.scale_pert/sys-0016/scale-1.000/00005*/POSCAR", + "/data1/yfb222333/2_dpgen_gpu_multi/init/al.hcp.02x02x02/01.scale_pert/sys-0016/scale-1.000/00006*/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.hcp.02x02x02/01.scale_pert/sys-0016/scale-1.000/00007*/POSCAR", + "/data1/yfb222333/2_dpgen_gpu_multi/init/al.hcp.02x02x02/01.scale_pert/sys-0016/scale-1.000/00008*/POSCAR"], + + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.bcc.02x02x02/01.scale_pert/sys-0016/scale-1.000/00000[0-4]/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.bcc.02x02x02/01.scale_pert/sys-0016/scale-1.000/00000[5-9]/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.bcc.02x02x02/01.scale_pert/sys-0016/scale-1.000/00001*/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.bcc.02x02x02/01.scale_pert/sys-0016/scale-1.000/00002*/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.bcc.02x02x02/01.scale_pert/sys-0016/scale-1.000/00003*/POSCAR"], + 
["/data1/yfb222333/2_dpgen_gpu_multi/init/al.bcc.02x02x02/01.scale_pert/sys-0016/scale-1.000/00004*/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.bcc.02x02x02/01.scale_pert/sys-0016/scale-1.000/00005*/POSCAR", + "/data1/yfb222333/2_dpgen_gpu_multi/init/al.bcc.02x02x02/01.scale_pert/sys-0016/scale-1.000/00006*/POSCAR"], + ["/data1/yfb222333/2_dpgen_gpu_multi/init/al.bcc.02x02x02/01.scale_pert/sys-0016/scale-1.000/00007*/POSCAR", + "/data1/yfb222333/2_dpgen_gpu_multi/init/al.bcc.02x02x02/01.scale_pert/sys-0016/scale-1.000/00008*/POSCAR"] + ], + "_comment": " 00.train ", + "numb_models": 4, + "default_training_param" : { + "model":{ + "_comment": " model parameters", + "type_map":["Al"], + "descriptor":{ + "type": "se_a", + "sel": [300], + "rcut_smth": 2.00, + "rcut": 8.00, + "neuron": [240, 240, 240], + "resnet_dt": true, + "axis_neuron": 12, + "seed": 1 +}, + "fitting_net":{ + "neuron": [25, 50, 100], + "resnet_dt": false, + "sedd": 1 +}}, + "learning_rate":{ + "type": "exp", + "start_lr": 0.001, + "decay_steps": 2000, + "decay_rate": 0.95 +}, + "loss":{ + "start_pref_e": 0.02, + "limit_pref_e": 2, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0.0, + "limit_pref_v": 0.0 +}, + "training":{ + "coord_norm": true, + "type_fitting_net": false, + "_comment": " traing controls", + "systems": [], + "set_prefix": "set", + "stop_batch": 20000, + "batch_size": 1, + "seed": 0, + "_comment": " display and restart", + "_comment": " frequencies counted in batch", + "disp_file": "lcurve.out", + "disp_freq": 2000, + "numb_test": 4, + "save_freq": 2000, + "save_ckpt": "model.ckpt", + "load_ckpt": "model.ckpt", + "disp_training": true, + "time_training": true, + "profiling": false, + "profiling_file": "timeline.json", + "_comment": "that's all"} + }, + + "_comment": " 01.model_devi ", + "_comment": "model_devi_skip: the first x of the recorded frames", + "model_devi_dt": 0.002, + "model_devi_skip": 0, + "model_devi_f_trust_lo": 0.05, + "model_devi_f_trust_hi": 0.20, + "model_devi_e_trust_lo": 1e10, + "model_devi_e_trust_hi": 1e10, + "model_devi_clean_traj": false, + "model_devi_jobs": +[ + { + "_idx": 0, + "ensemble": "npt", + "nsteps": 1000, + "press": [ + 1.0, + 10.0, + 100.0, + 1000.0, + 5000.0, + 10000.0, + 20000.0, + 50000.0 + ], + "sys_idx": [ + 0, + 8, + 16 + ], + "temps": [ + 50, + 132.0, + 198.0, + 264.0 + ], + "trj_freq": 10 + }, + { + "_idx": 1, + "ensemble": "npt", + "nsteps": 1000, + "press": [ + 1.0, + 10.0, + 100.0, + 1000.0, + 5000.0, + 10000.0, + 20000.0, + 50000.0 + ], + "sys_idx": [ + 1, + 9, + 17 + ], + "temps": [ + 50, + 132.0, + 198.0, + 264.0 + ], + "trj_freq": 10 + }, + { + "_idx": 2, + "ensemble": "npt", + "nsteps": 3000, + "press": [ + 1.0, + 10.0, + 100.0, + 1000.0, + 5000.0, + 10000.0, + 20000.0, + 50000.0 + ], + "sys_idx": [ + 2, + 10, + 18 + ], + "temps": [ + 50, + 132.0, + 198.0, + 264.0 + ], + "trj_freq": 10 + }, + { + "_idx": 3, + "ensemble": "npt", + "nsteps": 3000, + "press": [ + 1.0, + 10.0, + 100.0, + 1000.0, + 5000.0, + 10000.0, + 20000.0, + 50000.0 + ], + "sys_idx": [ + 3, + 11, + 19 + ], + "temps": [ + 50, + 132.0, + 198.0, + 264.0 + ], + "trj_freq": 10 + }, + { + "_idx": 4, + "ensemble": "npt", + "nsteps": 3000, + "press": [ + 1.0, + 10.0, + 100.0, + 1000.0, + 5000.0, + 10000.0, + 20000.0, + 50000.0 + ], + "sys_idx": [ + 4, + 12, + 20 + ], + "temps": [ + 50, + 132.0, + 198.0, + 264.0 + ], + "trj_freq": 10 + }, + { + "_idx": 5, + "ensemble": "npt", + "nsteps": 3000, + "press": [ + 1.0, + 10.0, + 100.0, + 1000.0, + 5000.0, + 10000.0, + 
20000.0, + 50000.0 + ], + "sys_idx": [ + 5, + 13, + 21 + ], + "temps": [ + 50, + 132.0, + 198.0, + 264.0 + ], + "trj_freq": 10 + }, + { + "_idx": 6, + "ensemble": "npt", + "nsteps": 3000, + "press": [ + 1.0, + 10.0, + 100.0, + 1000.0, + 5000.0, + 10000.0, + 20000.0, + 50000.0 + ], + "sys_idx": [ + 6, + 14, + 22 + ], + "temps": [ + 50, + 132.0, + 198.0, + 264.0 + ], + "trj_freq": 10 + }, + { + "_idx": 7, + "ensemble": "npt", + "nsteps": 3000, + "press": [ + 1.0, + 10.0, + 100.0, + 1000.0, + 5000.0, + 10000.0, + 20000.0, + 50000.0 + ], + "sys_idx": [ + 7, + 15, + 23 + ], + "temps": [ + 50, + 132.0, + 198.0, + 264.0 + ], + "trj_freq": 10 + }, + { + "_idx": 8, + "ensemble": "npt", + "nsteps": 1000, + "press": [ + 1.0, + 10.0, + 100.0, + 1000.0, + 5000.0, + 10000.0, + 20000.0, + 50000.0 + ], + "sys_idx": [ + 0, + 8, + 16 + ], + "temps": [ + 330.0, + 396.0, + 462.0, + 528.0, + 594.0 + ], + "trj_freq": 10 + } +], + + + "_comment": " 02.fp ", + "fp_style": "vasp", + "shuffle_poscar": false, + "fp_task_max": 300, + "fp_task_min": 5, + "fp_pp_path": "/data1/yfb222333/2_dpgen_gpu_multi/POTCAR-Al", + "fp_pp_files": ["POTCAR"], + "fp_incar": "/data1/yfb222333/2_dpgen_gpu_multi/INCAR_metal_scf_gpu", + "_comment": " that's all " +} + From 840b5a47a4d9aee23fb5f82bcae78fdcf2a8ed3e Mon Sep 17 00:00:00 2001 From: dingzhaohan Date: Mon, 9 Dec 2019 00:55:20 +0800 Subject: [PATCH 082/109] Develop DP-GEN for Ali --- dpgen/dispatcher/ALI.py | 57 ++++++++++ dpgen/dispatcher/Dispatcher.py | 36 ++++++- dpgen/generator/run.py | 186 +++++++++++++++++++++++++++------ dpgen/remote/decide_machine.py | 34 ++++++ 4 files changed, 276 insertions(+), 37 deletions(-) create mode 100644 dpgen/dispatcher/ALI.py diff --git a/dpgen/dispatcher/ALI.py b/dpgen/dispatcher/ALI.py new file mode 100644 index 000000000..b7c033d3f --- /dev/null +++ b/dpgen/dispatcher/ALI.py @@ -0,0 +1,57 @@ +from aliyunsdkecs.request.v20140526.DescribeInstancesRequest import DescribeInstancesRequest +from aliyunsdkcore.client import AcsClient +from aliyunsdkcore.acs_exception.exceptions import ClientException +from aliyunsdkcore.acs_exception.exceptions import ServerException +from aliyunsdkecs.request.v20140526.RunInstancesRequest import RunInstancesRequest +from aliyunsdkecs.request.v20140526.DeleteInstancesRequest import DeleteInstancesRequest +import time +import json +from dpgen.dispatcher.Batch import Batch +from dpgen.dispatcher.JobStatus import JobStatus +from dpgen.dispatcher.Shell import Shell +from dpgen.dispatcher.SSHContext import SSHContext, SSHSession + +class ALI(): + def __init__(self, adata): + self.ip_list = None + self.regionID = None + self.instance_list = None + self.AccessKey_ID = adata["AccessKey_ID"] + self.AccessKey_Secret = adata["AccessKey_Secret"] + + def create_machine(self, instance_number, instance_type): + if True: + client = AcsClient(self.AccessKey_ID,self.AccessKey_Secret, 'cn-hangzhou') + request = RunInstancesRequest() + request.set_accept_format('json') + request.set_UniqueSuffix(True) + request.set_Password("975481DING!") + request.set_Amount(instance_number) + request.set_LaunchTemplateName(instance_type + '_cn-hangzhou_i') + response = client.do_action_with_exception(request) + response = json.loads(response) + self.instance_list = response["InstanceIdSets"]["InstanceIdSet"] + time.sleep(50) + request = DescribeInstancesRequest() + request.set_accept_format('json') + request.set_InstanceIds(self.instance_list) + response = client.do_action_with_exception(request) + response = json.loads(response) + + ip = [] + for 
i in range(len(response["Instances"]["Instance"])): + ip.append(response["Instances"]["Instance"][i]["PublicIpAddress"]['IpAddress'][0]) + self.ip_list = ip + # print(self.ip_list, self.instance_list) + return self.ip_list, self.instance_list + else: + return "create failed" + + def delete_machine(self, instance_id): + client = AcsClient(self.AccessKey_ID,self.AccessKey_Secret, 'cn-hangzhou') + request = DeleteInstancesRequest() + request.set_accept_format('json') + request.set_InstanceIds(instance_id) + request.set_Force(True) + response = client.do_action_with_exception(request) + diff --git a/dpgen/dispatcher/Dispatcher.py b/dpgen/dispatcher/Dispatcher.py index 03aaf2a7a..95ad40a73 100644 --- a/dpgen/dispatcher/Dispatcher.py +++ b/dpgen/dispatcher/Dispatcher.py @@ -215,7 +215,7 @@ def all_finished(self, class JobRecord(object): - def __init__ (self, path, task_chunks, fname = 'job_record.json'): + def __init__ (self, path, task_chunks, fname = 'job_record.json', ip=None): self.path = os.path.abspath(path) self.fname = os.path.join(self.path, fname) self.task_chunks = task_chunks @@ -232,9 +232,13 @@ def record_remote_context(self, chunk_hash, local_root, remote_root, - job_uuid): + job_uuid, + ip=None): self.valid_hash(chunk_hash) - self.record[chunk_hash]['context'] = [local_root, remote_root, job_uuid] + if not ip: + self.record[chunk_hash]['context'] = [local_root, remote_root, job_uuid, ip] + else: + self.record[chunk_hash]['context'] = [local_root, remote_root, job_uuid] def get_uuid(self, chunk_hash): self.valid_hash(chunk_hash) @@ -305,3 +309,29 @@ def make_dispatcher(mdata): context_type = 'lazy-local' disp = Dispatcher(mdata, context_type=context_type, batch_type=batch_type) return disp + +def make_dispatchers(num, mdata): + dispatchers = [] + for i in range(num): + try: + hostname = mdata['hostname'][i] + context_type = 'ssh' + except: + context_type = 'local' + try: + batch_type = mdata['batch'] + except: + dlog.info('cannot find key "batch" in machine file, try to use deprecated key "machine_type"') + batch_type = mdata['machine_type'] + try: + lazy_local = mdata['lazy_local'] + except: + lazy_local = False + if lazy_local and context_type == 'local': + dlog.info('Dispatcher switches to the lazy local mode') + context_type = 'lazy-local' + remote_profile = mdata.copy() + remote_profile['hostname'] = hostname + disp = Dispatcher(remote_profile, context_type=context_type, batch_type=batch_type, job_record='jr%d.json' %i) + dispatchers.append(disp) + return dispatchers diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index e95a428e9..2f6e1e464 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -52,7 +52,8 @@ from dpgen.remote.group_jobs import group_slurm_jobs from dpgen.remote.group_jobs import group_local_jobs from dpgen.remote.decide_machine import decide_train_machine, decide_fp_machine, decide_model_devi_machine -from dpgen.dispatcher.Dispatcher import Dispatcher, make_dispatcher +from dpgen.dispatcher.Dispatcher import Dispatcher, make_dispatcher, make_dispatchers, _split_tasks +from dpgen.dispatcher.ALI import ALI from dpgen.util import sepline from dpgen import ROOT_PATH from pymatgen.io.vasp import Incar,Kpoints,Potcar @@ -340,6 +341,20 @@ def detect_batch_size(batch_size, system=None): else: raise RuntimeError("Unsupported batch size") +def run_ALI(stage, num_of_instance, adata): + if stage == "train": + instance_type = "ecs.gn5-c8g1.2xlarge" + elif stage == "model_devi": + instance_type = "ecs.gn5-c8g1.2xlarge" + elif stage == "fp": + 
instance_type = "ecs.c6.2xlarge" + ali = ALI(adata) + return ali.create_machine(num_of_instance, instance_type) + +def exit_ALI(instance_id, adata): + ali = ALI(adata) + ali.delete_machine(instance_id) + def run_train (iter_index, jdata, mdata, @@ -444,16 +459,47 @@ def run_train (iter_index, except: train_group_size = 1 - dispatcher.run_jobs(mdata['train_resources'], - commands, - work_path, - run_tasks, - train_group_size, - trans_comm_data, - forward_files, - backward_files, - outlog = 'train.log', - errlog = 'train.log') + if mdata['train_machine']['type'] == 'ALI': + task_chunks = _split_tasks(run_tasks, train_group_size) + nchunks = len(task_chunks) + ip, instance_id = run_ALI('train', nchunks, mdata['ali_auth']) + mdata['train_machine']['hostname'] = ip + disp = make_dispatchers(nchunks, mdata['train_machine']) + job_handlers = [] + for ii in range(nchunks): + job_handler = disp[ii].submit_jobs(mdata['train_resources'], + commands, + work_path, + task_chunks[ii], + train_group_size, + trans_comm_data, + forward_files, + backward_files, + outlog = 'train.log', + errlog = 'train.log') + job_handlers.append(job_handler) + + while True: + cnt = 0 + for ii in range(nchunks): + if disp[ii].all_finished(job_handlers[ii]): + cnt += 1 + if cnt == nchunks: + break + else: + time.sleep(10) + exit_ALI(instance_id, mdata['ali_auth']) + else: + dispatcher.run_jobs(mdata['train_resources'], + commands, + work_path, + run_tasks, + train_group_size, + trans_comm_data, + forward_files, + backward_files, + outlog = 'train.log', + errlog = 'train.log') def post_train (iter_index, @@ -903,16 +949,47 @@ def run_model_devi (iter_index, forward_files += ['input.plumed'] backward_files += ['output.plumed'] - dispatcher.run_jobs(mdata['model_devi_resources'], - commands, - work_path, - run_tasks, - model_devi_group_size, - model_names, - forward_files, - backward_files, - outlog = 'model_devi.log', - errlog = 'model_devi.log') + if mdata['model_devi_machine']['type'] == 'ALI': + task_chunks = _split_tasks(run_tasks, model_devi_group_size) + nchunks = len(task_chunks) + ip, instance_id = run_ALI('model_devi', nchunks, mdata['ali_auth']) + mdata['model_devi_machine']['hostname'] = ip + disp = make_dispatchers(nchunks, mdata['model_devi_machine']) + job_handlers = [] + for ii in range(nchunks): + job_handler = disp[ii].submit_jobs(mdata['model_devi_resources'], + commands, + work_path, + task_chunks[ii], + model_devi_group_size, + model_names, + forward_files, + backward_files, + outlog = 'model_devi.log', + errlog = 'model_devi.log') + job_handlers.append(job_handler) + + while True: + cnt = 0 + for ii in range(nchunks): + if disp[ii].all_finished(job_handlers[ii]): + cnt += 1 + if cnt == nchunks: + break + else: + time.sleep(10) + exit_ALI(instance_id, mdata['ali_auth']) + else: + dispatcher.run_jobs(mdata['model_devi_resources'], + commands, + work_path, + run_tasks, + model_devi_group_size, + model_names, + forward_files, + backward_files, + outlog = 'model_devi.log', + errlog = 'model_devi.log') def post_model_devi (iter_index, @@ -1485,16 +1562,48 @@ def run_fp_inner (iter_index, # fp_run_tasks.append(ii) run_tasks = [os.path.basename(ii) for ii in fp_run_tasks] - dispatcher.run_jobs(mdata['fp_resources'], - [fp_command], - work_path, - run_tasks, - fp_group_size, - forward_common_files, - forward_files, - backward_files, - outlog = log_file, - errlog = log_file) + if mdata['fp_machine']['type'] == 'ALI': + task_chunks = _split_tasks(run_tasks, fp_group_size) + nchunks = len(task_chunks) + ip, instance_id 
= run_ALI('fp', nchunks, mdata['ali_auth']) + mdata['fp_machine']['hostname'] = ip + disp = make_dispatchers(nchunks, mdata['fp_machine']) + job_handlers = [] + for ii in range(nchunks): + job_handler = disp[ii].submit_jobs(mdata['fp_resources'], + [fp_command], + work_path, + task_chunks[ii], + fp_group_size, + forward_common_files, + forward_files, + backward_files, + outlog = log_file, + errlog = log_file) + job_handlers.append(job_handler) + + while True: + cnt = 0 + for ii in range(nchunks): + if disp[ii].all_finished(job_handlers[ii]): + cnt += 1 + if cnt == nchunks: + break + else: + time.sleep(10) + exit_ALI(instance_id, mdata['ali_auth']) + + else: + dispatcher.run_jobs(mdata['fp_resources'], + [fp_command], + work_path, + run_tasks, + fp_group_size, + forward_common_files, + forward_files, + backward_files, + outlog = log_file, + errlog = log_file) def run_fp (iter_index, @@ -1906,7 +2015,10 @@ def run_iter (param_file, machine_file) : elif jj == 1 : log_iter ("run_train", ii, jj) mdata = decide_train_machine(mdata) - disp = make_dispatcher(mdata['train_machine']) + if mdata['train_machine']['type'] == 'ALI': + disp = [] + else: + disp = make_dispatcher(mdata['train_machine']) run_train (ii, jdata, mdata, disp) elif jj == 2 : log_iter ("post_train", ii, jj) @@ -1919,7 +2031,10 @@ def run_iter (param_file, machine_file) : elif jj == 4 : log_iter ("run_model_devi", ii, jj) mdata = decide_model_devi_machine(mdata) - disp = make_dispatcher(mdata['model_devi_machine']) + if mdata['model_devi_machine']['type'] == 'ALI': + disp = [] + else: + disp = make_dispatcher(mdata['model_devi_machine']) run_model_devi (ii, jdata, mdata, disp) elif jj == 5 : log_iter ("post_model_devi", ii, jj) @@ -1930,7 +2045,10 @@ def run_iter (param_file, machine_file) : elif jj == 7 : log_iter ("run_fp", ii, jj) mdata = decide_fp_machine(mdata) - disp = make_dispatcher(mdata['fp_machine']) + if mdata['fp_machine']['type'] == 'ALI': + disp = [] + else: + disp = make_dispatcher(mdata['fp_machine']) run_fp (ii, jdata, mdata, disp) elif jj == 8 : log_iter ("post_fp", ii, jj) diff --git a/dpgen/remote/decide_machine.py b/dpgen/remote/decide_machine.py index 3a0e3ecf3..ef756d06a 100644 --- a/dpgen/remote/decide_machine.py +++ b/dpgen/remote/decide_machine.py @@ -45,6 +45,21 @@ def decide_train_machine(mdata): mdata["deepmd_version"] = mdata["train"][0]["deepmd_version"] continue_flag = True + if mdata["train"][0]["machine"]["type"] == "ALI": + mdata["train_machine"] = mdata["train"][0]["machine"] + mdata["train_resources"] = mdata["train"][0]["resources"] + if 'deepmd_path' in mdata["train"][0]: + mdata["deepmd_path"] = mdata["train"][0]["deepmd_path"] + elif 'python_path' in mdata["train"][0]: + mdata["python_path"] = mdata["train"][0]["python_path"] + if "group_size" in mdata["train"][0]: + mdata["train_group_size"] = mdata["train"][0]["group_size"] + if 'deepmd_version' in mdata["train"][0]: + mdata["deepmd_version"] = mdata["train"][0]["deepmd_version"] + mdata["ali_auth"] = mdata["ali_auth"] + mdata["train_command"] = mdata["train"][0]["command"] + continue_flag = True + pd_flag = False pd_count_list =[] # pd for pending job in slurm @@ -140,6 +155,15 @@ def decide_model_devi_machine(mdata): #if "group_size" in mdata["train"][0]: mdata["model_devi_group_size"] = mdata["model_devi"][0]["group_size"] continue_flag = True + + if mdata["model_devi"][0]["machine"]["type"] == 'ALI': + mdata["model_devi_machine"] = mdata["model_devi"][0]["machine"] + mdata["model_devi_resources"] = mdata["model_devi"][0]["resources"] 
+ mdata["lmp_command"] = mdata["model_devi"][0]["command"] + mdata["model_devi_group_size"] = mdata["model_devi"][0]["group_size"] + mdata["ali_auth"] = mdata["ali_auth"] + continue_flag = True + pd_count_list =[] pd_flag = False if not continue_flag: @@ -216,6 +240,16 @@ def decide_fp_machine(mdata): #if "group_size" in mdata["train"][0]: mdata["fp_group_size"] = mdata["fp"][0]["group_size"] continue_flag = True + + if mdata["fp"][0]["machine"]["type"] == 'ALI': + mdata["fp_machine"] = mdata["fp"][0]["machine"] + mdata["fp_resources"] = mdata["fp"][0]["resources"] + mdata["fp_command"] = mdata["fp"][0]["command"] + #if "group_size" in mdata["train"][0]: + mdata["fp_group_size"] = mdata["fp"][0]["group_size"] + mdata["ali_auth"] = mdata["ali_auth"] + continue_flag = True + pd_count_list =[] pd_flag = False if not continue_flag: From b3996947fb2631fba1d9d0818e574f5295f4aff1 Mon Sep 17 00:00:00 2001 From: AnguseZhang <529133328@qq.com> Date: Mon, 9 Dec 2019 01:05:01 +0800 Subject: [PATCH 083/109] Fix bug for ip record --- dpgen/dispatcher/Dispatcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpgen/dispatcher/Dispatcher.py b/dpgen/dispatcher/Dispatcher.py index 95ad40a73..8c71801f4 100644 --- a/dpgen/dispatcher/Dispatcher.py +++ b/dpgen/dispatcher/Dispatcher.py @@ -235,7 +235,7 @@ def record_remote_context(self, job_uuid, ip=None): self.valid_hash(chunk_hash) - if not ip: + if ip: self.record[chunk_hash]['context'] = [local_root, remote_root, job_uuid, ip] else: self.record[chunk_hash]['context'] = [local_root, remote_root, job_uuid] From aef95bff2b061896d208386d48f0ffee51b82f82 Mon Sep 17 00:00:00 2001 From: dingzhaohan Date: Mon, 9 Dec 2019 16:46:53 +0800 Subject: [PATCH 084/109] complete job restart --- dpgen/dispatcher/ALI.py | 14 ++ dpgen/dispatcher/Dispatcher.py | 25 ++- dpgen/generator/run.py | 328 +++++++++++++++++++++++---------- 3 files changed, 265 insertions(+), 102 deletions(-) diff --git a/dpgen/dispatcher/ALI.py b/dpgen/dispatcher/ALI.py index b7c033d3f..e84230133 100644 --- a/dpgen/dispatcher/ALI.py +++ b/dpgen/dispatcher/ALI.py @@ -18,6 +18,7 @@ def __init__(self, adata): self.instance_list = None self.AccessKey_ID = adata["AccessKey_ID"] self.AccessKey_Secret = adata["AccessKey_Secret"] + self.strategy = adata["pay_strategy"] def create_machine(self, instance_number, instance_type): if True: @@ -55,3 +56,16 @@ def delete_machine(self, instance_id): request.set_Force(True) response = client.do_action_with_exception(request) +def run_ALI(stage, num_of_instance, strategy, adata): + if stage == "train": + instance_type = "ecs.gn5-c8g1.2xlarge" + elif stage == "model_devi": + instance_type = "ecs.gn5-c8g1.2xlarge" + elif stage == "fp": + instance_type = "ecs.c6.2xlarge" + ali = ALI(adata) + return ali.create_machine(num_of_instance, instance_type, strategy) + +def exit_ALI(instance_id, adata): + ali = ALI(adata) + ali.delete_machine(instance_id) diff --git a/dpgen/dispatcher/Dispatcher.py b/dpgen/dispatcher/Dispatcher.py index 8c71801f4..b4ff21c1e 100644 --- a/dpgen/dispatcher/Dispatcher.py +++ b/dpgen/dispatcher/Dispatcher.py @@ -153,10 +153,17 @@ def submit_jobs(self, dlog.info('restart from old submission %s for chunk %s' % (job_uuid, cur_hash)) # record job and its remote context job_list.append(rjob) + ip = None + instance_id = None + if self.remote_profile['type'] == 'ALI': + ip = self.remote_profile['hostname'] + instance_id = self.remote_profile['instance_id'] job_record.record_remote_context(cur_hash, context.local_root, context.remote_root, - 
job_uuid) + job_uuid, + ip, + instance_id) else : # finished job, append a None to list job_list.append(None) @@ -233,12 +240,10 @@ def record_remote_context(self, local_root, remote_root, job_uuid, - ip=None): + ip=None, + instance_id=None): self.valid_hash(chunk_hash) - if ip: - self.record[chunk_hash]['context'] = [local_root, remote_root, job_uuid, ip] - else: - self.record[chunk_hash]['context'] = [local_root, remote_root, job_uuid] + self.record[chunk_hash]['context'] = [local_root, remote_root, job_uuid, ip, instance_id] def get_uuid(self, chunk_hash): self.valid_hash(chunk_hash) @@ -289,7 +294,7 @@ def _new_record(self): } -def make_dispatcher(mdata): +def make_dispatcher(mdata, job_record=None): try: hostname = mdata['hostname'] context_type = 'ssh' @@ -307,7 +312,7 @@ def make_dispatcher(mdata): if lazy_local and context_type == 'local': dlog.info('Dispatcher switches to the lazy local mode') context_type = 'lazy-local' - disp = Dispatcher(mdata, context_type=context_type, batch_type=batch_type) + disp = Dispatcher(mdata, context_type=context_type, batch_type=batch_type, job_record=job_record) return disp def make_dispatchers(num, mdata): @@ -315,6 +320,7 @@ def make_dispatchers(num, mdata): for i in range(num): try: hostname = mdata['hostname'][i] + instance_id = mdata['instance_id'][i] context_type = 'ssh' except: context_type = 'local' @@ -332,6 +338,7 @@ def make_dispatchers(num, mdata): context_type = 'lazy-local' remote_profile = mdata.copy() remote_profile['hostname'] = hostname - disp = Dispatcher(remote_profile, context_type=context_type, batch_type=batch_type, job_record='jr%d.json' %i) + remote_profile['instance_id'] = instance_id + disp = Dispatcher(remote_profile, context_type=context_type, batch_type=batch_type, job_record='jr.%.06d.json' %i) dispatchers.append(disp) return dispatchers diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index 2f6e1e464..faaa76cb3 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -53,7 +53,8 @@ from dpgen.remote.group_jobs import group_local_jobs from dpgen.remote.decide_machine import decide_train_machine, decide_fp_machine, decide_model_devi_machine from dpgen.dispatcher.Dispatcher import Dispatcher, make_dispatcher, make_dispatchers, _split_tasks -from dpgen.dispatcher.ALI import ALI +try: + from dpgen.dispatcher.ALI import ALI, run_ALI, exit_ALI from dpgen.util import sepline from dpgen import ROOT_PATH from pymatgen.io.vasp import Incar,Kpoints,Potcar @@ -341,20 +342,6 @@ def detect_batch_size(batch_size, system=None): else: raise RuntimeError("Unsupported batch size") -def run_ALI(stage, num_of_instance, adata): - if stage == "train": - instance_type = "ecs.gn5-c8g1.2xlarge" - elif stage == "model_devi": - instance_type = "ecs.gn5-c8g1.2xlarge" - elif stage == "fp": - instance_type = "ecs.c6.2xlarge" - ali = ALI(adata) - return ali.create_machine(num_of_instance, instance_type) - -def exit_ALI(instance_id, adata): - ali = ALI(adata) - ali.delete_machine(instance_id) - def run_train (iter_index, jdata, mdata, @@ -462,33 +449,85 @@ def run_train (iter_index, if mdata['train_machine']['type'] == 'ALI': task_chunks = _split_tasks(run_tasks, train_group_size) nchunks = len(task_chunks) - ip, instance_id = run_ALI('train', nchunks, mdata['ali_auth']) - mdata['train_machine']['hostname'] = ip - disp = make_dispatchers(nchunks, mdata['train_machine']) - job_handlers = [] - for ii in range(nchunks): - job_handler = disp[ii].submit_jobs(mdata['train_resources'], - commands, - work_path, - task_chunks[ii], - 
train_group_size, - trans_comm_data, - forward_files, - backward_files, - outlog = 'train.log', - errlog = 'train.log') - job_handlers.append(job_handler) - - while True: - cnt = 0 + cwd = os.getcwd() + os.chdir(work_path) + tmp_dispatchers = [] + instance_id_list = [] + if len(glob.glob('jr.*.json')) == nchunks: for ii in range(nchunks): - if disp[ii].all_finished(job_handlers[ii]): - cnt += 1 - if cnt == nchunks: - break - else: - time.sleep(10) - exit_ALI(instance_id, mdata['ali_auth']) + with open('jr.%.06d.json' %ii) as fp: + job_record = json.load(fp) + key = list(job_record.keys())[0] + ip, instance_id = job_record[key]['context'][-2], job_record[key]['context'][-1] + print(ip, instance_id) + mdata['train_machine']['hostname'] = ip + mdata['train_machine']['instance_id'] = instance_id + instance_id_list.append(instance_id) + disp = make_dispatcher(mdata['train_machine'], job_record='jr.%.06d.json' %ii) + max_check = 10 + cnt = 0 + while not disp.session._check_alive(): + cnt += 1 + if cnt == max_check: + break + # print('cnt', cnt) + if cnt != max_check: + tmp_dispatchers.append(disp) + if len(tmp_dispatchers) == nchunks: + os.chdir(cwd) + job_handlers = [] + for ii in range(nchunks): + job_handler = tmp_dispatchers[ii].submit_jobs(mdata['train_resources'], + commands, + work_path, + task_chunks[ii], + train_group_size, + trans_comm_data, + forward_files, + backward_files, + outlog = 'train.log', + errlog = 'train.log') + job_handlers.append(job_handler) + while True: + cnt = 0 + for ii in range(nchunks): + if tmp_dispatchers[ii].all_finished(job_handlers[ii]): + cnt += 1 + if cnt == nchunks: + break + else: + time.sleep(10) + exit_ALI(instance_id_list, mdata['ali_auth']) + else: + os.chdir(cwd) + ip, instance_id = run_ALI('train', nchunks, mdata['ali_auth']) + mdata['train_machine']['hostname'] = ip + mdata['train_machine']['instance_id'] = instance_id + disp = make_dispatchers(nchunks, mdata['train_machine']) + job_handlers = [] + for ii in range(nchunks): + job_handler = disp[ii].submit_jobs(mdata['train_resources'], + commands, + work_path, + task_chunks[ii], + train_group_size, + trans_comm_data, + forward_files, + backward_files, + outlog = 'train.log', + errlog = 'train.log') + job_handlers.append(job_handler) + + while True: + cnt = 0 + for ii in range(nchunks): + if disp[ii].all_finished(job_handlers[ii]): + cnt += 1 + if cnt == nchunks: + break + else: + time.sleep(10) + exit_ALI(instance_id, mdata['ali_auth']) else: dispatcher.run_jobs(mdata['train_resources'], commands, @@ -952,33 +991,85 @@ def run_model_devi (iter_index, if mdata['model_devi_machine']['type'] == 'ALI': task_chunks = _split_tasks(run_tasks, model_devi_group_size) nchunks = len(task_chunks) - ip, instance_id = run_ALI('model_devi', nchunks, mdata['ali_auth']) - mdata['model_devi_machine']['hostname'] = ip - disp = make_dispatchers(nchunks, mdata['model_devi_machine']) - job_handlers = [] - for ii in range(nchunks): - job_handler = disp[ii].submit_jobs(mdata['model_devi_resources'], - commands, - work_path, - task_chunks[ii], - model_devi_group_size, - model_names, - forward_files, - backward_files, - outlog = 'model_devi.log', - errlog = 'model_devi.log') - job_handlers.append(job_handler) - - while True: - cnt = 0 + cwd = os.getcwd() + os.chdir(work_path) + tmp_dispatchers = [] + instance_id_list = [] + if len(glob.glob('jr.*.json')) == nchunks: for ii in range(nchunks): - if disp[ii].all_finished(job_handlers[ii]): - cnt += 1 - if cnt == nchunks: - break - else: - time.sleep(10) - exit_ALI(instance_id, 
mdata['ali_auth']) + with open('jr.%.06d.json' %ii) as fp: + job_record = json.load(fp) + key = list(job_record.keys())[0] + ip, instance_id = job_record[key]['context'][-2], job_record[key]['context'][-1] + print(ip, instance_id) + mdata['model_devi_machine']['hostname'] = ip + mdata['model_devi_machine']['instance_id'] = instance_id + instance_id_list.append(instance_id) + disp = make_dispatcher(mdata['model_devi_machine'], job_record='jr.%.06d.json' %ii) + max_check = 10 + cnt = 0 + while not disp.session._check_alive(): + cnt += 1 + if cnt == max_check: + break + # print('cnt', cnt) + if cnt != max_check: + tmp_dispatchers.append(disp) + if len(tmp_dispatchers) == nchunks: + os.chdir(cwd) + job_handlers = [] + for ii in range(nchunks): + job_handler = tmp_dispatchers[ii].submit_jobs(mdata['model_devi_resources'], + commands, + work_path, + task_chunks[ii], + model_devi_group_size, + model_names, + forward_files, + backward_files, + outlog = 'model_devi.log', + errlog = 'model_devi.log') + job_handlers.append(job_handler) + while True: + cnt = 0 + for ii in range(nchunks): + if tmp_dispatchers[ii].all_finished(job_handlers[ii]): + cnt += 1 + if cnt == nchunks: + break + else: + time.sleep(10) + exit_ALI(instance_id_list, mdata['ali_auth']) + else: + os.chdir(cwd) + ip, instance_id = run_ALI('model_devi', nchunks, mdata['ali_auth']) + mdata['model_devi_machine']['hostname'] = ip + mdata['model_devi_machine']['instance_id'] = instance_id + disp = make_dispatchers(nchunks, mdata['model_devi_machine']) + job_handlers = [] + for ii in range(nchunks): + job_handler = disp[ii].submit_jobs(mdata['model_devi_resources'], + commands, + work_path, + task_chunks[ii], + model_devi_group_size, + model_names, + forward_files, + backward_files, + outlog = 'model_devi.log', + errlog = 'model_devi.log') + job_handlers.append(job_handler) + + while True: + cnt = 0 + for ii in range(nchunks): + if disp[ii].all_finished(job_handlers[ii]): + cnt += 1 + if cnt == nchunks: + break + else: + time.sleep(10) + exit_ALI(instance_id, mdata['ali_auth']) else: dispatcher.run_jobs(mdata['model_devi_resources'], commands, @@ -1565,33 +1656,84 @@ def run_fp_inner (iter_index, if mdata['fp_machine']['type'] == 'ALI': task_chunks = _split_tasks(run_tasks, fp_group_size) nchunks = len(task_chunks) - ip, instance_id = run_ALI('fp', nchunks, mdata['ali_auth']) - mdata['fp_machine']['hostname'] = ip - disp = make_dispatchers(nchunks, mdata['fp_machine']) - job_handlers = [] - for ii in range(nchunks): - job_handler = disp[ii].submit_jobs(mdata['fp_resources'], - [fp_command], - work_path, - task_chunks[ii], - fp_group_size, - forward_common_files, - forward_files, - backward_files, - outlog = log_file, - errlog = log_file) - job_handlers.append(job_handler) - - while True: - cnt = 0 + cwd = os.getcwd() + os.chdir(work_path) + tmp_dispatchers = [] + instance_id_list = [] + if len(glob.glob('jr.*.json')) == nchunks: for ii in range(nchunks): - if disp[ii].all_finished(job_handlers[ii]): - cnt += 1 - if cnt == nchunks: - break - else: - time.sleep(10) - exit_ALI(instance_id, mdata['ali_auth']) + with open('jr.%.06d.json' %ii) as fp: + job_record = json.load(fp) + key = list(job_record.keys())[0] + ip, instance_id = job_record[key]['context'][-2], job_record[key]['context'][-1] + print(ip, instance_id) + mdata['fp_machine']['hostname'] = ip + mdata['fp_machine']['instance_id'] = instance_id + instance_id_list.append(instance_id) + disp = make_dispatcher(mdata['fp_machine'], job_record='jr.%.06d.json' %ii) + max_check = 10 + cnt = 
0 + while not disp.session._check_alive(): + cnt += 1 + if cnt == max_check: + break + # print('cnt', cnt) + if cnt != max_check: + tmp_dispatchers.append(disp) + if len(tmp_dispatchers) == nchunks: + os.chdir(cwd) + job_handlers = [] + for ii in range(nchunks): + job_handler = tmp_dispatchers[ii].submit_jobs(mdata['fp_resources'], + [fp_command], + work_path, + task_chunks[ii], + fp_group_size, + forward_common_files, + forward_files, + backward_files, + outlog = log_file, + errlog = log_file) + job_handlers.append(job_handler) + while True: + cnt = 0 + for ii in range(nchunks): + if tmp_dispatchers[ii].all_finished(job_handlers[ii]): + cnt += 1 + if cnt == nchunks: + break + else: + time.sleep(10) + exit_ALI(instance_id_list, mdata['ali_auth']) + else: + os.chdir(cwd) + ip, instance_id = run_ALI('fp', nchunks, mdata['ali_auth']) + mdata['fp_machine']['hostname'] = ip + mdata['fp_machine']['instance_id'] = instance_id + disp = make_dispatchers(nchunks, mdata['fp_machine']) + job_handlers = [] + for ii in range(nchunks): + job_handler = disp[ii].submit_jobs(mdata['fp_resources'], + [fp_command], + work_path, + task_chunks[ii], + fp_group_size, + forward_common_files, + forward_files, + backward_files, + outlog = log_file, + errlog = log_file) + job_handlers.append(job_handler) + while True: + cnt = 0 + for ii in range(nchunks): + if disp[ii].all_finished(job_handlers[ii]): + cnt += 1 + if cnt == nchunks: + break + else: + time.sleep(10) + exit_ALI(instance_id, mdata['ali_auth']) else: dispatcher.run_jobs(mdata['fp_resources'], From 4317c6523a42bbef7fda318ef92766749a742c1b Mon Sep 17 00:00:00 2001 From: dingzhaohan Date: Mon, 9 Dec 2019 17:07:02 +0800 Subject: [PATCH 085/109] fix import bug --- dpgen/generator/run.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index faaa76cb3..4725032ca 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -55,6 +55,8 @@ from dpgen.dispatcher.Dispatcher import Dispatcher, make_dispatcher, make_dispatchers, _split_tasks try: from dpgen.dispatcher.ALI import ALI, run_ALI, exit_ALI +except: + pass from dpgen.util import sepline from dpgen import ROOT_PATH from pymatgen.io.vasp import Incar,Kpoints,Potcar From 0f9cace453baf5f5a37609e3aa0bd0472a56e574 Mon Sep 17 00:00:00 2001 From: dingzhaohan Date: Mon, 9 Dec 2019 17:24:27 +0800 Subject: [PATCH 086/109] set ssh login passwd --- dpgen/dispatcher/ALI.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dpgen/dispatcher/ALI.py b/dpgen/dispatcher/ALI.py index e84230133..a9eab9bfa 100644 --- a/dpgen/dispatcher/ALI.py +++ b/dpgen/dispatcher/ALI.py @@ -19,6 +19,7 @@ def __init__(self, adata): self.AccessKey_ID = adata["AccessKey_ID"] self.AccessKey_Secret = adata["AccessKey_Secret"] self.strategy = adata["pay_strategy"] + self.pwd = adata["pwd"] def create_machine(self, instance_number, instance_type): if True: @@ -26,7 +27,7 @@ def create_machine(self, instance_number, instance_type): request = RunInstancesRequest() request.set_accept_format('json') request.set_UniqueSuffix(True) - request.set_Password("975481DING!") + request.set_Password(self.pwd) request.set_Amount(instance_number) request.set_LaunchTemplateName(instance_type + '_cn-hangzhou_i') response = client.do_action_with_exception(request) @@ -43,7 +44,6 @@ def create_machine(self, instance_number, instance_type): for i in range(len(response["Instances"]["Instance"])): ip.append(response["Instances"]["Instance"][i]["PublicIpAddress"]['IpAddress'][0]) self.ip_list = 
ip
-        # print(self.ip_list, self.instance_list)
         return self.ip_list, self.instance_list
     else:
         return "create failed"
@@ -56,7 +56,7 @@ def delete_machine(self, instance_id):
         request.set_Force(True)
         response = client.do_action_with_exception(request)
 
-def run_ALI(stage, num_of_instance, strategy, adata):
+def run_ALI(stage, num_of_instance, adata):
     if stage == "train":
         instance_type = "ecs.gn5-c8g1.2xlarge"
     elif stage == "model_devi":
@@ -64,7 +64,7 @@ def run_ALI(stage, num_of_instance, strategy, adata):
     elif stage == "fp":
         instance_type = "ecs.c6.2xlarge"
     ali = ALI(adata)
-    return ali.create_machine(num_of_instance, instance_type, strategy)
+    return ali.create_machine(num_of_instance, instance_type)
 
 def exit_ALI(instance_id, adata):
     ali = ALI(adata)

From acb10963c5034aff80718122dc7739a35a2520c8 Mon Sep 17 00:00:00 2001
From: dingzhaohan
Date: Mon, 9 Dec 2019 18:38:07 +0800
Subject: [PATCH 087/109] ""

---
 dpgen/generator/run.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py
index 4725032ca..8ff72896a 100644
--- a/dpgen/generator/run.py
+++ b/dpgen/generator/run.py
@@ -457,22 +457,21 @@ def run_train (iter_index,
     instance_id_list = []
     if len(glob.glob('jr.*.json')) == nchunks:
         for ii in range(nchunks):
-            with open('jr.%.06d.json' %ii) as fp:
+            with open('jr.%.06d.json' % ii) as fp:
                 job_record = json.load(fp)
             key = list(job_record.keys())[0]
             ip, instance_id = job_record[key]['context'][-2], job_record[key]['context'][-1]
-            print(ip, instance_id)
+            # print(ip, instance_id)
             mdata['train_machine']['hostname'] = ip
             mdata['train_machine']['instance_id'] = instance_id
             instance_id_list.append(instance_id)
-            disp = make_dispatcher(mdata['train_machine'], job_record='jr.%.06d.json' %ii)
+            disp = make_dispatcher(mdata['train_machine'], job_record='jr.%.06d.json' % ii)
             max_check = 10
             cnt = 0
             while not disp.session._check_alive():
                 cnt += 1
                 if cnt == max_check:
                     break
-                # print('cnt', cnt)
             if cnt != max_check:
                 tmp_dispatchers.append(disp)
             if len(tmp_dispatchers) == nchunks:
@@ -1003,7 +1002,7 @@ def run_model_devi (iter_index,
                 job_record = json.load(fp)
             key = list(job_record.keys())[0]
             ip, instance_id = job_record[key]['context'][-2], job_record[key]['context'][-1]
-            print(ip, instance_id)
+            # print(ip, instance_id)
             mdata['model_devi_machine']['hostname'] = ip
             mdata['model_devi_machine']['instance_id'] = instance_id
             instance_id_list.append(instance_id)
@@ -1668,7 +1667,7 @@ def run_fp_inner (iter_index,
                 job_record = json.load(fp)
             key = list(job_record.keys())[0]
             ip, instance_id = job_record[key]['context'][-2], job_record[key]['context'][-1]
-            print(ip, instance_id)
+            # print(ip, instance_id)
             mdata['fp_machine']['hostname'] = ip
             mdata['fp_machine']['instance_id'] = instance_id
             instance_id_list.append(instance_id)

From d85ef379759b5bcfc904bac8670d77ab3faa5b0a Mon Sep 17 00:00:00 2001
From: Han Wang
Date: Tue, 10 Dec 2019 15:51:16 +0800
Subject: [PATCH 088/109] try to test with python 3.7

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index f73b75720..bb970bb61 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,6 +2,7 @@ language: python
 python:
   - "3.6.3"
   - "3.6-dev" # 3.6 development branch
+  - "3.7"
 # command to install dependencies
 env:
   matrix:

From d9b5ae4f661ff93c5f0889fd93e933ee3aaa5c36 Mon Sep 17 00:00:00 2001
From: Han Wang
Date: Tue, 10 Dec 2019 17:08:12 +0800
Subject: [PATCH 089/109] update the requirement for pymatgen due to python 3.7

---
 .travis.yml | 5 +----
 setup.py    | 2 +-
2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index bb970bb61..b0963bb18 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,13 +1,10 @@ language: python python: - - "3.6.3" - - "3.6-dev" # 3.6 development branch + - "3.6" - "3.7" # command to install dependencies env: matrix: - - PYMATGEN_VERSION=2017.9.1 - - PYMATGEN_VERSION=2018.1.19 - PYMATGEN_VERSION=2019.1.13 - PYMATGEN_VERSION=2019.7.30 before_install: diff --git a/setup.py b/setup.py index 325b2b005..71286b666 100755 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ with open(path.join('dpgen', '_date.py'), 'w') as fp : fp.write('date = \'%s\'' % today) -install_requires=['numpy>=1.14.3', 'dpdata>=0.1.12', 'pymatgen>=2017.9.1', 'ase', 'monty>2.0.0', 'paramiko', 'custodian'] +install_requires=['numpy>=1.14.3', 'dpdata>=0.1.12', 'pymatgen>=2019.1.13', 'ase', 'monty>2.0.0', 'paramiko', 'custodian'] setuptools.setup( name=NAME, From 383967be30a1e2c0231c58d7e3c231a66c77f772 Mon Sep 17 00:00:00 2001 From: dingzhaohan Date: Fri, 13 Dec 2019 12:54:31 +0800 Subject: [PATCH 090/109] optimize ali code --- dpgen/dispatcher/Dispatcher.py | 165 ++++++++++++++++++ dpgen/generator/run.py | 307 +++++++-------------------------- 2 files changed, 232 insertions(+), 240 deletions(-) diff --git a/dpgen/dispatcher/Dispatcher.py b/dpgen/dispatcher/Dispatcher.py index b4ff21c1e..bc8672dae 100644 --- a/dpgen/dispatcher/Dispatcher.py +++ b/dpgen/dispatcher/Dispatcher.py @@ -342,3 +342,168 @@ def make_dispatchers(num, mdata): disp = Dispatcher(remote_profile, context_type=context_type, batch_type=batch_type, job_record='jr.%.06d.json' %i) dispatchers.append(disp) return dispatchers + +def ali_start_jobs(stage, + mdata, + run_tasks, + work_path, + cwd, + commands, + trans_comm_data = None, + train_group_size = None, + model_devi_group_size = None, + fp_group_size = None, + model_names = None, + forward_files, + backward_files, + log_file = None) + os.chdir(cwd) + task_chunks = _split_tasks(run_tasks) + nchunks = len(task_chunks) + ip, instance_id = run_ALI('stage', nchunks, mdata['ali_auth']) + mdata[stage + '_machine']['hostname'] = ip + mdata[stage + '_machine']['instance_id'] = instance_id + disp = make_dispatchers(nchunks, mdata[stage + '_machine']) + job_handlers = [] + if stage == 'train': + for ii in range(nchunks): + job_handler = disp[ii].submit_jobs(mdata['fp_resources'], + [fp_command], + work_path, + task_chunks[ii], + fp_group_size, + forward_common_files, forward_files, + backward_files, + outlog = log_file, + errlog = log_file) + job_handlers.append(job_handler) + elif stage == 'model_devi': + for ii in range(nchunks): + job_handler = disp[ii].submit_jobs(mdata['fp_resources'], + [fp_command], + work_path, + task_chunks[ii], + fp_group_size, + forward_common_files, forward_files, + backward_files, + outlog = log_file, + errlog = log_file) + job_handlers.append(job_handler) + elif stage == 'fp': + for ii in range(nchunks): + job_handler = disp[ii].submit_jobs(mdata['fp_resources'], + [fp_command], + work_path, + task_chunks[ii], + fp_group_size, + forward_common_files, forward_files, + backward_files, + outlog = log_file, + errlog = log_file) + job_handlers.append(job_handler) + while True: + cnt = 0 + for ii in range(nchunks): + if disp[ii].all_finished(job_handlers[ii]): + cnt += 1 + if cnt == nchunks: + break + else: + time.sleep(10) + exit_ALI(instance_id, mdata['ali_auth']) + +def ali_restart_jobs(stage, + cwd, + mdata, + commands, + work_path, + run_tasks, + train_group_size = None, + 
model_devi_group_size = None, + fp_group_size = None, + trans_comm_data = None, + model_names = None, + forward_common_files = None, + forward_files, + backward_files, + log_file=None): + task_chunks = _split_tasks(run_tasks, stage + '_group_size') + nchunks = len(task_chunks) + os.chdir(work_path) + tmp_dispatchers = [] + instance_id_list = [] + if len(glob.glob('jr.*.json')) == nchunks: + for ii in range(chunks): + with open('jr.%.06d.json' % ii) as fp: + job_record = json.load(fp) + key = list(job_record.keys())[0] + ip, instance_id = job_record[key]['context'][-2], job_record[key]['context'][-1] + # print(ip, instance_id) + mdata[stage + '_machine']['hostname'] = ip + mdata[stage + '_machine']['instance_id'] = instance_id + instance_id_list.append(instance_id) + disp = make_dispatcher(mdata[stage + '_machine'], job_record='jr.%.06d.json' %ii) + max_check = 10 + cnt = 0 + while not disp.session._check_alive(): + cnt += 1 + if cnt == max_check: + break + if cnt != max_check: + tmp_dispatchers.append(disp) + restart = False + if len(tmp_dispatchers) == nchunks: + restart = True + os.chdir(cwd) + job_handlers = [] + if stage == 'train': + for ii in range(nchunks): + job_handler = tmp_dispatchers[ii].submit_jobs(mdata['train_resources'], + commands, + work_path, + task_chunks[ii], + train_group_size, + trans_comm_data, + forward_files, + backward_files, + outlog = 'train.log', + errlog = 'train.log') + job_handlers.append(job_handler) + elif stage == 'model_devi': + for ii in range(nchunks): + job_handler = tmp_dispatchers[ii].submit_jobs(mdata['model_devi_resources'], + commands, + work_path, + task_chunks[ii], + model_devi_group_size, + model_names, + forward_files, + backward_files, + outlog = 'model_devi.log', + errlog = 'model_devi.log') + job_handlers.append(job_handler) + elif stage == 'fp': + for ii in range(nchunks): + job_handler = tmp_dispatchers[ii].submit_jobs(mdata['fp_resources'], + commands, + work_path, + task_chunks[ii], + fp_group_size, + forward_common_files, + forward_files, + backward_files, + outlog = log_file, + errlog = log_file, + ) + job_handlers.append(job_handler) + while True: + cnt = 0 + for ii in range(nchunks): + if tmp_dispatchers[ii].all_finished(job_handlers[ii]): + cnt += 1 + if cnt == nchunks: + break + else: + time.sleep(10) + exit_ALI(instance_id_list, mdata['ali_auth']) + return restart diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index 8ff72896a..65f9d9646 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -449,86 +449,28 @@ def run_train (iter_index, train_group_size = 1 if mdata['train_machine']['type'] == 'ALI': - task_chunks = _split_tasks(run_tasks, train_group_size) - nchunks = len(task_chunks) - cwd = os.getcwd() - os.chdir(work_path) - tmp_dispatchers = [] - instance_id_list = [] - if len(glob.glob('jr.*.json')) == nchunks: - for ii in range(nchunks): - with open('jr.%.06d.json' % ii) as fp: - job_record = json.load(fp) - key = list(job_record.keys())[0] - ip, instance_id = job_record[key]['context'][-2], job_record[key]['context'][-1] - # print(ip, instance_id) - mdata['train_machine']['hostname'] = ip - mdata['train_machine']['instance_id'] = instance_id - instance_id_list.append(instance_id) - disp = make_dispatcher(mdata['train_machine'], job_record='jr.%.06d.json' % ii) - max_check = 10 - cnt = 0 - while not disp.session._check_alive(): - cnt += 1 - if cnt == max_check: - break - if cnt != max_check: - tmp_dispatchers.append(disp) - if len(tmp_dispatchers) == nchunks: - os.chdir(cwd) - job_handlers = [] 
- for ii in range(nchunks): - job_handler = tmp_dispatchers[ii].submit_jobs(mdata['train_resources'], - commands, - work_path, - task_chunks[ii], - train_group_size, - trans_comm_data, - forward_files, - backward_files, - outlog = 'train.log', - errlog = 'train.log') - job_handlers.append(job_handler) - while True: - cnt = 0 - for ii in range(nchunks): - if tmp_dispatchers[ii].all_finished(job_handlers[ii]): - cnt += 1 - if cnt == nchunks: - break - else: - time.sleep(10) - exit_ALI(instance_id_list, mdata['ali_auth']) + if ali_restart_jobs(stage = 'train', + cwd = cwd, + mdata = mdata, + commands = commands, + work_path = work_path, + run_tasks = run_tasks, + train_group_size = train_group_size, + trans_comm_data = trans_comm_data, + forward_files = forward_files, + backward_files = backward_files): + pass else: - os.chdir(cwd) - ip, instance_id = run_ALI('train', nchunks, mdata['ali_auth']) - mdata['train_machine']['hostname'] = ip - mdata['train_machine']['instance_id'] = instance_id - disp = make_dispatchers(nchunks, mdata['train_machine']) - job_handlers = [] - for ii in range(nchunks): - job_handler = disp[ii].submit_jobs(mdata['train_resources'], - commands, - work_path, - task_chunks[ii], - train_group_size, - trans_comm_data, - forward_files, - backward_files, - outlog = 'train.log', - errlog = 'train.log') - job_handlers.append(job_handler) - - while True: - cnt = 0 - for ii in range(nchunks): - if disp[ii].all_finished(job_handlers[ii]): - cnt += 1 - if cnt == nchunks: - break - else: - time.sleep(10) - exit_ALI(instance_id, mdata['ali_auth']) + ali_start_jobs(stasge = 'train', + cwd = cwd, + mdata = mdata, + commands = commands, + work_path = work_path, + run_tasks = run_tasks, + train_group_size = train_group_size, + trans_comm_data = trans_comm_data, + forward_files = forward_files, + backward_files = backward_files) else: dispatcher.run_jobs(mdata['train_resources'], commands, @@ -989,88 +931,28 @@ def run_model_devi (iter_index, forward_files += ['input.plumed'] backward_files += ['output.plumed'] + cwd = os.getcwd() if mdata['model_devi_machine']['type'] == 'ALI': - task_chunks = _split_tasks(run_tasks, model_devi_group_size) - nchunks = len(task_chunks) - cwd = os.getcwd() - os.chdir(work_path) - tmp_dispatchers = [] - instance_id_list = [] - if len(glob.glob('jr.*.json')) == nchunks: - for ii in range(nchunks): - with open('jr.%.06d.json' %ii) as fp: - job_record = json.load(fp) - key = list(job_record.keys())[0] - ip, instance_id = job_record[key]['context'][-2], job_record[key]['context'][-1] - # print(ip, instance_id) - mdata['model_devi_machine']['hostname'] = ip - mdata['model_devi_machine']['instance_id'] = instance_id - instance_id_list.append(instance_id) - disp = make_dispatcher(mdata['model_devi_machine'], job_record='jr.%.06d.json' %ii) - max_check = 10 - cnt = 0 - while not disp.session._check_alive(): - cnt += 1 - if cnt == max_check: - break - # print('cnt', cnt) - if cnt != max_check: - tmp_dispatchers.append(disp) - if len(tmp_dispatchers) == nchunks: - os.chdir(cwd) - job_handlers = [] - for ii in range(nchunks): - job_handler = tmp_dispatchers[ii].submit_jobs(mdata['model_devi_resources'], - commands, - work_path, - task_chunks[ii], - model_devi_group_size, - model_names, - forward_files, - backward_files, - outlog = 'model_devi.log', - errlog = 'model_devi.log') - job_handlers.append(job_handler) - while True: - cnt = 0 - for ii in range(nchunks): - if tmp_dispatchers[ii].all_finished(job_handlers[ii]): - cnt += 1 - if cnt == nchunks: - break - else: - 
time.sleep(10) - exit_ALI(instance_id_list, mdata['ali_auth']) + if ali_restart_jobs(stage = 'model_devi', + cwd = cwd, + mdata = mdata, + work_path = work_path, + run_tasks = run_tasks, + model_devi_group_size = model_devi_group_size, + model_names = model_names, + forward_files = forward_files, + backward_files = backward_files): + pass else: - os.chdir(cwd) - ip, instance_id = run_ALI('model_devi', nchunks, mdata['ali_auth']) - mdata['model_devi_machine']['hostname'] = ip - mdata['model_devi_machine']['instance_id'] = instance_id - disp = make_dispatchers(nchunks, mdata['model_devi_machine']) - job_handlers = [] - for ii in range(nchunks): - job_handler = disp[ii].submit_jobs(mdata['model_devi_resources'], - commands, - work_path, - task_chunks[ii], - model_devi_group_size, - model_names, - forward_files, - backward_files, - outlog = 'model_devi.log', - errlog = 'model_devi.log') - job_handlers.append(job_handler) - - while True: - cnt = 0 - for ii in range(nchunks): - if disp[ii].all_finished(job_handlers[ii]): - cnt += 1 - if cnt == nchunks: - break - else: - time.sleep(10) - exit_ALI(instance_id, mdata['ali_auth']) + ali_start_jobs(stage = 'model_devi', + run_tasks = run_tasks, + work_path = work_path, + cwd = cwd, + mdata = mdata, + model_devi_group_size = model_devi_group_size, + model_names = model_names, + forward_files = forward_files, + backward_files = backward_files) else: dispatcher.run_jobs(mdata['model_devi_resources'], commands, @@ -1653,89 +1535,34 @@ def run_fp_inner (iter_index, # if not check_fin(ii) : # fp_run_tasks.append(ii) run_tasks = [os.path.basename(ii) for ii in fp_run_tasks] - + cwd = os.getcwd() if mdata['fp_machine']['type'] == 'ALI': - task_chunks = _split_tasks(run_tasks, fp_group_size) - nchunks = len(task_chunks) - cwd = os.getcwd() - os.chdir(work_path) - tmp_dispatchers = [] - instance_id_list = [] - if len(glob.glob('jr.*.json')) == nchunks: - for ii in range(nchunks): - with open('jr.%.06d.json' %ii) as fp: - job_record = json.load(fp) - key = list(job_record.keys())[0] - ip, instance_id = job_record[key]['context'][-2], job_record[key]['context'][-1] - # print(ip, instance_id) - mdata['fp_machine']['hostname'] = ip - mdata['fp_machine']['instance_id'] = instance_id - instance_id_list.append(instance_id) - disp = make_dispatcher(mdata['fp_machine'], job_record='jr.%.06d.json' %ii) - max_check = 10 - cnt = 0 - while not disp.session._check_alive(): - cnt += 1 - if cnt == max_check: - break - # print('cnt', cnt) - if cnt != max_check: - tmp_dispatchers.append(disp) - if len(tmp_dispatchers) == nchunks: - os.chdir(cwd) - job_handlers = [] - for ii in range(nchunks): - job_handler = tmp_dispatchers[ii].submit_jobs(mdata['fp_resources'], - [fp_command], - work_path, - task_chunks[ii], - fp_group_size, - forward_common_files, - forward_files, - backward_files, - outlog = log_file, - errlog = log_file) - job_handlers.append(job_handler) - while True: - cnt = 0 - for ii in range(nchunks): - if tmp_dispatchers[ii].all_finished(job_handlers[ii]): - cnt += 1 - if cnt == nchunks: - break - else: - time.sleep(10) - exit_ALI(instance_id_list, mdata['ali_auth']) + if ali_restart_jobs(stage = 'fp', + mdata = mdata, + run_tasks = run_tasks, + work_path = work_path, + cwd = cwd, + command = [fp_command], + fp_group_size = fp_group_size, + forward_common_files = forward_common_files, + forward_files = forward_files, + backward_files = backward_files, + outlog = log_file, + errlog = log_file): + pass else: - os.chdir(cwd) - ip, instance_id = run_ALI('fp', nchunks, 
mdata['ali_auth']) - mdata['fp_machine']['hostname'] = ip - mdata['fp_machine']['instance_id'] = instance_id - disp = make_dispatchers(nchunks, mdata['fp_machine']) - job_handlers = [] - for ii in range(nchunks): - job_handler = disp[ii].submit_jobs(mdata['fp_resources'], - [fp_command], - work_path, - task_chunks[ii], - fp_group_size, - forward_common_files, - forward_files, - backward_files, - outlog = log_file, - errlog = log_file) - job_handlers.append(job_handler) - while True: - cnt = 0 - for ii in range(nchunks): - if disp[ii].all_finished(job_handlers[ii]): - cnt += 1 - if cnt == nchunks: - break - else: - time.sleep(10) - exit_ALI(instance_id, mdata['ali_auth']) - + ali_start_jobs(stage = 'fp', + mdata = mdata, + run_tasks = run_tasks, + work_path = work_path, + cwd = cwd, + command = [fp_command], + fp_group_size = fp_group_size, + forward_common_files = forward_common_files, + forward_files = forward_files, + backward_files = backward_files, + outlog = log_file, + errlog = log_file) else: dispatcher.run_jobs(mdata['fp_resources'], [fp_command], From 5efe8c5dee0ae3319bff1e7361edc85daa18134c Mon Sep 17 00:00:00 2001 From: dingzhaohan Date: Fri, 13 Dec 2019 12:57:56 +0800 Subject: [PATCH 091/109] fix bug --- dpgen/dispatcher/Dispatcher.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dpgen/dispatcher/Dispatcher.py b/dpgen/dispatcher/Dispatcher.py index bc8672dae..fb86a0248 100644 --- a/dpgen/dispatcher/Dispatcher.py +++ b/dpgen/dispatcher/Dispatcher.py @@ -372,7 +372,8 @@ def ali_start_jobs(stage, work_path, task_chunks[ii], fp_group_size, - forward_common_files, forward_files, + forward_common_files, + forward_files, backward_files, outlog = log_file, errlog = log_file) @@ -384,7 +385,8 @@ def ali_start_jobs(stage, work_path, task_chunks[ii], fp_group_size, - forward_common_files, forward_files, + forward_common_files, + forward_files, backward_files, outlog = log_file, errlog = log_file) @@ -396,7 +398,8 @@ def ali_start_jobs(stage, work_path, task_chunks[ii], fp_group_size, - forward_common_files, forward_files, + forward_common_files, + forward_files, backward_files, outlog = log_file, errlog = log_file) From 7488493eecfd28edb90e4a3752679d3b4265a30b Mon Sep 17 00:00:00 2001 From: dingzhaohan Date: Fri, 13 Dec 2019 13:45:41 +0800 Subject: [PATCH 092/109] fix bug --- dpgen/dispatcher/Dispatcher.py | 65 ++++++++++++++++++++++------------ dpgen/generator/run.py | 30 +++++++--------- 2 files changed, 55 insertions(+), 40 deletions(-) diff --git a/dpgen/dispatcher/Dispatcher.py b/dpgen/dispatcher/Dispatcher.py index fb86a0248..324311e6b 100644 --- a/dpgen/dispatcher/Dispatcher.py +++ b/dpgen/dispatcher/Dispatcher.py @@ -1,4 +1,4 @@ -import os,sys,time,random,json +import os,sys,time,random,json,glob from dpgen.dispatcher.LocalContext import LocalSession from dpgen.dispatcher.LocalContext import LocalContext @@ -11,6 +11,10 @@ from dpgen.dispatcher.Shell import Shell from dpgen.dispatcher.AWS import AWS from dpgen.dispatcher.JobStatus import JobStatus +try: + from dpgen.dispatcher.ALI import ALI, run_ALI, exit_ALI +except: + pass from dpgen import dlog from hashlib import sha1 @@ -349,52 +353,60 @@ def ali_start_jobs(stage, work_path, cwd, commands, - trans_comm_data = None, + forward_files, + backward_files, train_group_size = None, model_devi_group_size = None, fp_group_size = None, + trans_comm_data = None, model_names = None, - forward_files, - backward_files, - log_file = None) + forward_common_files = None, + log_file = None): os.chdir(cwd) - 
task_chunks = _split_tasks(run_tasks) + group_size = 1 + if stage == 'train': + group_size = train_group_size + elif stage == 'model_devi': + group_size = model_devi_group_size + elif stage == 'fp': + group_size = fp_group_size + task_chunks = _split_tasks(run_tasks, group_size) nchunks = len(task_chunks) - ip, instance_id = run_ALI('stage', nchunks, mdata['ali_auth']) + ip, instance_id = run_ALI(stage, nchunks, mdata['ali_auth']) mdata[stage + '_machine']['hostname'] = ip mdata[stage + '_machine']['instance_id'] = instance_id disp = make_dispatchers(nchunks, mdata[stage + '_machine']) job_handlers = [] if stage == 'train': for ii in range(nchunks): - job_handler = disp[ii].submit_jobs(mdata['fp_resources'], - [fp_command], + job_handler = disp[ii].submit_jobs(mdata['train_resources'], + commands, work_path, task_chunks[ii], - fp_group_size, - forward_common_files, + train_group_size, + trans_comm_data, forward_files, backward_files, - outlog = log_file, - errlog = log_file) + outlog = 'train.log', + errlog = 'train.log') job_handlers.append(job_handler) elif stage == 'model_devi': for ii in range(nchunks): - job_handler = disp[ii].submit_jobs(mdata['fp_resources'], - [fp_command], + job_handler = disp[ii].submit_jobs(mdata['model_devi_resources'], + commands, work_path, task_chunks[ii], - fp_group_size, - forward_common_files, + model_devi_group_size, + model_names, forward_files, backward_files, - outlog = log_file, - errlog = log_file) + outlog = 'model_devi.log', + errlog = 'model_devi.log') job_handlers.append(job_handler) elif stage == 'fp': for ii in range(nchunks): job_handler = disp[ii].submit_jobs(mdata['fp_resources'], - [fp_command], + commands, work_path, task_chunks[ii], fp_group_size, @@ -421,16 +433,23 @@ def ali_restart_jobs(stage, commands, work_path, run_tasks, + forward_files, + backward_files, train_group_size = None, model_devi_group_size = None, fp_group_size = None, trans_comm_data = None, model_names = None, forward_common_files = None, - forward_files, - backward_files, log_file=None): - task_chunks = _split_tasks(run_tasks, stage + '_group_size') + group_size = 1 + if stage == 'train': + group_size = train_group_size + elif stage == 'model_devi': + group_size = model_devi_group_size + elif stage == 'fp': + group_size = fp_group_size + task_chunks = _split_tasks(run_tasks, group_size) nchunks = len(task_chunks) os.chdir(work_path) tmp_dispatchers = [] diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index 65f9d9646..ca880ca08 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -52,11 +52,7 @@ from dpgen.remote.group_jobs import group_slurm_jobs from dpgen.remote.group_jobs import group_local_jobs from dpgen.remote.decide_machine import decide_train_machine, decide_fp_machine, decide_model_devi_machine -from dpgen.dispatcher.Dispatcher import Dispatcher, make_dispatcher, make_dispatchers, _split_tasks -try: - from dpgen.dispatcher.ALI import ALI, run_ALI, exit_ALI -except: - pass +from dpgen.dispatcher.Dispatcher import Dispatcher, make_dispatcher, make_dispatchers, _split_tasks, ali_restart_jobs, ali_start_jobs from dpgen.util import sepline from dpgen import ROOT_PATH from pymatgen.io.vasp import Incar,Kpoints,Potcar @@ -461,7 +457,7 @@ def run_train (iter_index, backward_files = backward_files): pass else: - ali_start_jobs(stasge = 'train', + ali_start_jobs(stage = 'train', cwd = cwd, mdata = mdata, commands = commands, @@ -936,6 +932,7 @@ def run_model_devi (iter_index, if ali_restart_jobs(stage = 'model_devi', cwd = cwd, mdata = 
mdata, + commands = commands, work_path = work_path, run_tasks = run_tasks, model_devi_group_size = model_devi_group_size, @@ -945,10 +942,11 @@ def run_model_devi (iter_index, pass else: ali_start_jobs(stage = 'model_devi', - run_tasks = run_tasks, - work_path = work_path, cwd = cwd, mdata = mdata, + commands = commands, + run_tasks = run_tasks, + work_path = work_path, model_devi_group_size = model_devi_group_size, model_names = model_names, forward_files = forward_files, @@ -1538,31 +1536,29 @@ def run_fp_inner (iter_index, cwd = os.getcwd() if mdata['fp_machine']['type'] == 'ALI': if ali_restart_jobs(stage = 'fp', + cwd = cwd, mdata = mdata, - run_tasks = run_tasks, + commands = [fp_command], work_path = work_path, - cwd = cwd, - command = [fp_command], + run_tasks = run_tasks, fp_group_size = fp_group_size, forward_common_files = forward_common_files, forward_files = forward_files, backward_files = backward_files, - outlog = log_file, - errlog = log_file): + log_file = log_file): pass else: ali_start_jobs(stage = 'fp', mdata = mdata, + cwd = cwd, + commands = [fp_command], run_tasks = run_tasks, work_path = work_path, - cwd = cwd, - command = [fp_command], fp_group_size = fp_group_size, forward_common_files = forward_common_files, forward_files = forward_files, backward_files = backward_files, - outlog = log_file, - errlog = log_file) + log_file = log_file) else: dispatcher.run_jobs(mdata['fp_resources'], [fp_command], From 491c94393ed7d9f88158139fbd9b63f5d0c2c33c Mon Sep 17 00:00:00 2001 From: Han Wang Date: Fri, 13 Dec 2019 15:35:12 +0800 Subject: [PATCH 093/109] add job.json to record revision by keyword replacement --- dpgen/generator/run.py | 4 ++++ tests/generator/test_make_md.py | 21 ++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index e95a428e9..c7cebe833 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -739,6 +739,10 @@ def _make_model_devi_revmat(iter_index, jdata, mdata, conf_systems): # dump input of lammps with open('input.lammps', 'w') as fp: fp.write(''.join(lmp_lines)) + with open('job.json', 'w') as fp: + job = {} + for ii,jj in zip(rev_keys, rev_item) : job[ii] = jj + json.dump(job, fp, indent = 4) os.chdir(cwd_) task_counter += 1 conf_counter += 1 diff --git a/tests/generator/test_make_md.py b/tests/generator/test_make_md.py index 6d321ba57..3ee930344 100644 --- a/tests/generator/test_make_md.py +++ b/tests/generator/test_make_md.py @@ -192,6 +192,7 @@ def test_make_model_devi (self) : # check the first task md_dir = os.path.join('iter.%06d' % 0, '01.model_devi') tasks = glob.glob(os.path.join(md_dir, 'task.*')) + tasks.sort() # 4 accounts for 2 systems each with 2 frames self.assertEqual(len(tasks), (len(jdata['model_devi_jobs'][0]['rev_mat']['lmp']['V_NSTEPS']) * len(jdata['model_devi_jobs'][0]['rev_mat']['lmp']['V_TEMP']) * @@ -199,7 +200,25 @@ def test_make_model_devi (self) : len(jdata['model_devi_jobs'][0]['rev_mat']['plm']['V_DIST0']) * len(jdata['model_devi_jobs'][0]['rev_mat']['plm']['V_DIST1']) * 4)) - tasks.sort() + + cur_job = jdata['model_devi_jobs'][0] + rev_keys = ['V_NSTEPS', 'V_TEMP', 'V_PRES', 'V_DIST0', 'V_DIST1'] + rev_matrix = [] + for i0 in cur_job['rev_mat']['lmp']['V_NSTEPS']: + for i1 in cur_job['rev_mat']['lmp']['V_TEMP']: + for i2 in cur_job['rev_mat']['lmp']['V_PRES']: + for i3 in cur_job['rev_mat']['plm']['V_DIST0']: + for i4 in cur_job['rev_mat']['plm']['V_DIST1']: + rev_matrix.append([i0, i1, i2, i3, i4]) + numb_rev = len(rev_matrix) 
+ for ii in range(len(tasks)): + with open(os.path.join(tasks[ii], 'job.json')) as fp: + rev_values = rev_matrix[ii % numb_rev] + job_recd = json.load(fp) + for kk in job_recd.keys(): + kidx = rev_keys.index(kk) + self.assertEqual(rev_values[kidx], job_recd[kk]) + cwd_ = os.getcwd() os.chdir(tasks[0]) with open('input.lammps') as fp: From 4e7b1a4fb3c86d3927bce3e33341f878d536e819 Mon Sep 17 00:00:00 2001 From: dingzhaohan Date: Sat, 14 Dec 2019 15:50:21 +0800 Subject: [PATCH 094/109] rewrite ALI.py --- dpgen/dispatcher/ALI.py | 172 ++++++++++++++++++++------ dpgen/dispatcher/Dispatcher.py | 215 --------------------------------- dpgen/generator/run.py | 155 ++++++++---------------- 3 files changed, 189 insertions(+), 353 deletions(-) diff --git a/dpgen/dispatcher/ALI.py b/dpgen/dispatcher/ALI.py index a9eab9bfa..08573dfd9 100644 --- a/dpgen/dispatcher/ALI.py +++ b/dpgen/dispatcher/ALI.py @@ -4,31 +4,147 @@ from aliyunsdkcore.acs_exception.exceptions import ServerException from aliyunsdkecs.request.v20140526.RunInstancesRequest import RunInstancesRequest from aliyunsdkecs.request.v20140526.DeleteInstancesRequest import DeleteInstancesRequest -import time -import json -from dpgen.dispatcher.Batch import Batch -from dpgen.dispatcher.JobStatus import JobStatus -from dpgen.dispatcher.Shell import Shell -from dpgen.dispatcher.SSHContext import SSHContext, SSHSession +import time, json, os, glob +from dpgen.dispatcher.Dispatcher import Dispatcher, _split_tasks +from os import listdir +from os.path import isfile, join + +determine_machine = { + "gpu": { + 1: "ecs.gn5-c8g1.2xlarge", + }, + "cpu": { + 1: "ecs.c6.large", + 4: "ecs.c6.2xlarge", + 8: "ecs.c6.4xlarge" + } +} class ALI(): - def __init__(self, adata): + def __init__(self, adata, mdata, nchunks, work_path, cwd): self.ip_list = None - self.regionID = None self.instance_list = None - self.AccessKey_ID = adata["AccessKey_ID"] - self.AccessKey_Secret = adata["AccessKey_Secret"] - self.strategy = adata["pay_strategy"] - self.pwd = adata["pwd"] + self.dispatchers = None + self.job_handlers = None + self.adata = adata + self.mdata = mdata + self.nchunks = nchunks + self.work_path = work_path + self.cwd = cwd + self.regionID = 'cn-hangzhou' + + def init(self): + if self.check_restart(): + pass + else: + self.create_machine() + self.dispatchers = self.make_dispatchers() + + def check_restart(self): + os.chdir(self.work_path) + dispatchers = [] + instance_list = [] + if len(glob.glob('jr.*.json')) == self.nchunks: + for ii in range(self.nchunks): + with open('jr.%.06d.json' % ii) as fp: + job_record = json.load(fp) + key = list(job_record.keys())[0] + ip, instance_id = job_record[key]['context'][-2], job_record[key]['context'][-1] + instance_list.append(instance_id) + profile = { + 'type': 'ALI', + 'hostname': ip, + 'instance_id': instance_id, + 'port': 22, + 'username': 'root', + 'password': self.adata['password'], + 'work_path': '/root/dpgen_work' + } + disp = Dispatcher(profile, context_type='ssh', batch_type='shell', job_record='jr.%.06d.json' % ii) + max_check = 10 + cnt = 0 + while not disp.session._check_alive(): + cnt += 1 + if cnt == max_check: + break + if cnt != max_check: + dispatchers.append(disp) + restart = False + if len(dispatchers) == self.nchunks: + restart = True + self.dispatchers = dispatchers + self.instance_list = instance_list + os.chdir(self.cwd) + return restart + + def run_jobs(self, + resources, + command, + work_path, + tasks, + group_size, + forward_common_files, + forward_task_files, + backward_task_files, + 
forward_task_deference = True, + outlog = 'log', + errlog = 'err'): + task_chunks = _split_tasks(tasks, group_size) + job_handlers = [] + for ii in range(self.nchunks): + job_handler = self.dispatchers[ii].submit_jobs(resources, + command, + work_path, + task_chunks[ii], + group_size, + forward_common_files, + forward_task_files, + backward_task_files, + forward_task_deference, + outlog, + errlog) + job_handlers.append(job_handler) + while True: + cnt = 0 + for ii in range(self.nchunks): + if self.dispatchers[ii].all_finished(job_handlers[ii]): + cnt += 1 + if cnt == self.nchunks: + break + else: + time.sleep(10) + self.delete_machine() - def create_machine(self, instance_number, instance_type): + def make_dispatchers(self): + dispatchers = [] + for ii in range(self.nchunks): + remote_profile = { + 'type': 'ALI', + 'hostname': self.ip_list[ii], + 'instance_id': self.instance_list[ii], + 'port': 22, + 'username': 'root', + 'password': self.adata['password'], + 'work_path': '/root/dpgen_work' + } + disp = Dispatcher(remote_profile, context_type='ssh', batch_type='shell', job_record='jr.%.06d.json' % ii) + dispatchers.append(disp) + return dispatchers + + def create_machine(self): + AccessKey_ID = self.adata["AccessKey_ID"] + AccessKey_Secret = self.adata["AccessKey_Secret"] + strategy = self.adata["pay_strategy"] + pwd = self.adata["password"] + regionID = self.regionID + instance_type = determine_machine[self.mdata['partition']][self.mdata['numb_gpu']] if True: - client = AcsClient(self.AccessKey_ID,self.AccessKey_Secret, 'cn-hangzhou') + client = AcsClient(AccessKey_ID,AccessKey_Secret, 'cn-hangzhou') request = RunInstancesRequest() request.set_accept_format('json') request.set_UniqueSuffix(True) - request.set_Password(self.pwd) - request.set_Amount(instance_number) + request.set_Password(pwd) + request.set_Amount(self.nchunks) request.set_LaunchTemplateName(instance_type + '_cn-hangzhou_i') response = client.do_action_with_exception(request) response = json.loads(response) @@ -39,33 +155,21 @@ def create_machine(self, instance_number, instance_type): request.set_InstanceIds(self.instance_list) response = client.do_action_with_exception(request) response = json.loads(response) - ip = [] for i in range(len(response["Instances"]["Instance"])): ip.append(response["Instances"]["Instance"][i]["PublicIpAddress"]['IpAddress'][0]) self.ip_list = ip - return self.ip_list, self.instance_list else: return "create failed" - def delete_machine(self, instance_id): - client = AcsClient(self.AccessKey_ID,self.AccessKey_Secret, 'cn-hangzhou') + def delete_machine(self): + AccessKey_ID = self.adata["AccessKey_ID"] + AccessKey_Secret = self.adata["AccessKey_Secret"] + regionID = self.regionID + client = AcsClient(AccessKey_ID,AccessKey_Secret, regionID) request = DeleteInstancesRequest() request.set_accept_format('json') - request.set_InstanceIds(instance_id) + request.set_InstanceIds(self.instance_list) request.set_Force(True) response = client.do_action_with_exception(request) -def run_ALI(stage, num_of_instance, adata): - if stage == "train": - instance_type = "ecs.gn5-c8g1.2xlarge" - elif stage == "model_devi": - instance_type = "ecs.gn5-c8g1.2xlarge" - elif stage == "fp": - instance_type = "ecs.c6.2xlarge" - ali = ALI(adata) - return ali.create_machine(num_of_instance, instance_type) - -def exit_ALI(instance_id, adata): - ali = ALI(adata) - ali.delete_machine(instance_id) diff --git a/dpgen/dispatcher/Dispatcher.py b/dpgen/dispatcher/Dispatcher.py index 324311e6b..60e50b0c9 100644 --- 
a/dpgen/dispatcher/Dispatcher.py +++ b/dpgen/dispatcher/Dispatcher.py @@ -11,10 +11,6 @@ from dpgen.dispatcher.Shell import Shell from dpgen.dispatcher.AWS import AWS from dpgen.dispatcher.JobStatus import JobStatus -try: - from dpgen.dispatcher.ALI import ALI, run_ALI, exit_ALI -except: - pass from dpgen import dlog from hashlib import sha1 @@ -318,214 +314,3 @@ def make_dispatcher(mdata, job_record=None): context_type = 'lazy-local' disp = Dispatcher(mdata, context_type=context_type, batch_type=batch_type, job_record=job_record) return disp - -def make_dispatchers(num, mdata): - dispatchers = [] - for i in range(num): - try: - hostname = mdata['hostname'][i] - instance_id = mdata['instance_id'][i] - context_type = 'ssh' - except: - context_type = 'local' - try: - batch_type = mdata['batch'] - except: - dlog.info('cannot find key "batch" in machine file, try to use deprecated key "machine_type"') - batch_type = mdata['machine_type'] - try: - lazy_local = mdata['lazy_local'] - except: - lazy_local = False - if lazy_local and context_type == 'local': - dlog.info('Dispatcher switches to the lazy local mode') - context_type = 'lazy-local' - remote_profile = mdata.copy() - remote_profile['hostname'] = hostname - remote_profile['instance_id'] = instance_id - disp = Dispatcher(remote_profile, context_type=context_type, batch_type=batch_type, job_record='jr.%.06d.json' %i) - dispatchers.append(disp) - return dispatchers - -def ali_start_jobs(stage, - mdata, - run_tasks, - work_path, - cwd, - commands, - forward_files, - backward_files, - train_group_size = None, - model_devi_group_size = None, - fp_group_size = None, - trans_comm_data = None, - model_names = None, - forward_common_files = None, - log_file = None): - os.chdir(cwd) - group_size = 1 - if stage == 'train': - group_size = train_group_size - elif stage == 'model_devi': - group_size = model_devi_group_size - elif stage == 'fp': - group_size = fp_group_size - task_chunks = _split_tasks(run_tasks, group_size) - nchunks = len(task_chunks) - ip, instance_id = run_ALI(stage, nchunks, mdata['ali_auth']) - mdata[stage + '_machine']['hostname'] = ip - mdata[stage + '_machine']['instance_id'] = instance_id - disp = make_dispatchers(nchunks, mdata[stage + '_machine']) - job_handlers = [] - if stage == 'train': - for ii in range(nchunks): - job_handler = disp[ii].submit_jobs(mdata['train_resources'], - commands, - work_path, - task_chunks[ii], - train_group_size, - trans_comm_data, - forward_files, - backward_files, - outlog = 'train.log', - errlog = 'train.log') - job_handlers.append(job_handler) - elif stage == 'model_devi': - for ii in range(nchunks): - job_handler = disp[ii].submit_jobs(mdata['model_devi_resources'], - commands, - work_path, - task_chunks[ii], - model_devi_group_size, - model_names, - forward_files, - backward_files, - outlog = 'model_devi.log', - errlog = 'model_devi.log') - job_handlers.append(job_handler) - elif stage == 'fp': - for ii in range(nchunks): - job_handler = disp[ii].submit_jobs(mdata['fp_resources'], - commands, - work_path, - task_chunks[ii], - fp_group_size, - forward_common_files, - forward_files, - backward_files, - outlog = log_file, - errlog = log_file) - job_handlers.append(job_handler) - while True: - cnt = 0 - for ii in range(nchunks): - if disp[ii].all_finished(job_handlers[ii]): - cnt += 1 - if cnt == nchunks: - break - else: - time.sleep(10) - exit_ALI(instance_id, mdata['ali_auth']) - -def ali_restart_jobs(stage, - cwd, - mdata, - commands, - work_path, - run_tasks, - forward_files, - 
backward_files, - train_group_size = None, - model_devi_group_size = None, - fp_group_size = None, - trans_comm_data = None, - model_names = None, - forward_common_files = None, - log_file=None): - group_size = 1 - if stage == 'train': - group_size = train_group_size - elif stage == 'model_devi': - group_size = model_devi_group_size - elif stage == 'fp': - group_size = fp_group_size - task_chunks = _split_tasks(run_tasks, group_size) - nchunks = len(task_chunks) - os.chdir(work_path) - tmp_dispatchers = [] - instance_id_list = [] - if len(glob.glob('jr.*.json')) == nchunks: - for ii in range(chunks): - with open('jr.%.06d.json' % ii) as fp: - job_record = json.load(fp) - key = list(job_record.keys())[0] - ip, instance_id = job_record[key]['context'][-2], job_record[key]['context'][-1] - # print(ip, instance_id) - mdata[stage + '_machine']['hostname'] = ip - mdata[stage + '_machine']['instance_id'] = instance_id - instance_id_list.append(instance_id) - disp = make_dispatcher(mdata[stage + '_machine'], job_record='jr.%.06d.json' %ii) - max_check = 10 - cnt = 0 - while not disp.session._check_alive(): - cnt += 1 - if cnt == max_check: - break - if cnt != max_check: - tmp_dispatchers.append(disp) - restart = False - if len(tmp_dispatchers) == nchunks: - restart = True - os.chdir(cwd) - job_handlers = [] - if stage == 'train': - for ii in range(nchunks): - job_handler = tmp_dispatchers[ii].submit_jobs(mdata['train_resources'], - commands, - work_path, - task_chunks[ii], - train_group_size, - trans_comm_data, - forward_files, - backward_files, - outlog = 'train.log', - errlog = 'train.log') - job_handlers.append(job_handler) - elif stage == 'model_devi': - for ii in range(nchunks): - job_handler = tmp_dispatchers[ii].submit_jobs(mdata['model_devi_resources'], - commands, - work_path, - task_chunks[ii], - model_devi_group_size, - model_names, - forward_files, - backward_files, - outlog = 'model_devi.log', - errlog = 'model_devi.log') - job_handlers.append(job_handler) - elif stage == 'fp': - for ii in range(nchunks): - job_handler = tmp_dispatchers[ii].submit_jobs(mdata['fp_resources'], - commands, - work_path, - task_chunks[ii], - fp_group_size, - forward_common_files, - forward_files, - backward_files, - outlog = log_file, - errlog = log_file, - ) - job_handlers.append(job_handler) - while True: - cnt = 0 - for ii in range(nchunks): - if tmp_dispatchers[ii].all_finished(job_handlers[ii]): - cnt += 1 - if cnt == nchunks: - break - else: - time.sleep(10) - exit_ALI(instance_id_list, mdata['ali_auth']) - return restart diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index ca880ca08..15c09a580 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -52,7 +52,8 @@ from dpgen.remote.group_jobs import group_slurm_jobs from dpgen.remote.group_jobs import group_local_jobs from dpgen.remote.decide_machine import decide_train_machine, decide_fp_machine, decide_model_devi_machine -from dpgen.dispatcher.Dispatcher import Dispatcher, make_dispatcher, make_dispatchers, _split_tasks, ali_restart_jobs, ali_start_jobs +from dpgen.dispatcher.Dispatcher import Dispatcher, _split_tasks +from dpgen.dispatcher.ALI import ALI from dpgen.util import sepline from dpgen import ROOT_PATH from pymatgen.io.vasp import Incar,Kpoints,Potcar @@ -444,41 +445,23 @@ def run_train (iter_index, except: train_group_size = 1 + task_chunks = _split_tasks(run_tasks, train_group_size) + nchunks = len(task_chunks) if mdata['train_machine']['type'] == 'ALI': - if ali_restart_jobs(stage = 'train', - cwd = cwd, - 
mdata = mdata, - commands = commands, - work_path = work_path, - run_tasks = run_tasks, - train_group_size = train_group_size, - trans_comm_data = trans_comm_data, - forward_files = forward_files, - backward_files = backward_files): - pass - else: - ali_start_jobs(stage = 'train', - cwd = cwd, - mdata = mdata, - commands = commands, - work_path = work_path, - run_tasks = run_tasks, - train_group_size = train_group_size, - trans_comm_data = trans_comm_data, - forward_files = forward_files, - backward_files = backward_files) - else: - dispatcher.run_jobs(mdata['train_resources'], - commands, - work_path, - run_tasks, - train_group_size, - trans_comm_data, - forward_files, - backward_files, - outlog = 'train.log', - errlog = 'train.log') - + adata = mdata['ali_auth'] + dispatcher = ALI(adata, mdata['train_resources'], nchunks, work_path, cwd) + dispatcher.init() + + dispatcher.run_jobs(mdata['train_resources'], + commands, + work_path, + run_tasks, + train_group_size, + trans_comm_data, + forward_files, + backward_files, + outlog = 'train.log', + errlog = 'train.log') def post_train (iter_index, jdata, @@ -928,40 +911,23 @@ def run_model_devi (iter_index, backward_files += ['output.plumed'] cwd = os.getcwd() + task_chunks = _split_tasks(run_tasks, model_devi_group_size) + nchunks = len(task_chunks) if mdata['model_devi_machine']['type'] == 'ALI': - if ali_restart_jobs(stage = 'model_devi', - cwd = cwd, - mdata = mdata, - commands = commands, - work_path = work_path, - run_tasks = run_tasks, - model_devi_group_size = model_devi_group_size, - model_names = model_names, - forward_files = forward_files, - backward_files = backward_files): - pass - else: - ali_start_jobs(stage = 'model_devi', - cwd = cwd, - mdata = mdata, - commands = commands, - run_tasks = run_tasks, - work_path = work_path, - model_devi_group_size = model_devi_group_size, - model_names = model_names, - forward_files = forward_files, - backward_files = backward_files) - else: - dispatcher.run_jobs(mdata['model_devi_resources'], - commands, - work_path, - run_tasks, - model_devi_group_size, - model_names, - forward_files, - backward_files, - outlog = 'model_devi.log', - errlog = 'model_devi.log') + adata = mdata['ali_auth'] + dispatcher = ALI(adata, mdata['model_devi_resources'], nchunks, work_path, cwd) + dispatcher.init() + + dispatcher.run_jobs(mdata['model_devi_resources'], + commands, + work_path, + run_tasks, + model_devi_group_size, + model_names, + forward_files, + backward_files, + outlog = 'model_devi.log', + errlog = 'model_devi.log') def post_model_devi (iter_index, @@ -1514,7 +1480,7 @@ def run_fp_inner (iter_index, forward_files, backward_files, check_fin, - log_file = "log", + log_file = "fp.log", forward_common_files=[]) : fp_command = mdata['fp_command'] fp_group_size = mdata['fp_group_size'] @@ -1534,42 +1500,23 @@ def run_fp_inner (iter_index, # fp_run_tasks.append(ii) run_tasks = [os.path.basename(ii) for ii in fp_run_tasks] cwd = os.getcwd() + task_chunks = _split_tasks(run_tasks, fp_group_size) + nchunks = len(task_chunks) if mdata['fp_machine']['type'] == 'ALI': - if ali_restart_jobs(stage = 'fp', - cwd = cwd, - mdata = mdata, - commands = [fp_command], - work_path = work_path, - run_tasks = run_tasks, - fp_group_size = fp_group_size, - forward_common_files = forward_common_files, - forward_files = forward_files, - backward_files = backward_files, - log_file = log_file): - pass - else: - ali_start_jobs(stage = 'fp', - mdata = mdata, - cwd = cwd, - commands = [fp_command], - run_tasks = run_tasks, - 
work_path = work_path, - fp_group_size = fp_group_size, - forward_common_files = forward_common_files, - forward_files = forward_files, - backward_files = backward_files, - log_file = log_file) - else: - dispatcher.run_jobs(mdata['fp_resources'], - [fp_command], - work_path, - run_tasks, - fp_group_size, - forward_common_files, - forward_files, - backward_files, - outlog = log_file, - errlog = log_file) + adata = mdata['ali_auth'] + dispatcher = ALI(adata, mdata['fp_resources'], nchunks, work_path, cwd) + dispatcher.init() + + dispatcher.run_jobs(mdata['fp_resources'], + [fp_command], + work_path, + run_tasks, + fp_group_size, + forward_common_files, + forward_files, + backward_files, + outlog = log_file, + errlog = log_file) def run_fp (iter_index, From 70b0899d41e1e467becb6270cff1fe1cdcdd83f3 Mon Sep 17 00:00:00 2001 From: dingzhaohan Date: Sat, 14 Dec 2019 15:54:31 +0800 Subject: [PATCH 095/109] fix import bug --- dpgen/generator/run.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index 15c09a580..0ae2c6914 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -53,7 +53,10 @@ from dpgen.remote.group_jobs import group_local_jobs from dpgen.remote.decide_machine import decide_train_machine, decide_fp_machine, decide_model_devi_machine from dpgen.dispatcher.Dispatcher import Dispatcher, _split_tasks -from dpgen.dispatcher.ALI import ALI +try: + from dpgen.dispatcher.ALI import ALI +except: + pass from dpgen.util import sepline from dpgen import ROOT_PATH from pymatgen.io.vasp import Incar,Kpoints,Potcar From be7518e554212eab47d1c3e71a341cacc677068e Mon Sep 17 00:00:00 2001 From: dingzhaohan Date: Sat, 14 Dec 2019 20:45:24 +0800 Subject: [PATCH 096/109] optimize code --- dpgen/dispatcher/ALI.py | 48 ++++++++++++++--------------------------- dpgen/generator/run.py | 6 +++--- 2 files changed, 19 insertions(+), 35 deletions(-) diff --git a/dpgen/dispatcher/ALI.py b/dpgen/dispatcher/ALI.py index 08573dfd9..7dde25688 100644 --- a/dpgen/dispatcher/ALI.py +++ b/dpgen/dispatcher/ALI.py @@ -21,17 +21,15 @@ } class ALI(): - def __init__(self, adata, mdata, nchunks, work_path, cwd): + def __init__(self, adata, mdata_resources, mdata_machine, nchunks, work_path): self.ip_list = None self.instance_list = None self.dispatchers = None - self.job_handlers = None self.adata = adata - self.mdata = mdata + self.mdata_resources = mdata_resources + self.mdata_machine = mdata_machine self.nchunks = nchunks self.work_path = work_path - self.cwd = cwd - self.regionID = 'cn-hangzhou' def init(self): if self.check_restart(): @@ -41,25 +39,18 @@ def init(self): self.dispatchers = self.make_dispatchers() def check_restart(self): - os.chdir(self.work_path) dispatchers = [] instance_list = [] - if len(glob.glob('jr.*.json')) == self.nchunks: + if len(glob.glob(os.path.join(self.work_path, 'jr.*.json'))) == self.nchunks: for ii in range(self.nchunks): - with open('jr.%.06d.json' % ii) as fp: + with open(os.path.join(self.work_path, 'jr.%.06d.json' % ii)) as fp: job_record = json.load(fp) key = list(job_record.keys())[0] ip, instance_id = job_record[key]['context'][-2], job_record[key]['context'][-1] instance_list.append(instance_id) - profile = { - 'type': 'ALI', - 'hostname': ip, - 'instance_id': instance_id, - 'port': 22, - 'username': 'root', - 'password': self.adata['password'], - 'work_path': '/root/dpgen_work' - } + profile = self.mdata_machine.copy() + profile['hostname'] = ip + profile['instance_id'] = instance_id disp = 
Dispatcher(profile, context_type='ssh', batch_type='shell', job_record='jr.%.06d.json' % ii) max_check = 10 cnt = 0 @@ -74,7 +65,6 @@ def check_restart(self): restart = True self.dispatchers = dispatchers self.instance_list = instance_list - os.chdir(self.cwd) return restart def run_jobs(self, @@ -118,16 +108,10 @@ def run_jobs(self, def make_dispatchers(self): dispatchers = [] for ii in range(self.nchunks): - remote_profile = { - 'type': 'ALI', - 'hostname': self.ip_list[ii], - 'instance_id': self.instance_list[ii], - 'port': 22, - 'username': 'root', - 'password': self.adata['password'], - 'work_path': '/root/dpgen_work' - } - disp = Dispatcher(remote_profile, context_type='ssh', batch_type='shell', job_record='jr.%.06d.json' % ii) + profile = self.mdata_machine.copy() + profile['hostname'] = self.ip_list[ii] + profile['instance_id'] = self.instance_list[ii] + disp = Dispatcher(profile, context_type='ssh', batch_type='shell', job_record='jr.%.06d.json' % ii) dispatchers.append(disp) return dispatchers @@ -136,10 +120,10 @@ def create_machine(self): AccessKey_Secret = self.adata["AccessKey_Secret"] strategy = self.adata["pay_strategy"] pwd = self.adata["password"] - regionID = self.regionID - instance_type = determine_machine[self.mdata['partition']][self.mdata['numb_gpu']] + regionID = self.mdata_machine['regionID'] + instance_type = determine_machine[self.mdata_resources['partition']][self.mdata_resources['numb_gpu']] if True: - client = AcsClient(AccessKey_ID,AccessKey_Secret, 'cn-hangzhou') + client = AcsClient(AccessKey_ID,AccessKey_Secret, regionID) request = RunInstancesRequest() request.set_accept_format('json') request.set_UniqueSuffix(True) @@ -165,7 +149,7 @@ def create_machine(self): def delete_machine(self): AccessKey_ID = self.adata["AccessKey_ID"] AccessKey_Secret = self.adata["AccessKey_Secret"] - regionID = self.regionID + regionID = self.mdata_machine['regionID'] client = AcsClient(AccessKey_ID,AccessKey_Secret, regionID) request = DeleteInstancesRequest() request.set_accept_format('json') diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index 0ae2c6914..35f27489f 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -452,7 +452,7 @@ def run_train (iter_index, nchunks = len(task_chunks) if mdata['train_machine']['type'] == 'ALI': adata = mdata['ali_auth'] - dispatcher = ALI(adata, mdata['train_resources'], nchunks, work_path, cwd) + dispatcher = ALI(adata, mdata['train_resources'], mdata['train_machine'], nchunks, work_path) dispatcher.init() dispatcher.run_jobs(mdata['train_resources'], @@ -918,7 +918,7 @@ def run_model_devi (iter_index, nchunks = len(task_chunks) if mdata['model_devi_machine']['type'] == 'ALI': adata = mdata['ali_auth'] - dispatcher = ALI(adata, mdata['model_devi_resources'], nchunks, work_path, cwd) + dispatcher = ALI(adata, mdata['model_devi_resources'], mdata['model_devi_machine'], nchunks, work_path) dispatcher.init() dispatcher.run_jobs(mdata['model_devi_resources'], @@ -1507,7 +1507,7 @@ def run_fp_inner (iter_index, nchunks = len(task_chunks) if mdata['fp_machine']['type'] == 'ALI': adata = mdata['ali_auth'] - dispatcher = ALI(adata, mdata['fp_resources'], nchunks, work_path, cwd) + dispatcher = ALI(adata, mdata['fp_resources'], mdata['fp_machine'], nchunks, work_path) dispatcher.init() dispatcher.run_jobs(mdata['fp_resources'], From 6a000cfa5f7cd0076981004badad3826fa3c5c45 Mon Sep 17 00:00:00 2001 From: dingzhaohan Date: Sat, 14 Dec 2019 20:48:56 +0800 Subject: [PATCH 097/109] optimize code --- dpgen/dispatcher/ALI.py 
| 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dpgen/dispatcher/ALI.py b/dpgen/dispatcher/ALI.py index 7dde25688..e68d091ed 100644 --- a/dpgen/dispatcher/ALI.py +++ b/dpgen/dispatcher/ALI.py @@ -6,8 +6,7 @@ from aliyunsdkecs.request.v20140526.DeleteInstancesRequest import DeleteInstancesRequest import time, json, os, glob from dpgen.dispatcher.Dispatcher import Dispatcher, _split_tasks -from os import listdir -from os.path import isfile, join +from os.path import join determine_machine = { "gpu": { From 3833cdfc4cacc7e4ed06b786fae466a723c7d0c0 Mon Sep 17 00:00:00 2001 From: AnguseZhang <529133328@qq.con> Date: Sat, 14 Dec 2019 22:45:39 +0800 Subject: [PATCH 098/109] Fix bugs --- dpgen/dispatcher/Dispatcher.py | 11 +++++---- dpgen/generator/run.py | 26 +++++++++----------- dpgen/remote/decide_machine.py | 45 ++++++---------------------------- 3 files changed, 25 insertions(+), 57 deletions(-) diff --git a/dpgen/dispatcher/Dispatcher.py b/dpgen/dispatcher/Dispatcher.py index 60e50b0c9..915d7e201 100644 --- a/dpgen/dispatcher/Dispatcher.py +++ b/dpgen/dispatcher/Dispatcher.py @@ -155,9 +155,10 @@ def submit_jobs(self, job_list.append(rjob) ip = None instance_id = None - if self.remote_profile['type'] == 'ALI': - ip = self.remote_profile['hostname'] - instance_id = self.remote_profile['instance_id'] + if "type" in self.remote_profile: + if self.remote_profile['type'] == 'ALI': + ip = self.remote_profile['hostname'] + instance_id = self.remote_profile['instance_id'] job_record.record_remote_context(cur_hash, context.local_root, context.remote_root, @@ -294,7 +295,7 @@ def _new_record(self): } -def make_dispatcher(mdata, job_record=None): +def make_dispatcher(mdata): try: hostname = mdata['hostname'] context_type = 'ssh' @@ -312,5 +313,5 @@ def make_dispatcher(mdata, job_record=None): if lazy_local and context_type == 'local': dlog.info('Dispatcher switches to the lazy local mode') context_type = 'lazy-local' - disp = Dispatcher(mdata, context_type=context_type, batch_type=batch_type, job_record=job_record) + disp = Dispatcher(mdata, context_type=context_type, batch_type=batch_type) return disp diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index 35f27489f..f3c7c326e 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -52,10 +52,11 @@ from dpgen.remote.group_jobs import group_slurm_jobs from dpgen.remote.group_jobs import group_local_jobs from dpgen.remote.decide_machine import decide_train_machine, decide_fp_machine, decide_model_devi_machine -from dpgen.dispatcher.Dispatcher import Dispatcher, _split_tasks +from dpgen.dispatcher.Dispatcher import Dispatcher, _split_tasks, make_dispatcher try: from dpgen.dispatcher.ALI import ALI -except: +except ImportError as e: + dlog.info(e) pass from dpgen.util import sepline from dpgen import ROOT_PATH @@ -450,9 +451,8 @@ def run_train (iter_index, task_chunks = _split_tasks(run_tasks, train_group_size) nchunks = len(task_chunks) - if mdata['train_machine']['type'] == 'ALI': - adata = mdata['ali_auth'] - dispatcher = ALI(adata, mdata['train_resources'], mdata['train_machine'], nchunks, work_path) + if "ali_auth" in mdata: + dispatcher = ALI(mdata['ali_auth'], mdata['train_resources'], mdata['train_machine'], nchunks, work_path) dispatcher.init() dispatcher.run_jobs(mdata['train_resources'], @@ -916,9 +916,8 @@ def run_model_devi (iter_index, cwd = os.getcwd() task_chunks = _split_tasks(run_tasks, model_devi_group_size) nchunks = len(task_chunks) - if mdata['model_devi_machine']['type'] == 'ALI': - adata = 
mdata['ali_auth'] - dispatcher = ALI(adata, mdata['model_devi_resources'], mdata['model_devi_machine'], nchunks, work_path) + if "ali_auth" in mdata: + dispatcher = ALI(mdata['ali_auth'], mdata['model_devi_resources'], mdata['model_devi_machine'], nchunks, work_path) dispatcher.init() dispatcher.run_jobs(mdata['model_devi_resources'], @@ -1505,9 +1504,8 @@ def run_fp_inner (iter_index, cwd = os.getcwd() task_chunks = _split_tasks(run_tasks, fp_group_size) nchunks = len(task_chunks) - if mdata['fp_machine']['type'] == 'ALI': - adata = mdata['ali_auth'] - dispatcher = ALI(adata, mdata['fp_resources'], mdata['fp_machine'], nchunks, work_path) + if "ali_auth" in mdata: + dispatcher = ALI(mdata['ali_auth'], mdata['fp_resources'], mdata['fp_machine'], nchunks, work_path) dispatcher.init() dispatcher.run_jobs(mdata['fp_resources'], @@ -1931,7 +1929,7 @@ def run_iter (param_file, machine_file) : elif jj == 1 : log_iter ("run_train", ii, jj) mdata = decide_train_machine(mdata) - if mdata['train_machine']['type'] == 'ALI': + if "ali_auth" in mdata: disp = [] else: disp = make_dispatcher(mdata['train_machine']) @@ -1947,7 +1945,7 @@ def run_iter (param_file, machine_file) : elif jj == 4 : log_iter ("run_model_devi", ii, jj) mdata = decide_model_devi_machine(mdata) - if mdata['model_devi_machine']['type'] == 'ALI': + if "ali_auth" in mdata: disp = [] else: disp = make_dispatcher(mdata['model_devi_machine']) @@ -1961,7 +1959,7 @@ def run_iter (param_file, machine_file) : elif jj == 7 : log_iter ("run_fp", ii, jj) mdata = decide_fp_machine(mdata) - if mdata['fp_machine']['type'] == 'ALI': + if "ali_auth" in mdata: disp = [] else: disp = make_dispatcher(mdata['fp_machine']) diff --git a/dpgen/remote/decide_machine.py b/dpgen/remote/decide_machine.py index ef756d06a..8484731be 100644 --- a/dpgen/remote/decide_machine.py +++ b/dpgen/remote/decide_machine.py @@ -13,7 +13,6 @@ def decide_train_machine(mdata): if 'train' in mdata: continue_flag = False - ## decide whether to use an existing machine if 'record.machine' in os.listdir(): try: with open('record.machine', 'r') as _infile: @@ -32,20 +31,7 @@ def decide_train_machine(mdata): continue_flag = True except: pass - if "hostname" not in mdata["train"][0]["machine"]: - mdata["train_machine"] = mdata["train"][0]["machine"] - mdata["train_resources"] = mdata["train"][0]["resources"] - if 'deepmd_path' in mdata["train"][0]: - mdata["deepmd_path"] = mdata["train"][0]["deepmd_path"] - elif 'python_path' in mdata["train"][0]: - mdata["python_path"] = mdata["train"][0]["python_path"] - if "group_size" in mdata["train"][0]: - mdata["train_group_size"] = mdata["train"][0]["group_size"] - if 'deepmd_version' in mdata["train"][0]: - mdata["deepmd_version"] = mdata["train"][0]["deepmd_version"] - continue_flag = True - - if mdata["train"][0]["machine"]["type"] == "ALI": + if ("hostname" not in mdata["train"][0]["machine"]) or (len(mdata["train"]) == 1): mdata["train_machine"] = mdata["train"][0]["machine"] mdata["train_resources"] = mdata["train"][0]["resources"] if 'deepmd_path' in mdata["train"][0]: @@ -56,10 +42,10 @@ def decide_train_machine(mdata): mdata["train_group_size"] = mdata["train"][0]["group_size"] if 'deepmd_version' in mdata["train"][0]: mdata["deepmd_version"] = mdata["train"][0]["deepmd_version"] - mdata["ali_auth"] = mdata["ali_auth"] - mdata["train_command"] = mdata["train"][0]["command"] + if 'command' in mdata["train"][0]: + mdata["train_command"] = mdata["train"][0]["command"] continue_flag = True - + pd_flag = False pd_count_list =[] # pd for 
pending job in slurm @@ -148,7 +134,7 @@ def decide_model_devi_machine(mdata): continue_flag = True except: pass - if "hostname" not in mdata["model_devi"][0]["machine"]: + if ("hostname" not in mdata["model_devi"][0]["machine"]) or (len(mdata["model_devi"]) == 1): mdata["model_devi_machine"] = mdata["model_devi"][0]["machine"] mdata["model_devi_resources"] = mdata["model_devi"][0]["resources"] mdata["lmp_command"] = mdata["model_devi"][0]["command"] @@ -156,14 +142,6 @@ def decide_model_devi_machine(mdata): mdata["model_devi_group_size"] = mdata["model_devi"][0]["group_size"] continue_flag = True - if mdata["model_devi"][0]["machine"]["type"] == 'ALI': - mdata["model_devi_machine"] = mdata["model_devi"][0]["machine"] - mdata["model_devi_resources"] = mdata["model_devi"][0]["resources"] - mdata["lmp_command"] = mdata["model_devi"][0]["command"] - mdata["model_devi_group_size"] = mdata["model_devi"][0]["group_size"] - mdata["ali_auth"] = mdata["ali_auth"] - continue_flag = True - pd_count_list =[] pd_flag = False if not continue_flag: @@ -233,23 +211,15 @@ def decide_fp_machine(mdata): continue_flag = True except: pass - if "hostname" not in mdata["fp"][0]["machine"]: - mdata["fp_machine"] = mdata["fp"][0]["machine"] - mdata["fp_resources"] = mdata["fp"][0]["resources"] - mdata["fp_command"] = mdata["fp"][0]["command"] - #if "group_size" in mdata["train"][0]: - mdata["fp_group_size"] = mdata["fp"][0]["group_size"] - continue_flag = True - - if mdata["fp"][0]["machine"]["type"] == 'ALI': + if ("hostname" not in mdata["fp"][0]["machine"]) or (len(mdata["fp"]) == 1): mdata["fp_machine"] = mdata["fp"][0]["machine"] mdata["fp_resources"] = mdata["fp"][0]["resources"] mdata["fp_command"] = mdata["fp"][0]["command"] #if "group_size" in mdata["train"][0]: mdata["fp_group_size"] = mdata["fp"][0]["group_size"] - mdata["ali_auth"] = mdata["ali_auth"] continue_flag = True + pd_count_list =[] pd_flag = False if not continue_flag: @@ -290,6 +260,5 @@ def decide_fp_machine(mdata): profile['group_size'] = mdata['fp_group_size'] profile['command'] = mdata['fp_command'] json.dump(profile, _outfile, indent = 4) -# print("mdata", mdata) return mdata From ee60862d2c1c1c9218c50369f0576ac67ffffcea Mon Sep 17 00:00:00 2001 From: AnguseZhang <529133328@qq.con> Date: Sun, 15 Dec 2019 22:45:59 +0800 Subject: [PATCH 099/109] Make dispatcher inside run_task --- dpgen/generator/run.py | 50 +++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index f3c7c326e..8c2390137 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -347,8 +347,7 @@ def detect_batch_size(batch_size, system=None): def run_train (iter_index, jdata, - mdata, - dispatcher) : + mdata) : # load json param numb_models = jdata['numb_models'] # train_param = jdata['train_param'] @@ -454,7 +453,9 @@ def run_train (iter_index, if "ali_auth" in mdata: dispatcher = ALI(mdata['ali_auth'], mdata['train_resources'], mdata['train_machine'], nchunks, work_path) dispatcher.init() - + else: + dispatcher = make_dispatcher(mdata['train_machine']) + dispatcher.run_jobs(mdata['train_resources'], commands, work_path, @@ -872,8 +873,7 @@ def _make_model_devi_native(iter_index, jdata, mdata, conf_systems): def run_model_devi (iter_index, jdata, - mdata, - dispatcher) : + mdata) : #rmdlog.info("This module has been run !") lmp_exec = mdata['lmp_command'] model_devi_group_size = mdata['model_devi_group_size'] @@ -919,7 +919,8 @@ def run_model_devi (iter_index, if 
"ali_auth" in mdata: dispatcher = ALI(mdata['ali_auth'], mdata['model_devi_resources'], mdata['model_devi_machine'], nchunks, work_path) dispatcher.init() - + else: + dispatcher = make_dispatcher(mdata['model_devi_machine']) dispatcher.run_jobs(mdata['model_devi_resources'], commands, work_path, @@ -1478,7 +1479,6 @@ def _cp2k_check_fin(ii): def run_fp_inner (iter_index, jdata, mdata, - dispatcher, forward_files, backward_files, check_fin, @@ -1507,7 +1507,8 @@ def run_fp_inner (iter_index, if "ali_auth" in mdata: dispatcher = ALI(mdata['ali_auth'], mdata['fp_resources'], mdata['fp_machine'], nchunks, work_path) dispatcher.init() - + else: + dispatcher = make_dispatcher(mdata['fp_machine']) dispatcher.run_jobs(mdata['fp_resources'], [fp_command], work_path, @@ -1520,10 +1521,10 @@ def run_fp_inner (iter_index, errlog = log_file) + def run_fp (iter_index, jdata, - mdata, - dispatcher) : + mdata) : fp_style = jdata['fp_style'] fp_pp_files = jdata['fp_pp_files'] @@ -1539,24 +1540,24 @@ def run_fp (iter_index, forward_files.append('KPOINTS') else: forward_common_files=[] - run_fp_inner(iter_index, jdata, mdata, dispatcher, forward_files, backward_files, _vasp_check_fin, + run_fp_inner(iter_index, jdata, mdata, forward_files, backward_files, _vasp_check_fin, forward_common_files=forward_common_files) elif fp_style == "pwscf" : forward_files = ['input'] + fp_pp_files backward_files = ['output'] - run_fp_inner(iter_index, jdata, mdata, dispatcher, forward_files, backward_files, _qe_check_fin, log_file = 'output') + run_fp_inner(iter_index, jdata, mdata, forward_files, backward_files, _qe_check_fin, log_file = 'output') elif fp_style == "siesta": forward_files = ['input'] + fp_pp_files backward_files = ['output'] - run_fp_inner(iter_index, jdata, mdata, dispatcher, forward_files, backward_files, _siesta_check_fin, log_file='output') + run_fp_inner(iter_index, jdata, mdata, forward_files, backward_files, _siesta_check_fin, log_file='output') elif fp_style == "gaussian": forward_files = ['input'] backward_files = ['output'] - run_fp_inner(iter_index, jdata, mdata, dispatcher, forward_files, backward_files, _gaussian_check_fin, log_file = 'output') + run_fp_inner(iter_index, jdata, mdata, forward_files, backward_files, _gaussian_check_fin, log_file = 'output') elif fp_style == "cp2k": forward_files = ['input.inp', 'coord.xyz'] backward_files = ['output'] - run_fp_inner(iter_index, jdata, mdata, dispatcher, forward_files, backward_files, _cp2k_check_fin, log_file = 'output') + run_fp_inner(iter_index, jdata, mdata, forward_files, backward_files, _cp2k_check_fin, log_file = 'output') else : raise RuntimeError ("unsupported fp style") @@ -1929,11 +1930,7 @@ def run_iter (param_file, machine_file) : elif jj == 1 : log_iter ("run_train", ii, jj) mdata = decide_train_machine(mdata) - if "ali_auth" in mdata: - disp = [] - else: - disp = make_dispatcher(mdata['train_machine']) - run_train (ii, jdata, mdata, disp) + run_train (ii, jdata, mdata) elif jj == 2 : log_iter ("post_train", ii, jj) post_train (ii, jdata, mdata) @@ -1945,11 +1942,8 @@ def run_iter (param_file, machine_file) : elif jj == 4 : log_iter ("run_model_devi", ii, jj) mdata = decide_model_devi_machine(mdata) - if "ali_auth" in mdata: - disp = [] - else: - disp = make_dispatcher(mdata['model_devi_machine']) - run_model_devi (ii, jdata, mdata, disp) + run_model_devi (ii, jdata, mdata) + elif jj == 5 : log_iter ("post_model_devi", ii, jj) post_model_devi (ii, jdata, mdata) @@ -1959,11 +1953,7 @@ def run_iter (param_file, machine_file) : elif jj 
== 7 : log_iter ("run_fp", ii, jj) mdata = decide_fp_machine(mdata) - if "ali_auth" in mdata: - disp = [] - else: - disp = make_dispatcher(mdata['fp_machine']) - run_fp (ii, jdata, mdata, disp) + run_fp (ii, jdata, mdata) elif jj == 8 : log_iter ("post_fp", ii, jj) post_fp (ii, jdata) From 00e136b712f7e85f15dee689b141b9f12d74e6eb Mon Sep 17 00:00:00 2001 From: zhaohan <32747623+dingzhaohan@users.noreply.github.com> Date: Sun, 15 Dec 2019 23:30:47 +0800 Subject: [PATCH 100/109] Update ALI.py optimize code --- dpgen/dispatcher/ALI.py | 58 ++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 36 deletions(-) diff --git a/dpgen/dispatcher/ALI.py b/dpgen/dispatcher/ALI.py index e68d091ed..5f1910f96 100644 --- a/dpgen/dispatcher/ALI.py +++ b/dpgen/dispatcher/ALI.py @@ -8,17 +8,6 @@ from dpgen.dispatcher.Dispatcher import Dispatcher, _split_tasks from os.path import join -determine_machine = { - "gpu": { - 1: "ecs.gn5-c8g1.2xlarge", - }, - "cpu": { - 1: "ecs.c6.large", - 4: "ecs.c6.2xlarge", - 8: "ecs.c6.4xlarge" - } -} - class ALI(): def __init__(self, adata, mdata_resources, mdata_machine, nchunks, work_path): self.ip_list = None @@ -120,31 +109,28 @@ def create_machine(self): strategy = self.adata["pay_strategy"] pwd = self.adata["password"] regionID = self.mdata_machine['regionID'] - instance_type = determine_machine[self.mdata_resources['partition']][self.mdata_resources['numb_gpu']] - if True: - client = AcsClient(AccessKey_ID,AccessKey_Secret, regionID) - request = RunInstancesRequest() - request.set_accept_format('json') - request.set_UniqueSuffix(True) - request.set_Password(pwd) - request.set_Amount(self.nchunks) - request.set_LaunchTemplateName(instance_type + '_cn-hangzhou_i') - response = client.do_action_with_exception(request) - response = json.loads(response) - self.instance_list = response["InstanceIdSets"]["InstanceIdSet"] - time.sleep(50) - request = DescribeInstancesRequest() - request.set_accept_format('json') - request.set_InstanceIds(self.instance_list) - response = client.do_action_with_exception(request) - response = json.loads(response) - ip = [] - for i in range(len(response["Instances"]["Instance"])): - ip.append(response["Instances"]["Instance"][i]["PublicIpAddress"]['IpAddress'][0]) - self.ip_list = ip - else: - return "create failed" - + template_name = '%s_%s_%s' % (self.mdata_resources['partition'], self.mdata_resources['numb_gpu'], strategy) + client = AcsClient(AccessKey_ID,AccessKey_Secret, regionID) + request = RunInstancesRequest() + request.set_accept_format('json') + request.set_UniqueSuffix(True) + request.set_Password(pwd) + request.set_Amount(self.nchunks) + request.set_LaunchTemplateName(template_name) + response = client.do_action_with_exception(request) + response = json.loads(response) + self.instance_list = response["InstanceIdSets"]["InstanceIdSet"] + time.sleep(50) + request = DescribeInstancesRequest() + request.set_accept_format('json') + request.set_InstanceIds(self.instance_list) + response = client.do_action_with_exception(request) + response = json.loads(response) + ip = [] + for i in range(len(response["Instances"]["Instance"])): + ip.append(response["Instances"]["Instance"][i]["PublicIpAddress"]['IpAddress'][0]) + self.ip_list = ip + def delete_machine(self): AccessKey_ID = self.adata["AccessKey_ID"] AccessKey_Secret = self.adata["AccessKey_Secret"] From 40b6357a57d6cbbe0d69de48f101444675870d92 Mon Sep 17 00:00:00 2001 From: AnguseZhang <529133328@qq.con> Date: Mon, 16 Dec 2019 12:31:16 +0800 Subject: [PATCH 101/109] 
init surf from_poscar --- dpgen/data/surf.py | 75 +++++++++++++++++++++--------- tests/data/POSCAR | 12 +++++ tests/data/context_surf_poscar.py | 6 +++ tests/data/surf_poscar.json | 46 ++++++++++++++++++ tests/data/test_gen_surf_poscar.py | 48 +++++++++++++++++++ tests/dispatcher/loc/task0/test0 | 1 + tests/dispatcher/loc/task0/test1 | 1 + tests/dispatcher/loc/task0/test2 | 1 + tests/dispatcher/loc/task1/test0 | 1 + tests/dispatcher/loc/task1/test1 | 1 + tests/dispatcher/loc/task1/test2 | 1 + 11 files changed, 170 insertions(+), 23 deletions(-) create mode 100644 tests/data/POSCAR create mode 100644 tests/data/context_surf_poscar.py create mode 100644 tests/data/surf_poscar.json create mode 100644 tests/data/test_gen_surf_poscar.py create mode 100644 tests/dispatcher/loc/task0/test0 create mode 100644 tests/dispatcher/loc/task0/test1 create mode 100644 tests/dispatcher/loc/task0/test2 create mode 100644 tests/dispatcher/loc/task1/test0 create mode 100644 tests/dispatcher/loc/task1/test1 create mode 100644 tests/dispatcher/loc/task1/test2 diff --git a/dpgen/data/surf.py b/dpgen/data/surf.py index f28f6e3ba..78c6f918c 100644 --- a/dpgen/data/surf.py +++ b/dpgen/data/surf.py @@ -60,17 +60,29 @@ def replace (file_name, pattern, subst) : global_dirname_04 = '02.md' def out_dir_name(jdata) : - cell_type = jdata['cell_type'] - elements = jdata['elements'] super_cell = jdata['super_cell'] - ele_str = "surf." - for ii in elements: - ele_str = ele_str + ii.lower() - cell_str = "%02d" % (super_cell[0]) - for ii in range(1,len(super_cell)) : - cell_str = cell_str + ("x%02d" % super_cell[ii]) - return ele_str + '.' + cell_type + '.' + cell_str + from_poscar= jdata.get('from_poscar',False) + + if from_poscar: + from_poscar_path = jdata['from_poscar_path'] + poscar_name = os.path.basename(from_poscar_path) + cell_str = "%02d" % (super_cell[0]) + for ii in range(1,len(super_cell)) : + cell_str = cell_str + ("x%02d" % super_cell[ii]) + return poscar_name + '.' + cell_str + else: + cell_type = jdata['cell_type'] + elements = jdata['elements'] + super_cell = jdata['super_cell'] + + ele_str = "surf." + for ii in elements: + ele_str = ele_str + ii.lower() + cell_str = "%02d" % (super_cell[0]) + for ii in range(1,len(super_cell)) : + cell_str = cell_str + ("x%02d" % super_cell[ii]) + return ele_str + '.' + cell_type + '.' 
+ cell_str def class_cell_type(jdata) : ct = jdata['cell_type'] @@ -177,33 +189,46 @@ def poscar_elong (poscar_in, poscar_out, elong) : fout.write("".join(lines)) def make_unit_cell (jdata) : - latt = jdata['latt'] + + from_poscar= jdata.get('from_poscar',False) + if not from_poscar: + latt = jdata['latt'] + cell_type = class_cell_type(jdata) + out_dir = jdata['out_dir'] path_uc = os.path.join(out_dir, global_dirname_02) - cell_type = class_cell_type(jdata) cwd = os.getcwd() # for ii in scale : # path_work = create_path(os.path.join(path_uc, '%.3f' % ii)) path_work = create_path(path_uc) os.chdir(path_work) - with open('POSCAR.unit', 'w') as fp: - fp.write (cell_type.poscar_unit(latt)) + if not from_poscar: + with open('POSCAR.unit', 'w') as fp: + fp.write (cell_type.poscar_unit(latt)) os.chdir(cwd) def make_super_cell_pymatgen (jdata) : - make_unit_cell(jdata) + make_unit_cell(jdata) out_dir = jdata['out_dir'] path_uc = os.path.join(out_dir, global_dirname_02) - from_path = path_uc - from_file = os.path.join(from_path, 'POSCAR.unit') - ss = Structure.from_file(from_file) - # ase only support X type element - for i in range(len(ss)): - ss[i]='X' - ss=AseAtomsAdaptor.get_atoms(ss) + from_poscar= jdata.get('from_poscar',False) + + if from_poscar: + from_poscar_path = jdata['from_poscar_path'] + poscar_name = os.path.basename(from_poscar_path) + ss = Structure.from_file(poscar_name) + else: + from_path = path_uc + from_file = os.path.join(from_path, 'POSCAR.unit') + ss = Structure.from_file(from_file) + # ase only support X type element + for i in range(len(ss)): + ss[i]='X' + + ss=AseAtomsAdaptor.get_atoms(ss) all_millers = jdata['millers'] path_sc = os.path.join(out_dir, global_dirname_02) @@ -256,6 +281,7 @@ def place_element (jdata) : super_cell = jdata['super_cell'] cell_type = class_cell_type(jdata) elements = jdata['elements'] + from_poscar= jdata.get('from_poscar',False) path_sc = os.path.join(out_dir, global_dirname_02) path_pe = os.path.join(out_dir, global_dirname_02) path_sc = os.path.abspath(path_sc) @@ -285,7 +311,10 @@ def place_element (jdata) : path_work = os.path.join(path_surf, comb_name) create_path(path_work) pos_out = os.path.join(path_work, 'POSCAR') - poscar_ele(pos_in, pos_out, elements, ii) + if from_poscar: + shutil.copy2( pos_in, pos_out) + else: + poscar_ele(pos_in, pos_out, elements, ii) poscar_shuffle(pos_out, pos_out) def make_vasp_relax (jdata) : @@ -528,7 +557,7 @@ def gen_init_surf(args): out_dir = out_dir_name(jdata) jdata['out_dir'] = out_dir dlog.info ("# working dir %s" % out_dir) - + if args.MACHINE is not None: # Decide a proper machine mdata = decide_fp_machine(mdata) diff --git a/tests/data/POSCAR b/tests/data/POSCAR new file mode 100644 index 000000000..c8163bf00 --- /dev/null +++ b/tests/data/POSCAR @@ -0,0 +1,12 @@ +FCC : a = 4.100000 +4.0999999999999996 +1.0000000000000000 0.0000000000000000 0.0000000000000000 +0.0000000000000000 1.0000000000000000 0.0000000000000000 +0.0000000000000000 0.0000000000000000 1.0000000000000000 +Al +4 +Direct +0.0000000000000000 0.0000000000000000 0.0000000000000000 +0.5000000000000000 0.5000000000000000 0.0000000000000000 +0.5000000000000000 0.0000000000000000 0.5000000000000000 +0.0000000000000000 0.5000000000000000 0.5000000000000000 diff --git a/tests/data/context_surf_poscar.py b/tests/data/context_surf_poscar.py new file mode 100644 index 000000000..89bc492cc --- /dev/null +++ b/tests/data/context_surf_poscar.py @@ -0,0 +1,6 @@ +import sys,os +sys.path.insert(0, 
os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))) +from dpgen.data.surf import * + +param_file = 'surf_poscar.json' + diff --git a/tests/data/surf_poscar.json b/tests/data/surf_poscar.json new file mode 100644 index 000000000..b5727ff83 --- /dev/null +++ b/tests/data/surf_poscar.json @@ -0,0 +1,46 @@ +{ + "stages": [ + 1, + 2 + ], + "cell_type": "fcc", + "from_poscar": true, + "from_poscar_path": "POSCAR", + "super_cell": [ + 1, + 1, + 1 + ], + "layer_numb": 3, + "vacuum_max": 9, + "vacuum_resol": [ + 0.5, + 1 + ], + "mid_point": 4.0, + "head_ratio": 0.6, + "vacuum_numb": 20, + "millers": [ + [ + 1, + 0, + 0 + ] + ], + "elements": [ + "Al" + ], + "potcars": [ + "./POTCAR" + ], + "relax_incar" : "INCAR_metal_rlx_low", + "scale": [ + 1.0 + ], + "skip_relax": true, + "pert_numb": 5, + "pert_box": 0.03, + "pert_atom": 0.01, + "coll_ndata": 5000, + "_comment": "that's all" +} diff --git a/tests/data/test_gen_surf_poscar.py b/tests/data/test_gen_surf_poscar.py new file mode 100644 index 000000000..5c49b0578 --- /dev/null +++ b/tests/data/test_gen_surf_poscar.py @@ -0,0 +1,48 @@ +import os,sys,json,glob,shutil +import unittest +from pymatgen import Structure + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +__package__ = 'data' +from .context import setUpModule +from .context_surf_poscar import * + +class TestGenSurfPOSCAR(unittest.TestCase): + def setUp(self): + self.surfs=["surf-100"] + self.elongs=["elong-0.500", "elong-1.000", "elong-1.500", "elong-2.000", "elong-2.500",\ + "elong-3.000", "elong-3.500", "elong-4.000", "elong-5.000", "elong-6.000",\ + "elong-7.000", "elong-8.000" ] + with open (param_file, 'r') as fp : + jdata = json.load (fp) + out_dir = out_dir_name(jdata) + jdata['out_dir'] = out_dir + self.root_dir= out_dir + create_path(out_dir) + make_super_cell_pymatgen(jdata) + place_element(jdata) + make_vasp_relax(jdata) + make_scale(jdata) + pert_scaled(jdata) + + def tearDown(self): + shutil.rmtree(self.root_dir) + + def test(self): + surfs=glob.glob("POSCAR.01x01x01/01.scale_pert/surf*") + surfs=[ii.split('/')[-1] for ii in surfs] + surfs.sort() + self.assertEqual(surfs,self.surfs) + poscars=glob.glob("POSCAR.01x01x01/00.place_ele/surf*/sys*/POSCAR") + for poscar in poscars: + surf=poscar.split('/')[-3] + st1=Structure.from_file(surf+'.POSCAR') + st2=Structure.from_file(poscar) + self.assertEqual(st1,st2) + + for surf in self.surfs: + elongs=glob.glob("POSCAR.01x01x01/01.scale_pert/"+surf+"/sys-*/scale-1.000/el*") + elongs=[ii.split('/')[-1] for ii in elongs] + elongs.sort() + self.assertEqual(elongs,self.elongs) + diff --git a/tests/dispatcher/loc/task0/test0 b/tests/dispatcher/loc/task0/test0 new file mode 100644 index 000000000..8ec3f00a6 --- /dev/null +++ b/tests/dispatcher/loc/task0/test0 @@ -0,0 +1 @@ +47fdc6b6-bb6b-4146-90b3-463f304642f7 \ No newline at end of file diff --git a/tests/dispatcher/loc/task0/test1 b/tests/dispatcher/loc/task0/test1 new file mode 100644 index 000000000..5d6b4c73c --- /dev/null +++ b/tests/dispatcher/loc/task0/test1 @@ -0,0 +1 @@ +b10e29bd-d748-4dcf-ba81-9ef3fb5e14c8 \ No newline at end of file diff --git a/tests/dispatcher/loc/task0/test2 b/tests/dispatcher/loc/task0/test2 new file mode 100644 index 000000000..bcabbd476 --- /dev/null +++ b/tests/dispatcher/loc/task0/test2 @@ -0,0 +1 @@ +3ed98c8b-6c0c-421d-bc0f-221bb37a614b \ No newline at end of file diff --git a/tests/dispatcher/loc/task1/test0 b/tests/dispatcher/loc/task1/test0 new file mode 100644 index 000000000..fae5db3ee --- 
/dev/null +++ b/tests/dispatcher/loc/task1/test0 @@ -0,0 +1 @@ +dc0388e6-3ed2-42d7-aedb-44b679919a17 \ No newline at end of file diff --git a/tests/dispatcher/loc/task1/test1 b/tests/dispatcher/loc/task1/test1 new file mode 100644 index 000000000..f674fdc7b --- /dev/null +++ b/tests/dispatcher/loc/task1/test1 @@ -0,0 +1 @@ +5b4bbd6e-8178-45a4-a284-33c05281bfad \ No newline at end of file diff --git a/tests/dispatcher/loc/task1/test2 b/tests/dispatcher/loc/task1/test2 new file mode 100644 index 000000000..00e47a42b --- /dev/null +++ b/tests/dispatcher/loc/task1/test2 @@ -0,0 +1 @@ +befedb43-0546-4991-8fb5-a830417f9f60 \ No newline at end of file From a394a2a16ed0324423c1ba15c5d9678df7d49a4a Mon Sep 17 00:00:00 2001 From: zhaohan <32747623+dingzhaohan@users.noreply.github.com> Date: Mon, 16 Dec 2019 13:05:33 +0800 Subject: [PATCH 102/109] use "username_task" to clarify different instances --- dpgen/dispatcher/ALI.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dpgen/dispatcher/ALI.py b/dpgen/dispatcher/ALI.py index 5f1910f96..5ceb5ad8d 100644 --- a/dpgen/dispatcher/ALI.py +++ b/dpgen/dispatcher/ALI.py @@ -110,11 +110,13 @@ def create_machine(self): pwd = self.adata["password"] regionID = self.mdata_machine['regionID'] template_name = '%s_%s_%s' % (self.mdata_resources['partition'], self.mdata_resources['numb_gpu'], strategy) + instance_name = self.adata["instance_name"] client = AcsClient(AccessKey_ID,AccessKey_Secret, regionID) request = RunInstancesRequest() request.set_accept_format('json') request.set_UniqueSuffix(True) request.set_Password(pwd) + request.set_InstanceName(instance_name) request.set_Amount(self.nchunks) request.set_LaunchTemplateName(template_name) response = client.do_action_with_exception(request) From 8e8f38e1394b855f5d5c6499b2a2ee4cbcacb481 Mon Sep 17 00:00:00 2001 From: Jinzh Zeng Date: Mon, 16 Dec 2019 02:40:51 -0500 Subject: [PATCH 103/109] bugfix --- dpgen/simplify/simplify.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/dpgen/simplify/simplify.py b/dpgen/simplify/simplify.py index ddf3aa2ed..8219a19c2 100644 --- a/dpgen/simplify/simplify.py +++ b/dpgen/simplify/simplify.py @@ -265,6 +265,8 @@ def make_fp(iter_index, jdata, mdata): picked_data_path = os.path.join(iter_name, model_devi_name, picked_data_name) if jdata.get("labeled", False): dlog.info("already labeled, skip make_fp and link data directly") + os.symlink(os.path.abspath(picked_data_path), os.path.abspath( + os.path.join(work_path, "task.%03d" % 0))) os.symlink(os.path.abspath(picked_data_path), os.path.abspath( os.path.join(work_path, "data.%03d" % 0))) return @@ -394,12 +396,18 @@ def run_iter(param_file, machine_file): make_fp(ii, jdata, mdata) elif jj == 7: log_iter("run_fp", ii, jj) - mdata = decide_fp_machine(mdata) - disp = make_dispatcher(mdata['fp_machine']) - run_fp(ii, jdata, mdata, disp) + if jdata.get("labeled", False): + dlog.info("already have labeled data, skip run_fp") + else: + mdata = decide_fp_machine(mdata) + disp = make_dispatcher(mdata['fp_machine']) + run_fp(ii, jdata, mdata, disp) elif jj == 8: log_iter("post_fp", ii, jj) - post_fp(ii, jdata) + if jdata.get("labeled", False): + dlog.info("already have labeled data, skip post_fp") + else: + post_fp(ii, jdata) else: raise RuntimeError("unknown task %d, something wrong" % jj) record_iter(record, ii, jj) From c2cf1601aaa98a40bba53e7b0769cc30342c601b Mon Sep 17 00:00:00 2001 From: Jinzh Zeng Date: Wed, 18 Dec 2019 02:38:29 -0500 Subject: [PATCH 104/109] change python_path to 
python_test_path since the directory of training and testing may not be the same --- dpgen/simplify/simplify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpgen/simplify/simplify.py b/dpgen/simplify/simplify.py index 8219a19c2..97bf9dc3e 100644 --- a/dpgen/simplify/simplify.py +++ b/dpgen/simplify/simplify.py @@ -146,7 +146,7 @@ def run_model_devi(iter_index, jdata, mdata, dispatcher): ) # TODO: support 0.x? command = "{python} -m deepmd test -m {model} -s {system} -n {numb_test} -d {detail_file}".format( - python=mdata['python_path'], + python=mdata['python_test_path'], model=mm, system=rest_data_name, numb_test=data_size, From 71b42c997f2fb0181648fa448c81fa09fa3599a4 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 21 Dec 2019 01:11:51 -0500 Subject: [PATCH 105/109] support nopbc --- dpgen/generator/run.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index 7fa034773..de4ebc552 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -415,17 +415,21 @@ def run_train (iter_index, for single_sys in os.listdir(os.path.join(ii)): trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'set.*')) trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'type.raw')) + trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'nopbc')) else: trans_comm_data += glob.glob(os.path.join(ii, 'set.*')) trans_comm_data += glob.glob(os.path.join(ii, 'type.raw')) + trans_comm_data += glob.glob(os.path.join(ii, 'nopbc')) for ii in fp_data : if jdata.get('use_clusters', False): for single_sys in os.listdir(os.path.join(ii)): trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'set.*')) trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'type.raw')) + trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'nopbc')) else: trans_comm_data += glob.glob(os.path.join(ii, 'set.*')) trans_comm_data += glob.glob(os.path.join(ii, 'type.raw')) + trans_comm_data += glob.glob(os.path.join(ii, 'nopbc')) os.chdir(cwd) try: From 6ebfdf38fb73124e2d59bfb8362948582a1a1e86 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Sat, 21 Dec 2019 18:00:15 +0800 Subject: [PATCH 106/109] fix bugs in unittest --- tests/dispatcher/loc/task0/test0 | 1 - tests/dispatcher/loc/task0/test1 | 1 - tests/dispatcher/loc/task0/test2 | 1 - tests/dispatcher/loc/task1/test0 | 1 - tests/dispatcher/loc/task1/test1 | 1 - tests/dispatcher/loc/task1/test2 | 1 - tests/generator/test_nbands_esti.py | 8 ++++++-- 7 files changed, 6 insertions(+), 8 deletions(-) delete mode 100644 tests/dispatcher/loc/task0/test0 delete mode 100644 tests/dispatcher/loc/task0/test1 delete mode 100644 tests/dispatcher/loc/task0/test2 delete mode 100644 tests/dispatcher/loc/task1/test0 delete mode 100644 tests/dispatcher/loc/task1/test1 delete mode 100644 tests/dispatcher/loc/task1/test2 diff --git a/tests/dispatcher/loc/task0/test0 b/tests/dispatcher/loc/task0/test0 deleted file mode 100644 index 8ec3f00a6..000000000 --- a/tests/dispatcher/loc/task0/test0 +++ /dev/null @@ -1 +0,0 @@ -47fdc6b6-bb6b-4146-90b3-463f304642f7 \ No newline at end of file diff --git a/tests/dispatcher/loc/task0/test1 b/tests/dispatcher/loc/task0/test1 deleted file mode 100644 index 5d6b4c73c..000000000 --- a/tests/dispatcher/loc/task0/test1 +++ /dev/null @@ -1 +0,0 @@ -b10e29bd-d748-4dcf-ba81-9ef3fb5e14c8 \ No newline at end of file diff --git a/tests/dispatcher/loc/task0/test2 b/tests/dispatcher/loc/task0/test2 deleted file mode 100644 index bcabbd476..000000000 --- 
a/tests/dispatcher/loc/task0/test2 +++ /dev/null @@ -1 +0,0 @@ -3ed98c8b-6c0c-421d-bc0f-221bb37a614b \ No newline at end of file diff --git a/tests/dispatcher/loc/task1/test0 b/tests/dispatcher/loc/task1/test0 deleted file mode 100644 index fae5db3ee..000000000 --- a/tests/dispatcher/loc/task1/test0 +++ /dev/null @@ -1 +0,0 @@ -dc0388e6-3ed2-42d7-aedb-44b679919a17 \ No newline at end of file diff --git a/tests/dispatcher/loc/task1/test1 b/tests/dispatcher/loc/task1/test1 deleted file mode 100644 index f674fdc7b..000000000 --- a/tests/dispatcher/loc/task1/test1 +++ /dev/null @@ -1 +0,0 @@ -5b4bbd6e-8178-45a4-a284-33c05281bfad \ No newline at end of file diff --git a/tests/dispatcher/loc/task1/test2 b/tests/dispatcher/loc/task1/test2 deleted file mode 100644 index 00e47a42b..000000000 --- a/tests/dispatcher/loc/task1/test2 +++ /dev/null @@ -1 +0,0 @@ -befedb43-0546-4991-8fb5-a830417f9f60 \ No newline at end of file diff --git a/tests/generator/test_nbands_esti.py b/tests/generator/test_nbands_esti.py index 3e1c39756..82497912b 100644 --- a/tests/generator/test_nbands_esti.py +++ b/tests/generator/test_nbands_esti.py @@ -71,5 +71,9 @@ def test_get_res(self): 'ele_temp': 20000.0, 'nbands': 81 } - self.assertEqual(res, ref) - + self.assertEqual(res['natoms'], ref['natoms']) + self.assertAlmostEqual(res['vol'], ref['vol']) + self.assertAlmostEqual(res['nvalence'][0], ref['nvalence'][0]) + self.assertEqual(len(res['nvalence']), len(ref['nvalence'])) + self.assertAlmostEqual(res['ele_temp'], ref['ele_temp']) + self.assertEqual(res['nbands'], ref['nbands']) From 157b15c91b51fa7aa41c5d96b73f4f53064b0456 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Sat, 21 Dec 2019 18:03:09 +0800 Subject: [PATCH 107/109] require less places for ele temp --- tests/generator/test_nbands_esti.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/generator/test_nbands_esti.py b/tests/generator/test_nbands_esti.py index 82497912b..b5edf447b 100644 --- a/tests/generator/test_nbands_esti.py +++ b/tests/generator/test_nbands_esti.py @@ -75,5 +75,5 @@ def test_get_res(self): self.assertAlmostEqual(res['vol'], ref['vol']) self.assertAlmostEqual(res['nvalence'][0], ref['nvalence'][0]) self.assertEqual(len(res['nvalence']), len(ref['nvalence'])) - self.assertAlmostEqual(res['ele_temp'], ref['ele_temp']) + self.assertAlmostEqual(res['ele_temp'], ref['ele_temp'], places = 1) self.assertEqual(res['nbands'], ref['nbands']) From 1a44d86e0a9811ea895aeccdae0d2c9824b0b243 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 21 Dec 2019 20:08:10 -0500 Subject: [PATCH 108/109] fix bug since run_train and run_fp changed --- dpgen/simplify/simplify.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dpgen/simplify/simplify.py b/dpgen/simplify/simplify.py index 97bf9dc3e..a44afb51a 100644 --- a/dpgen/simplify/simplify.py +++ b/dpgen/simplify/simplify.py @@ -374,7 +374,7 @@ def run_iter(param_file, machine_file): log_iter("run_train", ii, jj) mdata = decide_train_machine(mdata) disp = make_dispatcher(mdata['train_machine']) - run_train(ii, jdata, mdata, disp) + run_train(ii, jdata, mdata) elif jj == 2: log_iter("post_train", ii, jj) post_train(ii, jdata, mdata) @@ -401,7 +401,7 @@ def run_iter(param_file, machine_file): else: mdata = decide_fp_machine(mdata) disp = make_dispatcher(mdata['fp_machine']) - run_fp(ii, jdata, mdata, disp) + run_fp(ii, jdata, mdata) elif jj == 8: log_iter("post_fp", ii, jj) if jdata.get("labeled", False): From 18d9f63f37fd1641509588110e13216a7500f622 Mon Sep 17 00:00:00 
2001 From: Jinzhe Zeng Date: Sat, 4 Jan 2020 21:35:13 -0500 Subject: [PATCH 109/109] fix bug --- dpgen/simplify/simplify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpgen/simplify/simplify.py b/dpgen/simplify/simplify.py index a44afb51a..ba99b0b6c 100644 --- a/dpgen/simplify/simplify.py +++ b/dpgen/simplify/simplify.py @@ -255,7 +255,7 @@ def post_model_devi(iter_index, jdata, mdata): # dump the accurate data -- to another directory sys_data_path = os.path.join(work_path, accurate_data_name) sys_accurate.to_deepmd_raw(sys_data_path) - sys_accurate.to_deepmd_npy(sys_data_path, set_size=rest_idx.size) + sys_accurate.to_deepmd_npy(sys_data_path, set_size=sys_accurate.get_nframes()) def make_fp(iter_index, jdata, mdata):
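
Note on PATCH 109/109: in post_model_devi, rest_idx.size counts the frames left over after picking candidates, not the frames actually stored in sys_accurate, so dumping with that value can split the accurate frames across multiple set.* directories (or size the single set against the wrong system). Passing set_size=sys_accurate.get_nframes() guarantees one set.000 holding every accurate frame. Below is a minimal sketch of the same dump pattern using dpdata; the ./accurate_raw and ./accurate_npy paths are placeholders, and it assumes a labeled system was previously written out in deepmd/raw format, as the patched function does with to_deepmd_raw.

    import dpdata

    # load a labeled system previously dumped with to_deepmd_raw()
    sys_accurate = dpdata.LabeledSystem('./accurate_raw', fmt='deepmd/raw')

    # set_size defaults to 5000 frames per set; sizing it to get_nframes()
    # keeps all frames in a single set.000 regardless of how many there are
    sys_accurate.to_deepmd_npy('./accurate_npy',
                               set_size=sys_accurate.get_nframes())

This is the same call shape as the patched line above; only the set_size argument differs, which is what keeps the npy layout consistent with the number of accurate frames.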