Skip to content

Commit

Permalink
Calypso speedup by refactorizing model deviation. (#217)
Browse files Browse the repository at this point in the history
1. add a slice when running op `run_caly_model_devi`
2. support `distanceofion` in dict format, e.g. distanceofion = {"Mg":
0.8, "Al": 0.9}, where each value denotes the radius of that element; the
minimum distance between two elements (e.g. Mg and Al) is the sum of
their radii multiplied by 0.7.
3. support providing `name_of_element` as a list of lists, e.g.
name_of_element = [["Mg", "Al", "Cu", "Li"], ["La", "Ce", "Ca", "Li"],
["H"]]; the code will randomly choose one element from each sub-list to
set up an input file of calypso.

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
- Introduced a new operation `PrepCalyModelDevi` for organizing and
preparing model deviations.
- Enhanced the concurrent learning operation to include the preparation
of Calypso model deviations.
- Updated the Calypso task generation logic to handle different types of
input parameters more flexibly.

- **Bug Fixes**
- Improved error handling and logging in the Calypso operation to better
manage failures.

- **Tests**
- Added new tests to validate the creation and handling of Calypso tasks
with various configurations.
- Implemented unit tests for the new `PrepCalyModelDevi` class to ensure
its functionality.

- **Refactor**
- Reorganized logic and removed outdated code in several operations to
streamline processes and improve clarity.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
  • Loading branch information
wangzyphysics authored Apr 30, 2024
1 parent 8bdea14 commit 07df321
Show file tree
Hide file tree
Showing 14 changed files with 503 additions and 86 deletions.
7 changes: 6 additions & 1 deletion dpgen2/entrypoint/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@
CollRunCaly,
PrepCalyDPOptim,
PrepCalyInput,
PrepCalyModelDevi,
PrepDPTrain,
PrepLmp,
RunCalyDPOptim,
Expand Down Expand Up @@ -183,6 +184,7 @@ def make_concurrent_learning_op(
"prep-run-calypso",
prep_caly_input_op=PrepCalyInput,
caly_evo_step_op=caly_evo_step_op,
prep_caly_model_devi_op=PrepCalyModelDevi,
run_caly_model_devi_op=RunCalyModelDevi,
prep_config=prep_explore_config,
run_config=run_explore_config,
Expand Down Expand Up @@ -803,8 +805,10 @@ def get_superop(key):
return re.sub("prep-dp-optim-[0-9]*-[0-9]*", "prep-run-explore", key)
elif "run-dp-optim-" in key:
return re.sub("run-dp-optim-[0-9]*-[0-9]*-[0-9]*", "prep-run-explore", key)
elif "prep-caly-model-devi" in key:
return key.replace("prep-caly-model-devi", "prep-run-explore")
elif "run-caly-model-devi" in key:
return key.replace("run-caly-model-devi", "prep-run-explore")
return re.sub("run-caly-model-devi-[0-9]*", "prep-run-explore", key)
return None


Expand Down Expand Up @@ -849,6 +853,7 @@ def get_resubmit_keys(
"collect-run-calypso",
"prep-dp-optim",
"run-dp-optim",
"prep-caly-model-devi",
"run-caly-model-devi",
"prep-run-explore",
"prep-lmp",
Expand Down
110 changes: 105 additions & 5 deletions dpgen2/exploration/task/caly_task_group.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import copy
import logging
import random
from typing import (
List,
)

import numpy as np

from dpgen2.constants import (
calypso_check_opt_file,
calypso_input_file,
Expand All @@ -18,6 +23,55 @@
ExplorationTaskGroup,
)

# Chemical element symbols ordered so that an element's position in the tuple
# is its atomic number: index 0 is the placeholder 'X', index 1 is 'H', and
# the last entry is 'Og'. Each row below is one period (or the lanthanide /
# actinide series) of the periodic table.
atomic_symbols = (
'X', # placeholder
'H', 'He',
'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne',
'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar',
'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr',
'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe',
'Cs', 'Ba',
'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu',
'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn',
'Fr', 'Ra',
'Ac','Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr',
'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og',
) # fmt: skip
# Reverse lookup: element symbol -> its index in `atomic_symbols`
# (i.e. the atomic number; the placeholder 'X' maps to 0).
atomic_number_map = {key: value for value, key in enumerate(atomic_symbols)}
# Covalent radii from:
#
# Covalent radii revisited,
# Beatriz Cordero, Verónica Gómez, Ana E. Platero-Prats, Marc Revés,
# Jorge Echeverría, Eduard Cremades, Flavia Barragán and Santiago Alvarez,
# Dalton Trans., 2008, 2832-2838 DOI:10.1039/B801115J
#
# Fallback radius used for the placeholder entry and for every element
# (Bk through Og) that has no tabulated value in this list.
UNKN = 0.2
# One radius per entry of `atomic_symbols`, in the same order, so the list is
# indexed by atomic number (index 0 is the 'X' placeholder).
# NOTE(review): values are presumably in angstrom, as in the cited paper — confirm.
covalent_radii = [
# X, placeholder
UNKN,
# H He
0.31, 0.28,
# Li Be B C N O F Ne
1.28, 0.96, 0.84, 0.76, 0.71, 0.66, 0.57, 0.58,
# Na Mg Al Si P S Cl Ar
1.66, 1.41, 1.21, 1.11, 1.07, 1.05, 1.02, 1.06,
# K Ca Sc Ti V Cr Mn Fe Co Ni Cu Zn Ga Ge As Se Br Kr
2.03, 1.76, 1.70, 1.60, 1.53, 1.39, 1.39, 1.32, 1.26, 1.24, 1.32, 1.22, 1.22, 1.20, 1.19, 1.20, 1.20, 1.16,
# Rb Sr Y Zr Nb Mo Tc Ru Rh Pd Ag Cd In Sn Sb Te I Xe
2.20, 1.95, 1.90, 1.75, 1.64, 1.54, 1.47, 1.46, 1.42, 1.39, 1.45, 1.44, 1.42, 1.39, 1.39, 1.38, 1.39, 1.40,
# Cs Ba
2.44, 2.15,
# La Ce Pr Nd Pm Sm Eu Gd Tb Dy Ho Er Tm Yb Lu
2.07, 2.04, 2.03, 2.01, 1.99, 1.98, 1.98, 1.96, 1.94, 1.92, 1.92, 1.89, 1.90, 1.87, 1.87,
# Hf Ta W Re Os Ir Pt Au Hg Tl Pb Bi Po At Rn
1.75, 1.70, 1.62, 1.51, 1.44, 1.41, 1.36, 1.36, 1.32, 1.45, 1.46, 1.48, 1.40, 1.50, 1.50,
# Fr Ra
2.60, 2.21,
# Ac Th Pa U Np Pu Am Cm Bk Cf Es Fm Md No Lr
2.15, 2.06, 2.00, 1.96, 1.90, 1.87, 1.80, 1.69, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
# Rf Db Sg Bh Hs Mt Ds Rg Cn Nh Fl Mc Lv Ts Og
UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
] # fmt: skip


class CalyTaskGroup(ExplorationTaskGroup):
def __init__(self):
Expand All @@ -27,9 +81,9 @@ def set_params(
self,
numb_of_species,
name_of_atoms,
atomic_number,
numb_of_atoms,
distance_of_ions,
distance_of_ions=None,
atomic_number=None,
pop_size: int = 30,
max_step: int = 5,
system_name: str = "CALYPSO",
Expand Down Expand Up @@ -58,10 +112,56 @@ def set_params(
Set calypso parameters
"""
self.numb_of_species = numb_of_species
self.name_of_atoms = name_of_atoms
self.atomic_number = atomic_number
self.numb_of_atoms = numb_of_atoms
self.distance_of_ions = distance_of_ions

if isinstance(name_of_atoms, list) and all(
[isinstance(i, list) for i in name_of_atoms]
):
overlap = set(name_of_atoms[0])
for temp in name_of_atoms[1:]:
overlap = overlap & set(temp)

if any(map(lambda s: (set(s) - overlap) == 0, name_of_atoms)):
raise ValueError(
f"Any sub-list should not equal with intersection, e.g. [[A,B,C], [B,C], [C]] is not allowed."
)

while True:
choice = []
for _atoms in name_of_atoms:
value = random.choice(_atoms)
logging.info(
f"randomly choose {value} from {_atoms}, already choose: {choice}"
)
if value in choice:
break
choice.append(value)
else:
break
self.name_of_atoms = choice
logging.info(f"The final choice is {self.name_of_atoms}")
self.atomic_number = [atomic_symbols.index(i) for i in self.name_of_atoms]
else:
self.name_of_atoms = name_of_atoms
self.atomic_number = atomic_number

if isinstance(distance_of_ions, dict):
updated_table = copy.deepcopy(covalent_radii)
for key, value in distance_of_ions.items():
updated_table[atomic_number_map[key]] = value

temp_distance_mtx = np.zeros((numb_of_species, numb_of_species))
for i in range(numb_of_species):
for j in range(numb_of_species):
temp_distance_mtx[i][j] = round(
updated_table[atomic_number_map[self.name_of_atoms[i]]] * 0.7
+ updated_table[atomic_number_map[self.name_of_atoms[j]]] * 0.7,
2,
)
self.distance_of_ions = temp_distance_mtx
else:
self.distance_of_ions = distance_of_ions

self.pop_size = pop_size
self.max_step = max_step
self.system_name = system_name
Expand Down
2 changes: 1 addition & 1 deletion dpgen2/exploration/task/calypso/caly_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ def check():
def make_calypso_input(
numb_of_species: int,
name_of_atoms: List[str],
atomic_number: List[int],
atomic_number,
numb_of_atoms: List[int],
distance_of_ions,
pop_size: int = 30,
Expand Down
6 changes: 3 additions & 3 deletions dpgen2/exploration/task/make_task_group_from_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ def caly_task_grp_args():
Argument(
"atomic_number",
list,
optional=False,
optional=True,
doc="atomic number of each element.",
),
Argument(
Expand All @@ -305,8 +305,8 @@ def caly_task_grp_args():
),
Argument(
"distance_of_ions",
list,
optional=False,
[list, dict],
optional=True,
doc="the distance matrix between different elements.",
),
Argument(
Expand Down
3 changes: 3 additions & 0 deletions dpgen2/op/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
from .prep_caly_input import (
PrepCalyInput,
)
from .prep_caly_model_devi import (
PrepCalyModelDevi,
)
from .prep_dp_train import (
PrepDPTrain,
)
Expand Down
47 changes: 23 additions & 24 deletions dpgen2/op/collect_run_caly.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,6 @@ def execute(
results = (
ip["results"].resolve() if ip["results"] is not None else ip["results"]
)
# opt_results_dir = (
# ip["opt_results_dir"].resolve()
# if ip["opt_results_dir"] is not None
# else ip["opt_results_dir"]
# )
opt_results_dir = []
if ip["opt_results_dir"] is not None:
for temp in ip["opt_results_dir"]:
Expand All @@ -162,26 +157,30 @@ def execute(
prep_last_calypso_file(step, results, opt_results_dir, qhull_input, vsc)
# copy input.dat
Path(input_file.name).symlink_to(input_file)
# run calypso
command = " ".join([command, ">", calypso_log_name])
ret, out, err = run_command(command, shell=True)
if ret != 0:
logging.error(
"".join(
(
"calypso failed\n",
"command was: ",
command,
"out msg: ",
out,
"\n",
"err msg: ",
err,
"\n",

finished = "true" if int(cnt_num) == int(max_step) else "false"

if finished == "false":
# run calypso
command = " ".join([command, ">", calypso_log_name])
ret, out, err = run_command(command, shell=True)
if ret != 0:
logging.error(
"".join(
(
"calypso failed\n",
"command was: ",
command,
"out msg: ",
out,
"\n",
"err msg: ",
err,
"\n",
)
)
)
)
raise TransientError("calypso failed")
raise TransientError("calypso failed")

poscar_dir = Path("poscar_dir")
poscar_dir.mkdir(parents=True, exist_ok=True)
Expand All @@ -190,7 +189,7 @@ def execute(
shutil.copyfile(poscar, target)

step = Path("step").read_text().strip()
finished = "true" if int(cnt_num) == int(max_step) else "false"
# finished = "true" if int(cnt_num) == int(max_step) else "false"

if not Path("test_qconvex.in").exists():
Path("test_qconvex.in").write_text("")
Expand Down
5 changes: 3 additions & 2 deletions dpgen2/op/prep_caly_dp_optim.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,6 @@ def execute(
- `caly_check_opt_file` : (`Path`)
"""
group_size = ip["template_slice_config"]["group_size"]

finished = ip["finished"]

work_dir = Path(ip["task_name"])
Expand All @@ -119,6 +117,9 @@ def execute(
caly_check_opt_file = _caly_check_opt_file.resolve()
poscar_list = [poscar.resolve() for poscar in poscar_dir.rglob("POSCAR_*")]
poscar_list = sorted(poscar_list, key=lambda x: int(x.name.strip("POSCAR_")))

group_size = ip["template_slice_config"].get("group_size", len(poscar_list))

model_name = "frozen_model.pb"
model_list = [model.resolve() for model in models_dir.rglob(model_name)]
if len(model_list) == 0:
Expand Down
Loading

0 comments on commit 07df321

Please sign in to comment.