diff --git a/.flake8 b/.flake8
index 7fba368..03f0e08 100644
--- a/.flake8
+++ b/.flake8
@@ -1,7 +1,8 @@
 [flake8]
 per-file-ignores =
     __init__.py:F401,
-    default_settings.py:E501,W505
+    default_settings.py:E501,W505,
+    templates.py:E501
 ignore =
     E402,
     E203,
diff --git a/tools/misc/analysis.py b/tools/misc/analysis.py
index 412c0c2..1ac34fb 100755
--- a/tools/misc/analysis.py
+++ b/tools/misc/analysis.py
@@ -1,9 +1,7 @@
 import numpy as np
 import pandas as pd
-from scm.plams.interfaces.molecule.ase import fromASE as ASEtoSCM
-from scm.plams.interfaces.molecule.ase import toASE as SCMtoASE
-from ase.geometry.analysis import Analysis
 from ase.geometry import get_distances
+from ase.geometry.analysis import Analysis
 from ase.neighborlist import neighbor_list


@@ -16,39 +14,39 @@ def _elements_combinations(elemnts):

 def _get_bond_elements(ase_mol):
     bonds_el = []
-    elemnts = np.unique( ase_mol.get_chemical_symbols() )
+    elemnts = np.unique(ase_mol.get_chemical_symbols())
     el_i, el_j = _elements_combinations(elemnts)
     analysis = Analysis(ase_mol)
     for ai, aj in zip(el_i, el_j):
         bonds = analysis.get_bonds(ai, aj, unique=True)
         if not len(bonds[0]) > 0:
             continue
-        bonds_el.append( (ai, aj) )
+        bonds_el.append((ai, aj))
     return bonds_el


-def get_all_bonds(ase_mol, verbose = False):
-    results_df = pd.DataFrame(columns=['ai', 'aj', 'Nbonds', 'bonds'])
-    elemnts = np.unique( ase_mol.get_chemical_symbols() )
+def get_all_bonds(ase_mol, verbose=False):
+    results_df = pd.DataFrame(columns=["ai", "aj", "Nbonds", "bonds"])
+    elemnts = np.unique(ase_mol.get_chemical_symbols())
     el_i, el_j = _elements_combinations(elemnts)
     analysis = Analysis(ase_mol)
     for ai, aj in zip(el_i, el_j):
         bonds = analysis.get_bonds(ai, aj, unique=True)
         if not len(bonds[0]) > 0:
             continue
-        results_df.loc[f'{ai}-{aj}', 'ai'] = ai
-        results_df.loc[f'{ai}-{aj}', 'aj'] = aj
-        results_df.loc[f'{ai}-{aj}', 'bonds'] = bonds[0]
-        results_df.loc[f'{ai}-{aj}', 'Nbonds'] = len(bonds[0])
+        results_df.loc[f"{ai}-{aj}", "ai"] = ai
+        results_df.loc[f"{ai}-{aj}", "aj"] = aj
+        results_df.loc[f"{ai}-{aj}", "bonds"] = bonds[0]
+        results_df.loc[f"{ai}-{aj}", "Nbonds"] = len(bonds[0])
         if verbose:
             print(f"Found {len(bonds[0])} {ai}-{aj} bonds")
     return results_df


-def get_all_angles(ase_mol, verbose = False, values=False):
-    results_df = pd.DataFrame(columns=['ai', 'aj', 'ak', 'Nangles', 'angles', 'angles_values'])
-    elemnts = np.unique( ase_mol.get_chemical_symbols() )
-    el_product = np.meshgrid(elemnts, elemnts)
+def get_all_angles(ase_mol, verbose=False, values=False):
+    results_df = pd.DataFrame(columns=["ai", "aj", "ak", "Nangles", "angles", "angles_values"])
+    elemnts = np.unique(ase_mol.get_chemical_symbols())
+    # el_product = np.meshgrid(elemnts, elemnts)
     el_j = elemnts
     el_i, el_k = _elements_combinations(elemnts)
     analysis = Analysis(ase_mol)
@@ -57,65 +55,66 @@ def get_all_angles(ase_mol, verbose = False, values=False):
             angles = analysis.get_angles(ai, aj, ak, unique=True)
             if not len(angles[0]) > 0:
                 continue
-            results_df.loc[f'{ai}-{aj}-{ak}', 'ai'] = ai
-            results_df.loc[f'{ai}-{aj}-{ak}', 'aj'] = aj
-            results_df.loc[f'{ai}-{aj}-{ak}', 'ak'] = ak
-            results_df.loc[f'{ai}-{aj}-{ak}', 'angles'] = angles[0]
+            results_df.loc[f"{ai}-{aj}-{ak}", "ai"] = ai
+            results_df.loc[f"{ai}-{aj}-{ak}", "aj"] = aj
+            results_df.loc[f"{ai}-{aj}-{ak}", "ak"] = ak
+            results_df.loc[f"{ai}-{aj}-{ak}", "angles"] = angles[0]
             if values:
-                results_df.loc[f'{ai}-{aj}-{ak}', 'angles_values'] = analysis.get_values(angles, mic=True) # [analysis.get_angle_value(0, ijk) for ijk in angles]
-            results_df.loc[f'{ai}-{aj}-{ak}', 'Nangles'] = len(angles[0])
+                results_df.loc[f"{ai}-{aj}-{ak}", "angles_values"] = analysis.get_values(
+                    angles, mic=True
+                )  # [analysis.get_angle_value(0, ijk) for ijk in angles]
+            results_df.loc[f"{ai}-{aj}-{ak}", "Nangles"] = len(angles[0])
             if verbose:
                 print(f"Found {len(angles[0])} {ai}-{aj}-{ak} angles")
     return results_df


-def get_all_dihedrals(ase_mol, verbose = False):
-    results_df = pd.DataFrame(columns=['ai', 'aj', 'ak', 'al', 'Ndihedrals', 'dihedrals'])
-    elemnts = np.unique( ase_mol.get_chemical_symbols() )
-    el_product = np.meshgrid(elemnts, elemnts)
+def get_all_dihedrals(ase_mol, verbose=False):
+    results_df = pd.DataFrame(columns=["ai", "aj", "ak", "al", "Ndihedrals", "dihedrals"])
+    elemnts = np.unique(ase_mol.get_chemical_symbols())
+    # el_product = np.meshgrid(elemnts, elemnts)
     bonds = _get_bond_elements(ase_mol)
     el_i, el_l = _elements_combinations(elemnts)
     analysis = Analysis(ase_mol)
-    for (aj, ak) in bonds:
+    for aj, ak in bonds:
         for ai, al in zip(el_i, el_l):
             dihedrals = analysis.get_dihedrals(ai, aj, ak, al, unique=True)
             if not len(dihedrals[0]) > 0:
                 continue
-            results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'ai'] = ai
-            results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'aj'] = aj
-            results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'ak'] = ak
-            results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'al'] = al
-            results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'dihedrals'] = dihedrals[0]
-            results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'Ndihedrals'] = len(dihedrals[0])
+            results_df.loc[f"{ai}-{aj}-{ak}-{al}", "ai"] = ai
+            results_df.loc[f"{ai}-{aj}-{ak}-{al}", "aj"] = aj
+            results_df.loc[f"{ai}-{aj}-{ak}-{al}", "ak"] = ak
+            results_df.loc[f"{ai}-{aj}-{ak}-{al}", "al"] = al
+            results_df.loc[f"{ai}-{aj}-{ak}-{al}", "dihedrals"] = dihedrals[0]
+            results_df.loc[f"{ai}-{aj}-{ak}-{al}", "Ndihedrals"] = len(dihedrals[0])
             if verbose:
                 print(f"Found {len(dihedrals[0])} {ai}-{aj}-{ak}-{al} dihedrals")
     return results_df


 def get_distances_atom(ase_mol, r, mic=False, vector=False):
-    R = ase_mol.arrays['positions']
-    if isinstance(r, int):
-        p1 = [R[r]]
-    else:
-        p1 = [r]
-    p2 = R
-    cell = None
-    pbc = None
-    if mic:
-        cell = ase_mol.cell
-        pbc = ase_mol.pbc
-    D, D_len = get_distances(p1, p2, cell=cell, pbc=pbc)
-    if vector:
-        D.shape = (-1, 3)
-        return D
-    else:
-        D_len.shape = (-1,)
-        return D_len
-    
+    R = ase_mol.arrays["positions"]
+    if isinstance(r, int):
+        p1 = [R[r]]
+    else:
+        p1 = [r]
+    p2 = R
+    cell = None
+    pbc = None
+    if mic:
+        cell = ase_mol.cell
+        pbc = ase_mol.pbc
+    D, D_len = get_distances(p1, p2, cell=cell, pbc=pbc)
+    if vector:
+        D.shape = (-1, 3)
+        return D
+    else:
+        D_len.shape = (-1,)
+        return D_len
+

-
-def get_surf_atoms(ase_mol, cutoff =3., coord_cutoff = 0.5):
-    i = neighbor_list('i', ase_mol, cutoff)
+def get_surf_atoms(ase_mol, cutoff=3.0, coord_cutoff=0.5):
+    i = neighbor_list("i", ase_mol, cutoff)
     coord = np.bincount(i)
     coord = (coord - coord.min()) / (coord.max() - coord.min())
-    return np.where(coord <= coord_cutoff)[0]
\ No newline at end of file
+    return np.where(coord <= coord_cutoff)[0]
diff --git a/tools/misc/read_history.py b/tools/misc/read_history.py
index 5c0ab91..8ec8dd4 100755
--- a/tools/misc/read_history.py
+++ b/tools/misc/read_history.py
@@ -1,9 +1,11 @@
 import os
-import pandas as pd
-import numpy as np
-from multiprocessing import Pool, Value, Manager
-from multiprocessing.managers import ValueProxy, ListProxy
 from ctypes import c_char_p
+from multiprocessing import Manager, Pool
+from multiprocessing.managers import ListProxy, ValueProxy
+
+import numpy as np
+import pandas as pd
+

 def progressbar(percentage: float, info: str = "", screen: int = 100, status: str = "info"):
     if percentage is None:
@@ -49,18 +51,18 @@ def progressbar(percentage: float, info: str = "", screen: int = 100, status: st

 def chunkify(file_path, nlines=1024):
     fileEnd = os.path.getsize(file_path)
-    with open(file_path, 'r') as file_obj:
+    with open(file_path) as file_obj:
         chunkEnd = file_obj.tell()
         while True:
             chunkStart = chunkEnd
             n = 0
             while True:
-                line = file_obj.readline()
+                line = file_obj.readline()  # noqa
                 chunkEnd = file_obj.tell()
                 n += 1
                 if n >= nlines:
                     break
-            yield chunkEnd/fileEnd, chunkStart, chunkEnd - chunkStart
+            yield chunkEnd / fileEnd, chunkStart, chunkEnd - chunkStart
             if chunkEnd >= fileEnd:
                 break

@@ -76,60 +78,78 @@ def storing_in_dataframe(file_path, interface_names, interface_x, interface_isac
         interface_x = list(interface_x)
     if isinstance(interface_isactive, ListProxy):
         interface_isactive = list(interface_isactive)
-    dataset_df = pd.DataFrame(index=np.arange(nlines), columns=['index', 'fx', 'time'] + interface_names)
+    dataset_df = pd.DataFrame(index=np.arange(nlines), columns=["index", "fx", "time"] + interface_names)
     active_index = np.where(interface_isactive)[0]
-    n=0
-    with open(file_path, 'r') as file_obj:
+    n = 0
+    with open(file_path) as file_obj:
         file_obj.seek(chunkStart)
         lines = file_obj.read(chunkSize).splitlines()
         for line in lines:
-            if line[0] == '#':
+            if line[0] == "#":
                 continue
             data = line.split()
-            dataset_df.loc[n, 'index'] = int(data[0])
-            dataset_df.loc[n, 'fx'] = float(data[1])
-            dataset_df.loc[n, 'time'] = float(data[2])
-            dataset_df.iloc[n, 3:] = interface_x
+            dataset_df.loc[n, "index"] = int(data[0])
+            dataset_df.loc[n, "fx"] = float(data[1])
+            dataset_df.loc[n, "time"] = float(data[2])
+            dataset_df.iloc[n, 3:] = interface_x
             dataset_df.iloc[n, active_index + 3] = [float(d) for d in data[3:]]
-            n +=1
-    dataset_df = dataset_df.dropna(axis=0, how='all')
+            n += 1
+    dataset_df = dataset_df.dropna(axis=0, how="all")
     return dataset_df


 def get_run_history(file_path, interface, workers=4, nlines=1024):
-    dataframes = []
+    dataframes = []  # noqa
     pool = Pool(processes=workers)
     jobs = []
     # shared memory ojects
     manager = Manager()
-    s_nlines = manager.Value('i', nlines)
+    s_nlines = manager.Value("i", nlines)
     s_file_path = manager.Value(c_char_p, file_path)
     s_interface_names = manager.list(interface.names)
     s_interface_x = manager.list(interface.x)
     s_interface_isactive = manager.list(interface.is_active)
     for i, (perc_, chunkStart_, chunkSize_) in enumerate(chunkify(file_path, nlines=nlines)):
-        jobs.append( 
+        jobs.append(
             pool.apply_async(
-                storing_in_dataframe, args=(s_file_path, s_interface_names, s_interface_x,
-                                            s_interface_isactive, chunkStart_, chunkSize_, s_nlines)
-            )
-        ) #file_path, interface_names, interface_x, interface_isactive, chunkStart, chunkSize, nlines
-        print(f'Warmup (jobs: {i+1}, cores used {workers})'.ljust(35) + progressbar(0.0, status='info', screen=65),
-              end='\r', flush=True)
+                storing_in_dataframe,
+                args=(
+                    s_file_path,
+                    s_interface_names,
+                    s_interface_x,
+                    s_interface_isactive,
+                    chunkStart_,
+                    chunkSize_,
+                    s_nlines,
+                ),
+            )
+        )  # file_path, interface_names, interface_x, interface_isactive, chunkStart, chunkSize, nlines
+        print(
+            f"Warmup (jobs: {i+1}, cores used {workers})".ljust(35) + progressbar(0.0, status="info", screen=65),
+            end="\r",
+            flush=True,
+        )
     N_jobs_rest = N_jobs = len(jobs)
     results = []
     n = 0
-    print(f'Reading (Jobs remaining: {N_jobs_rest})'.ljust(35) + progressbar(n / N_jobs * 100.0, status='info', screen=65),
-          end='\r', flush=True)
+    print(
+        f"Reading (Jobs remaining: {N_jobs_rest})".ljust(35)
+        + progressbar(n / N_jobs * 100.0, status="info", screen=65),
+        end="\r",
+        flush=True,
+    )
     while N_jobs_rest > 0:
         job = jobs.pop(0)
-        results.append( job.get() )
+        results.append(job.get())
         n += 1
         N_jobs_rest = len(jobs)
-        print(f'Reading (Jobs remaining: {N_jobs_rest})'.ljust(35) + progressbar(n / N_jobs * 100.0, status='info', screen=65),
-              end='\r', flush=True)
-    print(f'DONE'.ljust(35) + progressbar(n / N_jobs * 100.0, status='info', screen=65),
-          end='\n', flush=True)
+        print(
+            f"Reading (Jobs remaining: {N_jobs_rest})".ljust(35)
+            + progressbar(n / N_jobs * 100.0, status="info", screen=65),
+            end="\r",
+            flush=True,
+        )
+    print("DONE".ljust(35) + progressbar(n / N_jobs * 100.0, status="info", screen=65), end="\n", flush=True)
     pool.close()
     pool.join()
-    return results
\ No newline at end of file
+    return results
diff --git a/tools/misc/templates.py b/tools/misc/templates.py
index 7b5c279..1bf629b 100644
--- a/tools/misc/templates.py
+++ b/tools/misc/templates.py
@@ -1,3 +1,5 @@
+import os
+
 PARAM_SCRIP_TEMPLATE = f"""# PARAM Script
 print(f" START ".center(80, '='))
 import os, sys, pickle
@@ -82,9 +84,9 @@ def load_pickle(pickle_file):
 PARAM_SCRIP_TEMPLATE += """
 # Parallel Strategy
 parallel_kwargs = {{
-    'parametervectors': {:d}, 
-    'jobs': {:d}, 
-    'processes': {:d}, 
+    'parametervectors': {:d},
+    'jobs': {:d},
+    'processes': {:d},
     'threads': {:d}
 }}
 parallel = params.ParallelLevels(**parallel_kwargs)
@@ -100,7 +102,7 @@ def load_pickle(pickle_file):
     # 'validation' : .15, # Percentage of the training set to be used as validation, or another DataSet() instance
     'parallel' : parallel,
     'callbacks' : callbacks,
-    'plams_workdir_path' : tmp_dir, 
+    'plams_workdir_path' : tmp_dir,
     # 'batch_size' : 32, # At every iteration, only compute a maximum of `batch_size` properties
     # 'use_pipe' : True, # Use the AMSPipe interface where possible
     # 'n_cores' : None, # Use N CPU cores for the execution of jobs during an optimization. Defaults to the number of physical cores
@@ -147,4 +149,4 @@ def load_pickle(pickle_file):
 which python
 python ./{4:s}
 
-"""
\ No newline at end of file
+"""
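
Usage note (not part of the patch): a minimal sketch of how the reformatted analysis.py helpers might be called, assuming ASE is installed and tools/misc is on the Python path; the ethanol molecule, the Cu(111) slab, and the cutoffs below are arbitrary illustrative choices.

# Hypothetical usage sketch -- illustrative only, not part of the diff above.
from ase.build import fcc111, molecule

from analysis import get_all_angles, get_all_bonds, get_surf_atoms

# Any ASE Atoms object works; ethanol is just a small example.
mol = molecule("CH3CH2OH")

# One row per element pair that forms bonds: atom-index pairs in "bonds", counts in "Nbonds".
bonds_df = get_all_bonds(mol, verbose=True)
print(bonds_df[["ai", "aj", "Nbonds"]])

# One row per i-j-k element triple found in the structure.
angles_df = get_all_angles(mol)
print(angles_df[["ai", "aj", "ak", "Nangles"]])

# Under-coordinated (surface) atoms of a Cu(111) slab, from the normalized neighbor count.
slab = fcc111("Cu", size=(3, 3, 5), vacuum=10.0)
surface_idx = get_surf_atoms(slab, cutoff=3.0, coord_cutoff=0.5)
print(surface_idx)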