⚒️ pre-commit
paolodeangelis committed Dec 6, 2023
1 parent ef2c63d commit a7568b1
Showing 4 changed files with 118 additions and 96 deletions.
3 changes: 2 additions & 1 deletion .flake8
@@ -1,7 +1,8 @@
[flake8]
per-file-ignores =
__init__.py:F401,
default_settings.py:E501,W505
default_settings.py:E501,W505,
templates.py:E501
ignore =
E402,
E203,
111 changes: 55 additions & 56 deletions tools/misc/analysis.py
@@ -1,9 +1,7 @@
import numpy as np
import pandas as pd
from scm.plams.interfaces.molecule.ase import fromASE as ASEtoSCM
from scm.plams.interfaces.molecule.ase import toASE as SCMtoASE
from ase.geometry.analysis import Analysis
from ase.geometry import get_distances
from ase.geometry.analysis import Analysis
from ase.neighborlist import neighbor_list


@@ -16,39 +14,39 @@ def _elements_combinations(elemnts):

def _get_bond_elements(ase_mol):
bonds_el = []
elemnts = np.unique( ase_mol.get_chemical_symbols() )
elemnts = np.unique(ase_mol.get_chemical_symbols())
el_i, el_j = _elements_combinations(elemnts)
analysis = Analysis(ase_mol)
for ai, aj in zip(el_i, el_j):
bonds = analysis.get_bonds(ai, aj, unique=True)
if not len(bonds[0]) > 0:
continue
bonds_el.append( (ai, aj) )
bonds_el.append((ai, aj))
return bonds_el


def get_all_bonds(ase_mol, verbose = False):
results_df = pd.DataFrame(columns=['ai', 'aj', 'Nbonds', 'bonds'])
elemnts = np.unique( ase_mol.get_chemical_symbols() )
def get_all_bonds(ase_mol, verbose=False):
results_df = pd.DataFrame(columns=["ai", "aj", "Nbonds", "bonds"])
elemnts = np.unique(ase_mol.get_chemical_symbols())
el_i, el_j = _elements_combinations(elemnts)
analysis = Analysis(ase_mol)
for ai, aj in zip(el_i, el_j):
bonds = analysis.get_bonds(ai, aj, unique=True)
if not len(bonds[0]) > 0:
continue
results_df.loc[f'{ai}-{aj}', 'ai'] = ai
results_df.loc[f'{ai}-{aj}', 'aj'] = aj
results_df.loc[f'{ai}-{aj}', 'bonds'] = bonds[0]
results_df.loc[f'{ai}-{aj}', 'Nbonds'] = len(bonds[0])
results_df.loc[f"{ai}-{aj}", "ai"] = ai
results_df.loc[f"{ai}-{aj}", "aj"] = aj
results_df.loc[f"{ai}-{aj}", "bonds"] = bonds[0]
results_df.loc[f"{ai}-{aj}", "Nbonds"] = len(bonds[0])
if verbose:
print(f"Found {len(bonds[0])} {ai}-{aj} bonds")
return results_df


def get_all_angles(ase_mol, verbose = False, values=False):
results_df = pd.DataFrame(columns=['ai', 'aj', 'ak', 'Nangles', 'angles', 'angles_values'])
elemnts = np.unique( ase_mol.get_chemical_symbols() )
el_product = np.meshgrid(elemnts, elemnts)
def get_all_angles(ase_mol, verbose=False, values=False):
results_df = pd.DataFrame(columns=["ai", "aj", "ak", "Nangles", "angles", "angles_values"])
elemnts = np.unique(ase_mol.get_chemical_symbols())
# el_product = np.meshgrid(elemnts, elemnts)
el_j = elemnts
el_i, el_k = _elements_combinations(elemnts)
analysis = Analysis(ase_mol)
@@ -57,65 +55,66 @@ def get_all_angles(ase_mol, verbose = False, values=False):
angles = analysis.get_angles(ai, aj, ak, unique=True)
if not len(angles[0]) > 0:
continue
results_df.loc[f'{ai}-{aj}-{ak}', 'ai'] = ai
results_df.loc[f'{ai}-{aj}-{ak}', 'aj'] = aj
results_df.loc[f'{ai}-{aj}-{ak}', 'ak'] = ak
results_df.loc[f'{ai}-{aj}-{ak}', 'angles'] = angles[0]
results_df.loc[f"{ai}-{aj}-{ak}", "ai"] = ai
results_df.loc[f"{ai}-{aj}-{ak}", "aj"] = aj
results_df.loc[f"{ai}-{aj}-{ak}", "ak"] = ak
results_df.loc[f"{ai}-{aj}-{ak}", "angles"] = angles[0]
if values:
results_df.loc[f'{ai}-{aj}-{ak}', 'angles_values'] = analysis.get_values(angles, mic=True) # [analysis.get_angle_value(0, ijk) for ijk in angles]
results_df.loc[f'{ai}-{aj}-{ak}', 'Nangles'] = len(angles[0])
results_df.loc[f"{ai}-{aj}-{ak}", "angles_values"] = analysis.get_values(
angles, mic=True
) # [analysis.get_angle_value(0, ijk) for ijk in angles]
results_df.loc[f"{ai}-{aj}-{ak}", "Nangles"] = len(angles[0])
if verbose:
print(f"Found {len(angles[0])} {ai}-{aj}-{ak} angles")
return results_df


def get_all_dihedrals(ase_mol, verbose = False):
results_df = pd.DataFrame(columns=['ai', 'aj', 'ak', 'al', 'Ndihedrals', 'dihedrals'])
elemnts = np.unique( ase_mol.get_chemical_symbols() )
el_product = np.meshgrid(elemnts, elemnts)
def get_all_dihedrals(ase_mol, verbose=False):
results_df = pd.DataFrame(columns=["ai", "aj", "ak", "al", "Ndihedrals", "dihedrals"])
elemnts = np.unique(ase_mol.get_chemical_symbols())
# el_product = np.meshgrid(elemnts, elemnts)
bonds = _get_bond_elements(ase_mol)
el_i, el_l = _elements_combinations(elemnts)
analysis = Analysis(ase_mol)
for (aj, ak) in bonds:
for aj, ak in bonds:
for ai, al in zip(el_i, el_l):
dihedrals = analysis.get_dihedrals(ai, aj, ak, al, unique=True)
if not len(dihedrals[0]) > 0:
continue
results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'ai'] = ai
results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'aj'] = aj
results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'ak'] = ak
results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'al'] = al
results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'dihedrals'] = dihedrals[0]
results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'Ndihedrals'] = len(dihedrals[0])
results_df.loc[f"{ai}-{aj}-{ak}-{al}", "ai"] = ai
results_df.loc[f"{ai}-{aj}-{ak}-{al}", "aj"] = aj
results_df.loc[f"{ai}-{aj}-{ak}-{al}", "ak"] = ak
results_df.loc[f"{ai}-{aj}-{ak}-{al}", "al"] = al
results_df.loc[f"{ai}-{aj}-{ak}-{al}", "dihedrals"] = dihedrals[0]
results_df.loc[f"{ai}-{aj}-{ak}-{al}", "Ndihedrals"] = len(dihedrals[0])
if verbose:
print(f"Found {len(dihedrals[0])} {ai}-{aj}-{ak}-{al} dihedrals")
return results_df


def get_distances_atom(ase_mol, r, mic=False, vector=False):
R = ase_mol.arrays['positions']
if isinstance(r, int):
p1 = [R[r]]
else:
p1 = [r]
p2 = R
cell = None
pbc = None
if mic:
cell = ase_mol.cell
pbc = ase_mol.pbc
D, D_len = get_distances(p1, p2, cell=cell, pbc=pbc)
if vector:
D.shape = (-1, 3)
return D
else:
D_len.shape = (-1,)
return D_len
R = ase_mol.arrays["positions"]
if isinstance(r, int):
p1 = [R[r]]
else:
p1 = [r]
p2 = R
cell = None
pbc = None
if mic:
cell = ase_mol.cell
pbc = ase_mol.pbc
D, D_len = get_distances(p1, p2, cell=cell, pbc=pbc)
if vector:
D.shape = (-1, 3)
return D
else:
D_len.shape = (-1,)
return D_len



def get_surf_atoms(ase_mol, cutoff =3., coord_cutoff = 0.5):
i = neighbor_list('i', ase_mol, cutoff)
def get_surf_atoms(ase_mol, cutoff=3.0, coord_cutoff=0.5):
i = neighbor_list("i", ase_mol, cutoff)
coord = np.bincount(i)
coord = (coord - coord.min()) / (coord.max() - coord.min())
return np.where(coord <= coord_cutoff)[0]
return np.where(coord <= coord_cutoff)[0]
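
For orientation, a minimal usage sketch of the helpers reformatted above (assuming the module is importable as tools.misc.analysis and that ASE and pandas are installed; the import path and test molecule are illustrative, not taken from the commit):

    from ase.build import molecule

    from tools.misc.analysis import get_all_angles, get_all_bonds, get_surf_atoms

    mol = molecule("CH3CH2OH")  # small gas-phase molecule from ASE's built-in g2 set
    bonds_df = get_all_bonds(mol, verbose=True)    # one row per element pair, e.g. "C-H"
    angles_df = get_all_angles(mol, verbose=True)  # one row per i-j-k element triplet
    surf_idx = get_surf_atoms(mol, cutoff=3.0)     # indices of low-coordination ("surface") atoms
    print(bonds_df[["ai", "aj", "Nbonds"]])
    print(angles_df[["ai", "aj", "ak", "Nangles"]])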
88 changes: 54 additions & 34 deletions tools/misc/read_history.py
@@ -1,9 +1,11 @@
import os
import pandas as pd
import numpy as np
from multiprocessing import Pool, Value, Manager
from multiprocessing.managers import ValueProxy, ListProxy
from ctypes import c_char_p
from multiprocessing import Manager, Pool
from multiprocessing.managers import ListProxy, ValueProxy

import numpy as np
import pandas as pd


def progressbar(percentage: float, info: str = "", screen: int = 100, status: str = "info"):
if percentage is None:
@@ -49,18 +51,18 @@ def progressbar(percentage: float, info: str = "", screen: int = 100, status: st

def chunkify(file_path, nlines=1024):
fileEnd = os.path.getsize(file_path)
with open(file_path, 'r') as file_obj:
with open(file_path) as file_obj:
chunkEnd = file_obj.tell()
while True:
chunkStart = chunkEnd
n = 0
while True:
line = file_obj.readline()
line = file_obj.readline() # noqa
chunkEnd = file_obj.tell()
n += 1
if n >= nlines:
break
yield chunkEnd/fileEnd, chunkStart, chunkEnd - chunkStart
yield chunkEnd / fileEnd, chunkStart, chunkEnd - chunkStart
if chunkEnd >= fileEnd:
break

@@ -76,60 +78,78 @@ def storing_in_dataframe(file_path, interface_names, interface_x, interface_isac
interface_x = list(interface_x)
if isinstance(interface_isactive, ListProxy):
interface_isactive = list(interface_isactive)
dataset_df = pd.DataFrame(index=np.arange(nlines), columns=['index', 'fx', 'time'] + interface_names)
dataset_df = pd.DataFrame(index=np.arange(nlines), columns=["index", "fx", "time"] + interface_names)
active_index = np.where(interface_isactive)[0]
n=0
with open(file_path, 'r') as file_obj:
n = 0
with open(file_path) as file_obj:
file_obj.seek(chunkStart)
lines = file_obj.read(chunkSize).splitlines()
for line in lines:
if line[0] == '#':
if line[0] == "#":
continue
data = line.split()
dataset_df.loc[n, 'index'] = int(data[0])
dataset_df.loc[n, 'fx'] = float(data[1])
dataset_df.loc[n, 'time'] = float(data[2])
dataset_df.iloc[n, 3:] = interface_x
dataset_df.loc[n, "index"] = int(data[0])
dataset_df.loc[n, "fx"] = float(data[1])
dataset_df.loc[n, "time"] = float(data[2])
dataset_df.iloc[n, 3:] = interface_x
dataset_df.iloc[n, active_index + 3] = [float(d) for d in data[3:]]
n +=1
dataset_df = dataset_df.dropna(axis=0, how='all')
n += 1
dataset_df = dataset_df.dropna(axis=0, how="all")
return dataset_df


def get_run_history(file_path, interface, workers=4, nlines=1024):
dataframes = []
dataframes = [] # noqa
pool = Pool(processes=workers)
jobs = []
# shared memory objects
manager = Manager()
s_nlines = manager.Value('i', nlines)
s_nlines = manager.Value("i", nlines)
s_file_path = manager.Value(c_char_p, file_path)
s_interface_names = manager.list(interface.names)
s_interface_x = manager.list(interface.x)
s_interface_isactive = manager.list(interface.is_active)
for i, (perc_, chunkStart_, chunkSize_) in enumerate(chunkify(file_path, nlines=nlines)):
jobs.append(
jobs.append(
pool.apply_async(
storing_in_dataframe, args=(s_file_path, s_interface_names, s_interface_x,
s_interface_isactive, chunkStart_, chunkSize_, s_nlines)
)
) #file_path, interface_names, interface_x, interface_isactive, chunkStart, chunkSize, nlines
print(f'Warmup (jobs: {i+1}, cores used {workers})'.ljust(35) + progressbar(0.0, status='info', screen=65),
end='\r', flush=True)
storing_in_dataframe,
args=(
s_file_path,
s_interface_names,
s_interface_x,
s_interface_isactive,
chunkStart_,
chunkSize_,
s_nlines,
),
)
) # file_path, interface_names, interface_x, interface_isactive, chunkStart, chunkSize, nlines
print(
f"Warmup (jobs: {i+1}, cores used {workers})".ljust(35) + progressbar(0.0, status="info", screen=65),
end="\r",
flush=True,
)
N_jobs_rest = N_jobs = len(jobs)
results = []
n = 0
print(f'Reading (Jobs remaining: {N_jobs_rest})'.ljust(35) + progressbar(n / N_jobs * 100.0, status='info', screen=65),
end='\r', flush=True)
print(
f"Reading (Jobs remaining: {N_jobs_rest})".ljust(35)
+ progressbar(n / N_jobs * 100.0, status="info", screen=65),
end="\r",
flush=True,
)
while N_jobs_rest > 0:
job = jobs.pop(0)
results.append( job.get() )
results.append(job.get())
n += 1
N_jobs_rest = len(jobs)
print(f'Reading (Jobs remaining: {N_jobs_rest})'.ljust(35) + progressbar(n / N_jobs * 100.0, status='info', screen=65),
end='\r', flush=True)
print(f'DONE'.ljust(35) + progressbar(n / N_jobs * 100.0, status='info', screen=65),
end='\n', flush=True)
print(
f"Reading (Jobs remaining: {N_jobs_rest})".ljust(35)
+ progressbar(n / N_jobs * 100.0, status="info", screen=65),
end="\r",
flush=True,
)
print("DONE".ljust(35) + progressbar(n / N_jobs * 100.0, status="info", screen=65), end="\n", flush=True)
pool.close()
pool.join()
return results
return results
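
A hedged sketch of how get_run_history might be driven (the interface object below is a stand-in exposing only the three attributes the code reads — names, x, is_active — and the log filename is hypothetical; in the real workflow the interface presumably comes from the parameter-optimization run):

    from types import SimpleNamespace

    import pandas as pd

    from tools.misc.read_history import get_run_history

    # Stand-in optimizer interface: only the attributes used by get_run_history,
    # with made-up values.
    interface = SimpleNamespace(
        names=["p0", "p1", "p2"],        # parameter names
        x=[0.1, 2.5, -1.0],              # current parameter values
        is_active=[True, False, True],   # which parameters vary in the history file
    )

    if __name__ == "__main__":  # guard needed for the multiprocessing Pool on spawn-based platforms
        frames = get_run_history("glompo_log.txt", interface, workers=4, nlines=1024)
        history = pd.concat(frames, ignore_index=True)
        print(history.head())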
12 changes: 7 additions & 5 deletions tools/misc/templates.py
@@ -1,3 +1,5 @@
import os

PARAM_SCRIP_TEMPLATE = f"""# PARAM Script
print(f" START ".center(80, '='))
import os, sys, pickle
@@ -82,9 +84,9 @@ def load_pickle(pickle_file):
PARAM_SCRIP_TEMPLATE += """
# Parallel Strategy
parallel_kwargs = {{
'parametervectors': {:d},
'jobs': {:d},
'processes': {:d},
'parametervectors': {:d},
'jobs': {:d},
'processes': {:d},
'threads': {:d}
}}
parallel = params.ParallelLevels(**parallel_kwargs)
@@ -100,7 +102,7 @@ def load_pickle(pickle_file):
# 'validation' : .15, # Percentage of the training set to be used as validation, or another DataSet() instance
'parallel' : parallel,
'callbacks' : callbacks,
'plams_workdir_path' : tmp_dir,
'plams_workdir_path' : tmp_dir,
# 'batch_size' : 32, # At every iteration, only compute a maximum of `batch_size` properties
# 'use_pipe' : True, # Use the AMSPipe interface where possible
# 'n_cores' : None, # Use N CPU cores for the execution of jobs during an optimization. Defaults to the number of physical cores
@@ -147,4 +149,4 @@ def load_pickle(pickle_file):
which python
python ./{4:s}
"""
"""
