Commit 88bd11c

⚒️ Enhanced modularity

paolodeangelis committed Dec 6, 2023
1 parent c9f3d22 commit 88bd11c
Showing 7 changed files with 418 additions and 6 deletions.
10 changes: 10 additions & 0 deletions tools/misc/__init__.py
@@ -1,11 +1,21 @@
from .analysis import (
    get_all_angles,
    get_all_bonds,
    get_all_dihedrals,
    get_distances_atom,
    get_surf_atoms,
)
from .default_settings import (
    settings_0_GO,
    settings_0_SP,
    settings_1_GO,
    settings_1_SP,
    settings_2_SP,
)
from .read_history import chunkify, get_run_history, progressbar
from .utils import make_dir, mtx2str

MAIN_DIR_JNB1 = "step01-ic"
MAIN_DIR_JNB2 = "step02-sim"
MAIN_DIR_JNB3 = "step03-mkdb"
MAIN_DIR_JNB3 = "step04-opt"
121 changes: 121 additions & 0 deletions tools/misc/analysis.py
@@ -0,0 +1,121 @@
import numpy as np
import pandas as pd
from ase.geometry import get_distances
from ase.geometry.analysis import Analysis
from ase.neighborlist import neighbor_list


def _elements_combinations(elements):
    """Return all unique (upper-triangular) element pairs, e.g. (H, H), (H, O), (O, O)."""
    el_product = np.meshgrid(elements, elements)
    el_i = el_product[0][np.where(np.triu(el_product[0]))]
    el_j = el_product[1][np.where(np.triu(el_product[1]))]
    return el_i, el_j


def _get_bond_elements(ase_mol):
    """Return the element pairs (ai, aj) for which at least one bond exists."""
    bonds_el = []
    elements = np.unique(ase_mol.get_chemical_symbols())
    el_i, el_j = _elements_combinations(elements)
    analysis = Analysis(ase_mol)
    for ai, aj in zip(el_i, el_j):
        bonds = analysis.get_bonds(ai, aj, unique=True)
        if len(bonds[0]) == 0:
            continue
        bonds_el.append((ai, aj))
    return bonds_el


def get_all_bonds(ase_mol, verbose=False):
    """Collect every unique bond in `ase_mol`, grouped by element pair."""
    results_df = pd.DataFrame(columns=['ai', 'aj', 'Nbonds', 'bonds'])
    elements = np.unique(ase_mol.get_chemical_symbols())
    el_i, el_j = _elements_combinations(elements)
    analysis = Analysis(ase_mol)
    for ai, aj in zip(el_i, el_j):
        bonds = analysis.get_bonds(ai, aj, unique=True)
        if len(bonds[0]) == 0:
            continue
        results_df.loc[f'{ai}-{aj}', 'ai'] = ai
        results_df.loc[f'{ai}-{aj}', 'aj'] = aj
        results_df.loc[f'{ai}-{aj}', 'bonds'] = bonds[0]
        results_df.loc[f'{ai}-{aj}', 'Nbonds'] = len(bonds[0])
        if verbose:
            print(f"Found {len(bonds[0])} {ai}-{aj} bonds")
    return results_df


def get_all_angles(ase_mol, verbose=False, values=False):
    """Collect every unique angle in `ase_mol`, grouped by element triplet ai-aj-ak."""
    results_df = pd.DataFrame(columns=['ai', 'aj', 'ak', 'Nangles', 'angles', 'angles_values'])
    elements = np.unique(ase_mol.get_chemical_symbols())
    el_j = elements  # candidate central atoms
    el_i, el_k = _elements_combinations(elements)
    analysis = Analysis(ase_mol)
    for aj in el_j:
        for ai, ak in zip(el_i, el_k):
            angles = analysis.get_angles(ai, aj, ak, unique=True)
            if len(angles[0]) == 0:
                continue
            results_df.loc[f'{ai}-{aj}-{ak}', 'ai'] = ai
            results_df.loc[f'{ai}-{aj}-{ak}', 'aj'] = aj
            results_df.loc[f'{ai}-{aj}-{ak}', 'ak'] = ak
            results_df.loc[f'{ai}-{aj}-{ak}', 'angles'] = angles[0]
            if values:
                results_df.loc[f'{ai}-{aj}-{ak}', 'angles_values'] = analysis.get_values(angles, mic=True)
            results_df.loc[f'{ai}-{aj}-{ak}', 'Nangles'] = len(angles[0])
            if verbose:
                print(f"Found {len(angles[0])} {ai}-{aj}-{ak} angles")
    return results_df


def get_all_dihedrals(ase_mol, verbose=False):
    """Collect every unique dihedral in `ase_mol`, grouped by element quadruplet ai-aj-ak-al."""
    results_df = pd.DataFrame(columns=['ai', 'aj', 'ak', 'al', 'Ndihedrals', 'dihedrals'])
    elements = np.unique(ase_mol.get_chemical_symbols())
    bonds = _get_bond_elements(ase_mol)  # only bonded (aj, ak) pairs can host a dihedral
    el_i, el_l = _elements_combinations(elements)
    analysis = Analysis(ase_mol)
    for (aj, ak) in bonds:
        for ai, al in zip(el_i, el_l):
            dihedrals = analysis.get_dihedrals(ai, aj, ak, al, unique=True)
            if len(dihedrals[0]) == 0:
                continue
            results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'ai'] = ai
            results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'aj'] = aj
            results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'ak'] = ak
            results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'al'] = al
            results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'dihedrals'] = dihedrals[0]
            results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'Ndihedrals'] = len(dihedrals[0])
            if verbose:
                print(f"Found {len(dihedrals[0])} {ai}-{aj}-{ak}-{al} dihedrals")
    return results_df


def get_distances_atom(ase_mol, r, mic=False, vector=False):
    """Distances between `r` (an atom index or an explicit position) and all atoms.

    With `mic=True` the minimum-image convention is applied; with `vector=True`
    the distance vectors are returned instead of their norms.
    """
    R = ase_mol.arrays['positions']
    p1 = [R[r]] if isinstance(r, int) else [r]
    p2 = R
    cell = None
    pbc = None
    if mic:
        cell = ase_mol.cell
        pbc = ase_mol.pbc
    D, D_len = get_distances(p1, p2, cell=cell, pbc=pbc)
    if vector:
        D.shape = (-1, 3)
        return D
    D_len.shape = (-1,)
    return D_len



def get_surf_atoms(ase_mol, cutoff=3.0, coord_cutoff=0.5):
    """Return indices of under-coordinated (surface) atoms.

    Coordination numbers are counted within `cutoff` (Å), min-max normalized,
    and atoms at or below `coord_cutoff` are classified as surface atoms.
    """
    i = neighbor_list('i', ase_mol, cutoff)
    coord = np.bincount(i)
    coord = (coord - coord.min()) / (coord.max() - coord.min())
    return np.where(coord <= coord_cutoff)[0]
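
A minimal usage sketch for these helpers. The ethanol test molecule and the silver octahedron are illustrative choices, not taken from this commit:

from ase.build import molecule
from ase.cluster import Octahedron

from tools.misc.analysis import get_all_angles, get_all_bonds, get_surf_atoms

mol = molecule('CH3CH2OH')  # ethanol from ASE's built-in structure database
bonds_df = get_all_bonds(mol, verbose=True)    # one row per element pair, e.g. 'C-H'
angles_df = get_all_angles(mol, values=True)   # fills 'angles_values' as well
print(bonds_df[['ai', 'aj', 'Nbonds']])

# Surface extraction needs a structure with contrasting coordination numbers,
# e.g. a nanoparticle.
surf_idx = get_surf_atoms(Octahedron('Ag', 5), cutoff=3.2)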
2 changes: 0 additions & 2 deletions tools/misc/default_settings.py
@@ -1,5 +1,3 @@
import os

import scm.plams as scm

# Unit cells `Single Point`
… (remainder of file not shown)
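
Judging by the names, the settings_*_SP / settings_*_GO objects read as PLAMS single-point and geometry-optimization presets. A hedged sketch of how one might be consumed; the AMSJob wiring and the input file name are assumptions, not shown in this diff:

import scm.plams as scm

from tools.misc import settings_0_SP

scm.init()
mol = scm.Molecule('structure.xyz')  # hypothetical input structure
job = scm.AMSJob(name='sp0', molecule=mol, settings=settings_0_SP)
results = job.run()
scm.finish()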
135 changes: 135 additions & 0 deletions tools/misc/read_history.py
@@ -0,0 +1,135 @@
import os
from ctypes import c_char_p
from multiprocessing import Manager, Pool
from multiprocessing.managers import ListProxy, ValueProxy

import numpy as np
import pandas as pd

def progressbar(percentage: float, info: str = "", screen: int = 100, status: str = "info"):
    """Render a unicode progress bar as a string; `status` selects the ANSI color."""
    if percentage is None:
        percentage = 0.0
    if info != "":
        info = info.strip() + " "
    bar_length = screen - len(info) - 2 - 6  # reserve room for the label and ' xx.x%'
    status_chars = [  # eighth-width block characters, from empty to full
        " ",
        "\u258f",
        "\u258e",
        "\u258d",
        "\u258c",
        "\u258b",
        "\u258a",
        "\u2589",
        "\u2588",
    ]
    length = percentage * bar_length / 100
    units = int(length // 1)  # number of full blocks
    decimal = length % 1      # fractional block, rendered with an eighth-width char
    empty = bar_length - units
    full = 1 - int(percentage / 100)  # suppress the partial char once the bar is complete
    if status == "success":
        color = "\x1b[32m"
    elif status == "warning":
        color = "\x1b[33m"
    elif status == "danger":
        color = "\x1b[31m"
    else:
        color = "\x1b[0m"
    # Assemble the bar
    text = "{:s}{:s}{:s}\x1b[0m {:4.1f}%".format(
        info,
        color,
        "\u2588" * units + status_chars[int(decimal * 8)] * full + " " * empty,
        percentage,
    )
    return text


def chunkify(file_path, nlines=1024):
    """Yield (progress_fraction, chunk_start, chunk_size) for blocks of `nlines` lines."""
    file_end = os.path.getsize(file_path)
    with open(file_path, 'r') as file_obj:
        chunk_end = file_obj.tell()
        while True:
            chunk_start = chunk_end
            n = 0
            while True:
                line = file_obj.readline()
                chunk_end = file_obj.tell()
                n += 1
                if n >= nlines or not line:  # stop at the line budget or at EOF
                    break
            yield chunk_end / file_end, chunk_start, chunk_end - chunk_start
            if chunk_end >= file_end:
                break


def storing_in_dataframe(file_path, interface_names, interface_x, interface_isactive, chunk_start, chunk_size, nlines):
    """Parse one file chunk into a DataFrame; unwraps manager proxies so it can run in a worker."""
    if isinstance(nlines, ValueProxy):
        nlines = nlines.get()
    if isinstance(file_path, ValueProxy):
        file_path = file_path.get()
    if isinstance(interface_names, ListProxy):
        interface_names = list(interface_names)
    if isinstance(interface_x, ListProxy):
        interface_x = list(interface_x)
    if isinstance(interface_isactive, ListProxy):
        interface_isactive = list(interface_isactive)
    dataset_df = pd.DataFrame(index=np.arange(nlines), columns=['index', 'fx', 'time'] + interface_names)
    active_index = np.where(interface_isactive)[0]
    n = 0
    with open(file_path, 'r') as file_obj:
        file_obj.seek(chunk_start)
        lines = file_obj.read(chunk_size).splitlines()
    for line in lines:
        if not line or line.startswith('#'):  # skip blank and comment lines
            continue
        data = line.split()
        dataset_df.loc[n, 'index'] = int(data[0])
        dataset_df.loc[n, 'fx'] = float(data[1])
        dataset_df.loc[n, 'time'] = float(data[2])
        dataset_df.iloc[n, 3:] = interface_x                           # defaults for every parameter
        dataset_df.iloc[n, active_index + 3] = [float(d) for d in data[3:]]  # overwrite active ones
        n += 1
    dataset_df = dataset_df.dropna(axis=0, how='all')
    return dataset_df


def get_run_history(file_path, interface, workers=4, nlines=1024):
    """Read an optimization history file in parallel chunks; returns a list of DataFrames."""
    pool = Pool(processes=workers)
    jobs = []
    # Shared-memory objects, so the arguments are not re-pickled for every job
    manager = Manager()
    s_nlines = manager.Value('i', nlines)
    s_file_path = manager.Value(c_char_p, file_path)
    s_interface_names = manager.list(interface.names)
    s_interface_x = manager.list(interface.x)
    s_interface_isactive = manager.list(interface.is_active)
    for i, (perc_, chunk_start_, chunk_size_) in enumerate(chunkify(file_path, nlines=nlines)):
        jobs.append(
            pool.apply_async(
                storing_in_dataframe, args=(s_file_path, s_interface_names, s_interface_x,
                                            s_interface_isactive, chunk_start_, chunk_size_, s_nlines)
            )
        )
        print(f'Warmup (jobs: {i+1}, cores used {workers})'.ljust(35) + progressbar(0.0, status='info', screen=65),
              end='\r', flush=True)
    n_jobs_rest = n_jobs = len(jobs)
    results = []
    n = 0
    print(f'Reading (Jobs remaining: {n_jobs_rest})'.ljust(35) + progressbar(n / n_jobs * 100.0, status='info', screen=65),
          end='\r', flush=True)
    while n_jobs_rest > 0:
        job = jobs.pop(0)
        results.append(job.get())
        n += 1
        n_jobs_rest = len(jobs)
        print(f'Reading (Jobs remaining: {n_jobs_rest})'.ljust(35) + progressbar(n / n_jobs * 100.0, status='info', screen=65),
              end='\r', flush=True)
    print('DONE'.ljust(35) + progressbar(n / n_jobs * 100.0, status='info', screen=65),
          end='\n', flush=True)
    pool.close()
    pool.join()
    return results
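
A hedged usage sketch for the reader above. get_run_history only touches interface.names, interface.x, and interface.is_active, so a small stand-in class works for illustration; the class, the file name history.dat, and the parameter names are hypothetical:

from dataclasses import dataclass

import pandas as pd

from tools.misc import get_run_history, progressbar

@dataclass
class StubInterface:  # hypothetical: just the three attributes get_run_history reads
    names: list
    x: list
    is_active: list

interface = StubInterface(names=['p0', 'p1'], x=[0.0, 0.0], is_active=[True, False])
chunks = get_run_history('history.dat', interface, workers=4, nlines=1024)
history_df = pd.concat(chunks, ignore_index=True)  # one DataFrame per chunk is returned

print(progressbar(62.5, info='demo', screen=60, status='success'))  # standalone bar rendering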