Commit 88bd11c

⚒️ Enhanced modularity

paolodeangelis committed Dec 6, 2023
1 parent c9f3d22 commit 88bd11c
Showing 7 changed files with 418 additions and 6 deletions.
10 changes: 10 additions & 0 deletions tools/misc/__init__.py
@@ -1,11 +1,21 @@
from .analysis import (
    get_all_angles,
    get_all_bonds,
    get_all_dihedrals,
    get_distances_atom,
    get_surf_atoms,
)
from .default_settings import (
    settings_0_GO,
    settings_0_SP,
    settings_1_GO,
    settings_1_SP,
    settings_2_SP,
)
from .read_history import chunkify, get_run_history, progressbar
from .utils import make_dir, mtx2str

MAIN_DIR_JNB1 = "step01-ic"
MAIN_DIR_JNB2 = "step02-sim"
MAIN_DIR_JNB3 = "step03-mkdb"
MAIN_DIR_JNB3 = "step04-opt"
121 changes: 121 additions & 0 deletions tools/misc/analysis.py
@@ -0,0 +1,121 @@
import numpy as np
import pandas as pd
from ase.geometry import get_distances
from ase.geometry.analysis import Analysis
from ase.neighborlist import neighbor_list


def _elements_combinations(elements):
    """Return all unique (upper-triangular) element pairs, e.g. (H, H), (H, O), (O, O)."""
    el_product = np.meshgrid(elements, elements)
    el_i = el_product[0][np.where(np.triu(el_product[0]))]
    el_j = el_product[1][np.where(np.triu(el_product[1]))]
    return el_i, el_j


def _get_bond_elements(ase_mol):
    """Return the element pairs (ai, aj) for which at least one bond exists."""
    bonds_el = []
    elements = np.unique(ase_mol.get_chemical_symbols())
    el_i, el_j = _elements_combinations(elements)
    analysis = Analysis(ase_mol)
    for ai, aj in zip(el_i, el_j):
        bonds = analysis.get_bonds(ai, aj, unique=True)
        if len(bonds[0]) == 0:
            continue
        bonds_el.append((ai, aj))
    return bonds_el


def get_all_bonds(ase_mol, verbose=False):
    """Collect every unique bond in `ase_mol`, grouped by element pair."""
    results_df = pd.DataFrame(columns=['ai', 'aj', 'Nbonds', 'bonds'])
    elements = np.unique(ase_mol.get_chemical_symbols())
    el_i, el_j = _elements_combinations(elements)
    analysis = Analysis(ase_mol)
    for ai, aj in zip(el_i, el_j):
        bonds = analysis.get_bonds(ai, aj, unique=True)
        if len(bonds[0]) == 0:
            continue
        results_df.loc[f'{ai}-{aj}', 'ai'] = ai
        results_df.loc[f'{ai}-{aj}', 'aj'] = aj
        results_df.loc[f'{ai}-{aj}', 'bonds'] = bonds[0]
        results_df.loc[f'{ai}-{aj}', 'Nbonds'] = len(bonds[0])
        if verbose:
            print(f"Found {len(bonds[0])} {ai}-{aj} bonds")
    return results_df


def get_all_angles(ase_mol, verbose=False, values=False):
    """Collect every unique angle in `ase_mol`, grouped by element triplet ai-aj-ak."""
    results_df = pd.DataFrame(columns=['ai', 'aj', 'ak', 'Nangles', 'angles', 'angles_values'])
    elements = np.unique(ase_mol.get_chemical_symbols())
    el_j = elements  # candidate central atoms
    el_i, el_k = _elements_combinations(elements)
    analysis = Analysis(ase_mol)
    for aj in el_j:
        for ai, ak in zip(el_i, el_k):
            angles = analysis.get_angles(ai, aj, ak, unique=True)
            if len(angles[0]) == 0:
                continue
            results_df.loc[f'{ai}-{aj}-{ak}', 'ai'] = ai
            results_df.loc[f'{ai}-{aj}-{ak}', 'aj'] = aj
            results_df.loc[f'{ai}-{aj}-{ak}', 'ak'] = ak
            results_df.loc[f'{ai}-{aj}-{ak}', 'angles'] = angles[0]
            if values:
                results_df.loc[f'{ai}-{aj}-{ak}', 'angles_values'] = analysis.get_values(angles, mic=True)
            results_df.loc[f'{ai}-{aj}-{ak}', 'Nangles'] = len(angles[0])
            if verbose:
                print(f"Found {len(angles[0])} {ai}-{aj}-{ak} angles")
    return results_df


def get_all_dihedrals(ase_mol, verbose=False):
    """Collect every unique dihedral in `ase_mol`, grouped by element quadruplet ai-aj-ak-al."""
    results_df = pd.DataFrame(columns=['ai', 'aj', 'ak', 'al', 'Ndihedrals', 'dihedrals'])
    elements = np.unique(ase_mol.get_chemical_symbols())
    bonds = _get_bond_elements(ase_mol)  # only bonded (aj, ak) pairs can host a dihedral
    el_i, el_l = _elements_combinations(elements)
    analysis = Analysis(ase_mol)
    for (aj, ak) in bonds:
        for ai, al in zip(el_i, el_l):
            dihedrals = analysis.get_dihedrals(ai, aj, ak, al, unique=True)
            if len(dihedrals[0]) == 0:
                continue
            results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'ai'] = ai
            results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'aj'] = aj
            results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'ak'] = ak
            results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'al'] = al
            results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'dihedrals'] = dihedrals[0]
            results_df.loc[f'{ai}-{aj}-{ak}-{al}', 'Ndihedrals'] = len(dihedrals[0])
            if verbose:
                print(f"Found {len(dihedrals[0])} {ai}-{aj}-{ak}-{al} dihedrals")
    return results_df


def get_distances_atom(ase_mol, r, mic=False, vector=False):
    """Distances between `r` (an atom index or an explicit position) and all atoms.

    With `mic=True` the minimum-image convention is applied; with `vector=True`
    the distance vectors are returned instead of their norms.
    """
    R = ase_mol.arrays['positions']
    p1 = [R[r]] if isinstance(r, int) else [r]
    p2 = R
    cell = None
    pbc = None
    if mic:
        cell = ase_mol.cell
        pbc = ase_mol.pbc
    D, D_len = get_distances(p1, p2, cell=cell, pbc=pbc)
    if vector:
        D.shape = (-1, 3)
        return D
    D_len.shape = (-1,)
    return D_len



def get_surf_atoms(ase_mol, cutoff=3.0, coord_cutoff=0.5):
    """Return indices of under-coordinated (surface) atoms.

    Coordination numbers are counted within `cutoff` (Å), min-max normalized,
    and atoms at or below `coord_cutoff` are classified as surface atoms.
    """
    i = neighbor_list('i', ase_mol, cutoff)
    coord = np.bincount(i)
    coord = (coord - coord.min()) / (coord.max() - coord.min())
    return np.where(coord <= coord_cutoff)[0]
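
A minimal usage sketch for these helpers. The ethanol test molecule and the silver octahedron are illustrative choices, not taken from this commit:

from ase.build import molecule
from ase.cluster import Octahedron

from tools.misc.analysis import get_all_angles, get_all_bonds, get_surf_atoms

mol = molecule('CH3CH2OH')  # ethanol from ASE's built-in structure database
bonds_df = get_all_bonds(mol, verbose=True)    # one row per element pair, e.g. 'C-H'
angles_df = get_all_angles(mol, values=True)   # fills 'angles_values' as well
print(bonds_df[['ai', 'aj', 'Nbonds']])

# Surface extraction needs a structure with contrasting coordination numbers,
# e.g. a nanoparticle.
surf_idx = get_surf_atoms(Octahedron('Ag', 5), cutoff=3.2)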
2 changes: 0 additions & 2 deletions tools/misc/default_settings.py
@@ -1,5 +1,3 @@
import os

import scm.plams as scm

# Unit cells `Single Point`
… (remainder of file not shown)
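
Judging by the names, the settings_*_SP / settings_*_GO objects read as PLAMS single-point and geometry-optimization presets. A hedged sketch of how one might be consumed; the AMSJob wiring and the input file name are assumptions, not shown in this diff:

import scm.plams as scm

from tools.misc import settings_0_SP

scm.init()
mol = scm.Molecule('structure.xyz')  # hypothetical input structure
job = scm.AMSJob(name='sp0', molecule=mol, settings=settings_0_SP)
results = job.run()
scm.finish()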
135 changes: 135 additions & 0 deletions tools/misc/read_history.py
@@ -0,0 +1,135 @@
import os
from ctypes import c_char_p
from multiprocessing import Manager, Pool
from multiprocessing.managers import ListProxy, ValueProxy

import numpy as np
import pandas as pd

def progressbar(percentage: float, info: str = "", screen: int = 100, status: str = "info"):
    """Render a unicode progress bar as a string; `status` selects the ANSI color."""
    if percentage is None:
        percentage = 0.0
    if info != "":
        info = info.strip() + " "
    bar_length = screen - len(info) - 2 - 6  # reserve room for the label and ' xx.x%'
    status_chars = [  # eighth-width block characters, from empty to full
        " ",
        "\u258f",
        "\u258e",
        "\u258d",
        "\u258c",
        "\u258b",
        "\u258a",
        "\u2589",
        "\u2588",
    ]
    length = percentage * bar_length / 100
    units = int(length // 1)  # number of full blocks
    decimal = length % 1      # fractional block, rendered with an eighth-width char
    empty = bar_length - units
    full = 1 - int(percentage / 100)  # suppress the partial char once the bar is complete
    if status == "success":
        color = "\x1b[32m"
    elif status == "warning":
        color = "\x1b[33m"
    elif status == "danger":
        color = "\x1b[31m"
    else:
        color = "\x1b[0m"
    # Assemble the bar
    text = "{:s}{:s}{:s}\x1b[0m {:4.1f}%".format(
        info,
        color,
        "\u2588" * units + status_chars[int(decimal * 8)] * full + " " * empty,
        percentage,
    )
    return text


def chunkify(file_path, nlines=1024):
    """Yield (progress_fraction, chunk_start, chunk_size) for blocks of `nlines` lines."""
    file_end = os.path.getsize(file_path)
    with open(file_path, 'r') as file_obj:
        chunk_end = file_obj.tell()
        while True:
            chunk_start = chunk_end
            n = 0
            while True:
                line = file_obj.readline()
                chunk_end = file_obj.tell()
                n += 1
                if n >= nlines or not line:  # stop at the line budget or at EOF
                    break
            yield chunk_end / file_end, chunk_start, chunk_end - chunk_start
            if chunk_end >= file_end:
                break


def storing_in_dataframe(file_path, interface_names, interface_x, interface_isactive, chunk_start, chunk_size, nlines):
    """Parse one file chunk into a DataFrame; unwraps manager proxies so it can run in a worker."""
    if isinstance(nlines, ValueProxy):
        nlines = nlines.get()
    if isinstance(file_path, ValueProxy):
        file_path = file_path.get()
    if isinstance(interface_names, ListProxy):
        interface_names = list(interface_names)
    if isinstance(interface_x, ListProxy):
        interface_x = list(interface_x)
    if isinstance(interface_isactive, ListProxy):
        interface_isactive = list(interface_isactive)
    dataset_df = pd.DataFrame(index=np.arange(nlines), columns=['index', 'fx', 'time'] + interface_names)
    active_index = np.where(interface_isactive)[0]
    n = 0
    with open(file_path, 'r') as file_obj:
        file_obj.seek(chunk_start)
        lines = file_obj.read(chunk_size).splitlines()
    for line in lines:
        if not line or line.startswith('#'):  # skip blank and comment lines
            continue
        data = line.split()
        dataset_df.loc[n, 'index'] = int(data[0])
        dataset_df.loc[n, 'fx'] = float(data[1])
        dataset_df.loc[n, 'time'] = float(data[2])
        dataset_df.iloc[n, 3:] = interface_x                           # defaults for every parameter
        dataset_df.iloc[n, active_index + 3] = [float(d) for d in data[3:]]  # overwrite active ones
        n += 1
    dataset_df = dataset_df.dropna(axis=0, how='all')
    return dataset_df


def get_run_history(file_path, interface, workers=4, nlines=1024):
    """Read an optimization history file in parallel chunks; returns a list of DataFrames."""
    pool = Pool(processes=workers)
    jobs = []
    # Shared-memory objects, so the arguments are not re-pickled for every job
    manager = Manager()
    s_nlines = manager.Value('i', nlines)
    s_file_path = manager.Value(c_char_p, file_path)
    s_interface_names = manager.list(interface.names)
    s_interface_x = manager.list(interface.x)
    s_interface_isactive = manager.list(interface.is_active)
    for i, (perc_, chunk_start_, chunk_size_) in enumerate(chunkify(file_path, nlines=nlines)):
        jobs.append(
            pool.apply_async(
                storing_in_dataframe, args=(s_file_path, s_interface_names, s_interface_x,
                                            s_interface_isactive, chunk_start_, chunk_size_, s_nlines)
            )
        )
        print(f'Warmup (jobs: {i+1}, cores used {workers})'.ljust(35) + progressbar(0.0, status='info', screen=65),
              end='\r', flush=True)
    n_jobs_rest = n_jobs = len(jobs)
    results = []
    n = 0
    print(f'Reading (Jobs remaining: {n_jobs_rest})'.ljust(35) + progressbar(n / n_jobs * 100.0, status='info', screen=65),
          end='\r', flush=True)
    while n_jobs_rest > 0:
        job = jobs.pop(0)
        results.append(job.get())
        n += 1
        n_jobs_rest = len(jobs)
        print(f'Reading (Jobs remaining: {n_jobs_rest})'.ljust(35) + progressbar(n / n_jobs * 100.0, status='info', screen=65),
              end='\r', flush=True)
    print('DONE'.ljust(35) + progressbar(n / n_jobs * 100.0, status='info', screen=65),
          end='\n', flush=True)
    pool.close()
    pool.join()
    return results
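
A hedged usage sketch for the reader above. get_run_history only touches interface.names, interface.x, and interface.is_active, so a small stand-in class works for illustration; the class, the file name history.dat, and the parameter names are hypothetical:

from dataclasses import dataclass

import pandas as pd

from tools.misc import get_run_history, progressbar

@dataclass
class StubInterface:  # hypothetical: just the three attributes get_run_history reads
    names: list
    x: list
    is_active: list

interface = StubInterface(names=['p0', 'p1'], x=[0.0, 0.0], is_active=[True, False])
chunks = get_run_history('history.dat', interface, workers=4, nlines=1024)
history_df = pd.concat(chunks, ignore_index=True)  # one DataFrame per chunk is returned

print(progressbar(62.5, info='demo', screen=60, status='success'))  # standalone bar rendering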