-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #10 from rinikerlab/SI_figures
Si figures
- Loading branch information
Showing
14 changed files
with
4,974 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
1,824 changes: 1,824 additions & 0 deletions
1,824
dev/test_jnbs/props/007_aminoacid_benchmark.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
36 changes: 36 additions & 0 deletions
36
dev/test_jnbs/props/euler_scripts/dual_submit_worker_batchedArray.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!/bin/bash | ||
#SBATCH -n 1 | ||
#SBATCH --cpus-per-task=2 | ||
#SBATCH --time=24:00:00 | ||
#SBATCH --job-name='dual' | ||
#SBATCH --nodes=1 | ||
#SBATCH --mem-per-cpu=8192 | ||
#SBATCH --tmp=20000 | ||
#SBATCH --output="dual/dual.%A.%a.out" | ||
#SBATCH --error="dual/dual.%A.%a.err" | ||
#SBATCH --array=1-10000 | ||
|
||
max_molJobIndex=180000 | ||
min_molJobIndex=1 | ||
|
||
cd ${TMPDIR} | ||
mkdir scratch | ||
export PSI_SCRATCH=${TMPDIR}/scratch | ||
|
||
molJobIndicesPerArrayJob=18 | ||
start_molJobIndex=$(($min_molJobIndex + ($SLURM_ARRAY_TASK_ID-1)*$molJobIndicesPerArrayJob)) | ||
end_molJobIndex=$(($start_molJobIndex + $molJobIndicesPerArrayJob - 1)) | ||
|
||
for ((molJobIndex=$start_molJobIndex; molJobIndex<=$end_molJobIndex; molJobIndex++)) | ||
do | ||
echo "molJobIndex: $molJobIndex" | ||
if [ ! -f "${SLURM_SUBMIT_DIR}/dual/dual_${molJobIndex}.pkl" ]; then | ||
python ${SLURM_SUBMIT_DIR}/run_psi4_dual.py $molJobIndex | ||
# clear scratch | ||
rm -rf ${TMPDIR}/scratch/* | ||
else | ||
echo "dual_${molJobIndex}.pkl already exists" | ||
fi | ||
done | ||
|
||
echo "Finished with array job: $SLURM_ARRAY_JOB_ID, task: $SLURM_ARRAY_TASK_ID" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import pickle | ||
from rdkit import Chem | ||
from serenityff.charge.tree.dash_tree import DASHTree | ||
|
||
sdf_file_path = "sdf_qmugs500_mbis_collect.sdf" | ||
mol_sup = Chem.SDMolSupplier(sdf_file_path, removeHs=False) | ||
tree = DASHTree() | ||
# tree = DASHTree(tree_folder_path="/cluster/work/igc/mlehner/test192_DASHtree/") | ||
|
||
node_path_storage = {} | ||
|
||
last_dash_idx = "" | ||
for mol in mol_sup: | ||
dash_idx = mol.GetProp("DASH_IDX") | ||
if dash_idx != last_dash_idx: | ||
this_mols_node_path_storage = {} | ||
for atom_idx in range(mol.GetNumAtoms()): | ||
node_path = tree.match_new_atom(atom=atom_idx, mol=mol) | ||
this_mols_node_path_storage[atom_idx] = node_path | ||
node_path_storage[dash_idx] = this_mols_node_path_storage | ||
last_dash_idx = dash_idx | ||
else: | ||
continue | ||
|
||
with open("node_path_storage_default.pkl", "wb") as f: | ||
pickle.dump(node_path_storage, f) | ||
|
||
print(f"node_paths saved with {len(node_path_storage)} entries \nfile used {sdf_file_path}, \n{len(mol_sup)} mols") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
from rdkit import Chem | ||
from tqdm import tqdm | ||
import sys | ||
import pandas as pd | ||
from openff.toolkit.topology import Molecule | ||
from openff.toolkit.typing.engines.smirnoff import ForceField | ||
|
||
sdf_test_path = "/cluster/work/igc/mlehner/test142_psi4_rest/sdf_qmugs500_mbis_collect.sdf" | ||
sdf_test = Chem.SDMolSupplier(sdf_test_path, removeHs=False) | ||
ff = ForceField("openff_unconstrained-2.0.0.offxml") | ||
|
||
|
||
def get_am1Bcc_charges_averaged(mol): | ||
try: | ||
molecule = Molecule.from_rdkit(mol, allow_undefined_stereo=True) | ||
charges_tmp = ff.get_partial_charges(molecule) | ||
charges = charges_tmp.value_in_unit(charges_tmp.unit).tolist() | ||
return [round(float(item), 5) for item in charges] | ||
except: | ||
return [0 for x in range(mol.GetNumAtoms())] | ||
|
||
|
||
def get_am1Bcc_charges_conf(mol): | ||
try: | ||
molecule = Molecule.from_rdkit(mol, allow_undefined_stereo=True) | ||
charges_tmp = molecule.assign_partial_charges("am1bcc", use_conformers=molecule.conformers) | ||
charges = charges_tmp.value_in_unit(charges_tmp.unit).tolist() | ||
return [round(float(item), 5) for item in charges] | ||
except: | ||
return [0 for x in range(mol.GetNumAtoms())] | ||
|
||
|
||
def get_am1mulliken_charge(mol): | ||
try: | ||
molecule = Molecule.from_rdkit(mol, allow_undefined_stereo=True) | ||
charges_tmp = molecule.assign_partial_charges('"am1-mulliken"', use_conformers=molecule.conformers) | ||
charges = charges_tmp.value_in_unit(charges_tmp.unit).tolist() | ||
return [round(float(item), 5) for item in charges] | ||
except: | ||
return [0 for x in range(mol.GetNumAtoms())] | ||
|
||
|
||
if __name__ == "__main__": | ||
# get mol index from argument | ||
array_idx = int(sys.argv[1]) - 1 | ||
print("array_idx: ", array_idx) | ||
array_width = 1000 | ||
start_idx = array_idx * array_width | ||
end_idx = start_idx + array_width | ||
print("start_idx: ", start_idx) | ||
print("end_idx: ", end_idx) | ||
# calculate charges | ||
am1Bcc_charges_averaged = [] | ||
am1Bcc_charges_conf = [] | ||
am1Mulliken_charges = [] | ||
elements = [] | ||
atom_indices = [] | ||
mol_indices = [] | ||
dash_indices = [] | ||
for mol_index in tqdm(range(start_idx, end_idx)): | ||
try: | ||
mol = sdf_test[mol_index] | ||
am1Bcc_charges_averaged.append(get_am1Bcc_charges_averaged(mol)) | ||
am1Bcc_charges_conf.append(get_am1Bcc_charges_conf(mol)) | ||
am1Mulliken_charges.append(get_am1mulliken_charge(mol)) | ||
elements.append([x.GetSymbol() for x in mol.GetAtoms()]) | ||
atom_indices.append([x.GetIdx() for x in mol.GetAtoms()]) | ||
mol_indices.append([mol_index for x in mol.GetAtoms()]) | ||
dash_indices.append([mol.GetProp("DASH_IDX") for x in mol.GetAtoms()]) | ||
except Exception as e: | ||
print(e) | ||
pass | ||
# flatten lists | ||
am1Bcc_charges_averaged_flat = [item for sublist in am1Bcc_charges_averaged for item in sublist] | ||
am1Bcc_charges_conf_flat = [item for sublist in am1Bcc_charges_conf for item in sublist] | ||
am1Mulliken_charges_flat = [item for sublist in am1Mulliken_charges for item in sublist] | ||
elements_flat = [item for sublist in elements for item in sublist] | ||
atom_indices_flat = [item for sublist in atom_indices for item in sublist] | ||
mol_indices_flat = [item for sublist in mol_indices for item in sublist] | ||
dash_indices_flat = [item for sublist in dash_indices for item in sublist] | ||
# create dataframe | ||
df = pd.DataFrame( | ||
{ | ||
"mol_index": mol_indices_flat, | ||
"dash_index": dash_indices_flat, | ||
"atom_index": atom_indices_flat, | ||
"element": elements_flat, | ||
"am1Bcc_averaged": am1Bcc_charges_averaged_flat, | ||
"am1Bcc_conf": am1Bcc_charges_conf_flat, | ||
"am1Mulliken": am1Mulliken_charges_flat, | ||
} | ||
) | ||
# save dataframe | ||
df.to_csv(f"./charges/test_{array_idx}.csv", index=False) | ||
print("Done with array", array_idx) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import dftd4 | ||
from dftd4 import interface | ||
import numpy as np | ||
import pandas as pd | ||
from rdkit import Chem | ||
import matplotlib.pyplot as plt | ||
from tqdm import tqdm | ||
|
||
print("start dftd4 calculation", flush=True) | ||
|
||
mol_sup = Chem.SDMolSupplier("combined_multi.sdf", removeHs=False) | ||
sd_writer = Chem.SDWriter("mols_comb_dftd4.sdf") | ||
|
||
damp_params = interface.DampingParam(method="tpssh") | ||
|
||
for mol in tqdm(mol_sup, total=len(mol_sup)): | ||
atom_numbers = np.array([atom.GetAtomicNum() for atom in mol.GetAtoms()]) | ||
atom_positions = mol.GetConformer().GetPositions() | ||
atom_positions_bohr = atom_positions * 1.88973 | ||
model = interface.DispersionModel(atom_numbers, atom_positions_bohr) | ||
res = model.get_dispersion(damp_params, grad=False) | ||
res.update(**model.get_properties()) | ||
c6_coef = res.get("c6 coefficients") | ||
c6_coef_diag = np.diag(c6_coef) | ||
polarizability = res.get("polarizibilities") | ||
mol.SetProp("DFTD4:C6", "|".join([f"{x:.4f}" for x in c6_coef_diag])) | ||
mol.SetProp("DFTD4:polarizability", "|".join([f"{x:.4f}" for x in polarizability])) | ||
sd_writer.write(mol) | ||
|
||
print("all dftd4 done", flush=True) |
Oops, something went wrong.