Skip to content

Commit

Permalink
Merge pull request #10 from rinikerlab/SI_figures
Browse files Browse the repository at this point in the history
Si figures
  • Loading branch information
MTLehner authored Jul 8, 2024
2 parents 80e651d + 4e3f885 commit 8404fc8
Show file tree
Hide file tree
Showing 14 changed files with 4,974 additions and 0 deletions.
2,103 changes: 2,103 additions & 0 deletions dev/test_jnbs/props/006_vehicle.ipynb

Large diffs are not rendered by default.

1,824 changes: 1,824 additions & 0 deletions dev/test_jnbs/props/007_aminoacid_benchmark.ipynb

Large diffs are not rendered by default.

351 changes: 351 additions & 0 deletions dev/test_jnbs/props/euler_scripts/combine_props.py

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/bash
#SBATCH -n 1
#SBATCH --cpus-per-task=2
#SBATCH --time=24:00:00
#SBATCH --job-name='dual'
#SBATCH --nodes=1
#SBATCH --mem-per-cpu=8192
#SBATCH --tmp=20000
#SBATCH --output="dual/dual.%A.%a.out"
#SBATCH --error="dual/dual.%A.%a.err"
#SBATCH --array=1-10000

max_molJobIndex=180000
min_molJobIndex=1

cd ${TMPDIR}
mkdir scratch
export PSI_SCRATCH=${TMPDIR}/scratch

molJobIndicesPerArrayJob=18
start_molJobIndex=$(($min_molJobIndex + ($SLURM_ARRAY_TASK_ID-1)*$molJobIndicesPerArrayJob))
end_molJobIndex=$(($start_molJobIndex + $molJobIndicesPerArrayJob - 1))

for ((molJobIndex=$start_molJobIndex; molJobIndex<=$end_molJobIndex; molJobIndex++))
do
echo "molJobIndex: $molJobIndex"
if [ ! -f "${SLURM_SUBMIT_DIR}/dual/dual_${molJobIndex}.pkl" ]; then
python ${SLURM_SUBMIT_DIR}/run_psi4_dual.py $molJobIndex
# clear scratch
rm -rf ${TMPDIR}/scratch/*
else
echo "dual_${molJobIndex}.pkl already exists"
fi
done

echo "Finished with array job: $SLURM_ARRAY_JOB_ID, task: $SLURM_ARRAY_TASK_ID"
28 changes: 28 additions & 0 deletions dev/test_jnbs/props/euler_scripts/node_path_for_atoms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import pickle
from rdkit import Chem
from serenityff.charge.tree.dash_tree import DASHTree

sdf_file_path = "sdf_qmugs500_mbis_collect.sdf"
mol_sup = Chem.SDMolSupplier(sdf_file_path, removeHs=False)
tree = DASHTree()
# tree = DASHTree(tree_folder_path="/cluster/work/igc/mlehner/test192_DASHtree/")

node_path_storage = {}

last_dash_idx = ""
for mol in mol_sup:
dash_idx = mol.GetProp("DASH_IDX")
if dash_idx != last_dash_idx:
this_mols_node_path_storage = {}
for atom_idx in range(mol.GetNumAtoms()):
node_path = tree.match_new_atom(atom=atom_idx, mol=mol)
this_mols_node_path_storage[atom_idx] = node_path
node_path_storage[dash_idx] = this_mols_node_path_storage
last_dash_idx = dash_idx
else:
continue

with open("node_path_storage_default.pkl", "wb") as f:
pickle.dump(node_path_storage, f)

print(f"node_paths saved with {len(node_path_storage)} entries \nfile used {sdf_file_path}, \n{len(mol_sup)} mols")
95 changes: 95 additions & 0 deletions dev/test_jnbs/props/euler_scripts/run_am1bcc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
from rdkit import Chem
from tqdm import tqdm
import sys
import pandas as pd
from openff.toolkit.topology import Molecule
from openff.toolkit.typing.engines.smirnoff import ForceField

sdf_test_path = "/cluster/work/igc/mlehner/test142_psi4_rest/sdf_qmugs500_mbis_collect.sdf"
sdf_test = Chem.SDMolSupplier(sdf_test_path, removeHs=False)
ff = ForceField("openff_unconstrained-2.0.0.offxml")


def get_am1Bcc_charges_averaged(mol):
try:
molecule = Molecule.from_rdkit(mol, allow_undefined_stereo=True)
charges_tmp = ff.get_partial_charges(molecule)
charges = charges_tmp.value_in_unit(charges_tmp.unit).tolist()
return [round(float(item), 5) for item in charges]
except:
return [0 for x in range(mol.GetNumAtoms())]


def get_am1Bcc_charges_conf(mol):
try:
molecule = Molecule.from_rdkit(mol, allow_undefined_stereo=True)
charges_tmp = molecule.assign_partial_charges("am1bcc", use_conformers=molecule.conformers)
charges = charges_tmp.value_in_unit(charges_tmp.unit).tolist()
return [round(float(item), 5) for item in charges]
except:
return [0 for x in range(mol.GetNumAtoms())]


def get_am1mulliken_charge(mol):
try:
molecule = Molecule.from_rdkit(mol, allow_undefined_stereo=True)
charges_tmp = molecule.assign_partial_charges('"am1-mulliken"', use_conformers=molecule.conformers)
charges = charges_tmp.value_in_unit(charges_tmp.unit).tolist()
return [round(float(item), 5) for item in charges]
except:
return [0 for x in range(mol.GetNumAtoms())]


if __name__ == "__main__":
# get mol index from argument
array_idx = int(sys.argv[1]) - 1
print("array_idx: ", array_idx)
array_width = 1000
start_idx = array_idx * array_width
end_idx = start_idx + array_width
print("start_idx: ", start_idx)
print("end_idx: ", end_idx)
# calculate charges
am1Bcc_charges_averaged = []
am1Bcc_charges_conf = []
am1Mulliken_charges = []
elements = []
atom_indices = []
mol_indices = []
dash_indices = []
for mol_index in tqdm(range(start_idx, end_idx)):
try:
mol = sdf_test[mol_index]
am1Bcc_charges_averaged.append(get_am1Bcc_charges_averaged(mol))
am1Bcc_charges_conf.append(get_am1Bcc_charges_conf(mol))
am1Mulliken_charges.append(get_am1mulliken_charge(mol))
elements.append([x.GetSymbol() for x in mol.GetAtoms()])
atom_indices.append([x.GetIdx() for x in mol.GetAtoms()])
mol_indices.append([mol_index for x in mol.GetAtoms()])
dash_indices.append([mol.GetProp("DASH_IDX") for x in mol.GetAtoms()])
except Exception as e:
print(e)
pass
# flatten lists
am1Bcc_charges_averaged_flat = [item for sublist in am1Bcc_charges_averaged for item in sublist]
am1Bcc_charges_conf_flat = [item for sublist in am1Bcc_charges_conf for item in sublist]
am1Mulliken_charges_flat = [item for sublist in am1Mulliken_charges for item in sublist]
elements_flat = [item for sublist in elements for item in sublist]
atom_indices_flat = [item for sublist in atom_indices for item in sublist]
mol_indices_flat = [item for sublist in mol_indices for item in sublist]
dash_indices_flat = [item for sublist in dash_indices for item in sublist]
# create dataframe
df = pd.DataFrame(
{
"mol_index": mol_indices_flat,
"dash_index": dash_indices_flat,
"atom_index": atom_indices_flat,
"element": elements_flat,
"am1Bcc_averaged": am1Bcc_charges_averaged_flat,
"am1Bcc_conf": am1Bcc_charges_conf_flat,
"am1Mulliken": am1Mulliken_charges_flat,
}
)
# save dataframe
df.to_csv(f"./charges/test_{array_idx}.csv", index=False)
print("Done with array", array_idx)
30 changes: 30 additions & 0 deletions dev/test_jnbs/props/euler_scripts/run_dft_d4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import dftd4
from dftd4 import interface
import numpy as np
import pandas as pd
from rdkit import Chem
import matplotlib.pyplot as plt
from tqdm import tqdm

print("start dftd4 calculation", flush=True)

mol_sup = Chem.SDMolSupplier("combined_multi.sdf", removeHs=False)
sd_writer = Chem.SDWriter("mols_comb_dftd4.sdf")

damp_params = interface.DampingParam(method="tpssh")

for mol in tqdm(mol_sup, total=len(mol_sup)):
atom_numbers = np.array([atom.GetAtomicNum() for atom in mol.GetAtoms()])
atom_positions = mol.GetConformer().GetPositions()
atom_positions_bohr = atom_positions * 1.88973
model = interface.DispersionModel(atom_numbers, atom_positions_bohr)
res = model.get_dispersion(damp_params, grad=False)
res.update(**model.get_properties())
c6_coef = res.get("c6 coefficients")
c6_coef_diag = np.diag(c6_coef)
polarizability = res.get("polarizibilities")
mol.SetProp("DFTD4:C6", "|".join([f"{x:.4f}" for x in c6_coef_diag]))
mol.SetProp("DFTD4:polarizability", "|".join([f"{x:.4f}" for x in polarizability]))
sd_writer.write(mol)

print("all dftd4 done", flush=True)
Loading

0 comments on commit 8404fc8

Please sign in to comment.