Commit
updating changes from gitlab
Juanma committed Aug 30, 2023
1 parent cf001a7 commit 7cc62d5
Showing 7 changed files with 274 additions and 94 deletions.
54 changes: 37 additions & 17 deletions bin/optimisers/host_guest_overlapping.py
@@ -15,39 +15,59 @@
import tqdm
import pickle
import numpy as np
from datetime import datetime
import tensorflow as tf
from tensorflow.keras import backend as K

from src.utils.optimiser_utils import load_host, load_vae_model, initial_population
from src.utils.optimiser_utils import grad_ed_overlapping

# restrict TensorFlow to the first GPU
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

if __name__ == "__main__":

BATCH_SIZE = 32
DATA_FOLDER = '/home/nvme/juanma/Data/ED/' # in Auchentoshan
host = load_host(DATA_FOLDER+'cc6.pkl', BATCH_SIZE)
# folder where to save the logs of this run
startdate = datetime.now().strftime('%Y-%m-%d')
RUN_FOLDER = startdate

    # just loop until we find a run-folder name that doesn't exist yet
n = 0
while os.path.exists(RUN_FOLDER+'_'+str(n)+'/'):
n += 1
RUN_FOLDER += '_'+str(n)+'/'
os.mkdir(RUN_FOLDER)
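    # e.g. the first run on 2023-08-30 ends up in '2023-08-30_0/'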

BATCH_SIZE = 48
# DATA_FOLDER = '/home/nvme/juanma/Data/ED/' # in Auchentoshan
DATA_FOLDER = '/home/juanma/Data/' # in maddog2020
host = load_host(DATA_FOLDER+'cb6ED.pkl', BATCH_SIZE, expand_dims=True)
vae, z_dim = load_vae_model('logs/vae/2021-05-25/')

# noise_t = initial_population(BATCH_SIZE, random=True)
noise_t = initial_population(BATCH_SIZE, False, datapath=DATA_FOLDER, vae=vae)
# noise_t = initial_population(BATCH_SIZE, False, datapath=DATA_FOLDER, vae=vae)

# initial population
noise_t = K.random_uniform(shape = (BATCH_SIZE, z_dim),
minval = -2.0, maxval = 2.0)
_, _, initial_output = grad_ed_overlapping(noise_t, vae, host)

with open('initial_g.p', 'wb') as file:
with open(RUN_FOLDER+'initial_cb6ONLYED.p', 'wb') as file:
pickle.dump(initial_output, file)

with open('initial_hg.p', 'wb') as file:
pickle.dump(initial_output+host, file)
# we will do five cycles of optimising
for factor in [1, 5, 10, 20, 50]:
lr = 0.05 / factor
slr = str(factor)
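        # each cycle uses a smaller learning rate (0.05/factor) and proportionally fewer steps (10000/factor)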

for i in tqdm.tqdm(range(10000)):
f, grads, output = grad_ed_overlapping(noise_t, vae, host)
print(np.mean(f.numpy()))
noise_t -= 0.05 * grads[0].numpy()
noise_t = np.clip(noise_t, a_min=-4.0, a_max=4.0)
for j in tqdm.tqdm(range(int(10000/factor))):
f, grads, output = grad_ed_overlapping(noise_t, vae, host)
print(np.mean(f.numpy()))
noise_t -= lr * grads[0].numpy()
noise_t = np.clip(noise_t, a_min=-5.0, a_max=5.0)

if i % 1000 == 0:
with open('optimized_g.p', 'wb') as file:
pickle.dump(output, file)
if j % 1000 == 0:
with open(RUN_FOLDER+'optimized_cb6ONLYED'+slr+'.p', 'wb') as file:
pickle.dump(output, file)

with open('optimized_hg.p', 'wb') as file:
pickle.dump(output+host, file)
with open(RUN_FOLDER+'cb6_optimised_final.p', 'wb') as file:
pickle.dump(output, file)
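
For reference, a minimal sketch of reloading a saved run: the run-folder name is hypothetical (one produced by the date+counter logic above), and the pickled output is assumed to be array-like:

import pickle
import numpy as np

with open('2023-08-30_0/cb6_optimised_final.p', 'rb') as f:  # hypothetical run folder
    guests = pickle.load(f)

guests = np.asarray(guests)  # works for numpy arrays and TF tensors alike
print(guests.shape)          # a batch of voxel grids, e.g. (48, 64, 64, 64, 1)
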
140 changes: 74 additions & 66 deletions bin/optimisers/maximise_hg_esp.py
@@ -26,70 +26,78 @@
if __name__ == "__main__":

# factor that we will use to multiply the ED part of gradient descent.
    # The ESP part will be multiplied by (1 - ed_factor)
ed_factor = 0.7

# folder where to save the logs of this run
startdate = datetime.now().strftime('%Y-%m-%d')
RUN_FOLDER = startdate +'_'+ str(ed_factor)

    # just loop until we find a run-folder name that doesn't exist yet
n = 0
while os.path.exists(RUN_FOLDER+'_'+str(n)+'/'):
n += 1
RUN_FOLDER += '_'+str(n)+'/'
os.mkdir(RUN_FOLDER)

BATCH_SIZE = 32
DATA_FOLDER = '/home/nvme/juanma/Data/ED/'
# DATA_FOLDER = '/media/extssd/juanma/'

host_ed, host_esp = load_host_ed_esp(
DATA_FOLDER+'cc6.pkl', DATA_FOLDER+'cc6_esp.pkl', BATCH_SIZE)
vae, z_dim = load_vae_model('logs/vae/2021-05-25/')
ed_to_esp = load_ED_to_ESP('logs/vae_ed_esp/2021-07-18')

noise_t = K.random_uniform(shape = (BATCH_SIZE, z_dim),
minval = -2.0, maxval = 2.0)
_, _, init_eds, init_esps = grad_esp_overlapping(noise_t, vae, ed_to_esp, host_esp)

with open(RUN_FOLDER+'cb6_esp_opt_initial_g.p', 'wb') as file:
pickle.dump(init_eds, file)

with open(RUN_FOLDER+'cb6_esp_opt_initial_g_esp.p', 'wb') as file:
pickle.dump(init_esps, file)

with open(RUN_FOLDER+'cb6_esp_opt_initial_hg.p', 'wb') as file:
pickle.dump(init_eds+host_ed, file)

# we will do five cycles of optimising
for factor in [1, 5, 10, 20, 50]:
lr = 0.05 / factor
slr = str(factor)

for j in tqdm.tqdm(range(int(10000/factor))):
# try to minimise overlapping ESP
f, grads, output, esps = grad_esp_overlapping(noise_t, vae, ed_to_esp, host_esp)
print(np.mean(f.numpy()))
noise_t -= lr * grads[0].numpy() * (1-ed_factor)
noise_t = np.clip(noise_t, a_min=-5.0, a_max=5.0)

if j % 1000 == 0:
with open(RUN_FOLDER+'cb6_esp_optimizedESPED'+slr+'.p', 'wb') as file:
pickle.dump(output, file)
with open(RUN_FOLDER+'cb6_esp_optimizedESP'+slr+'.p', 'wb') as file:
pickle.dump(esps, file)

# try to minimise overlapping ED
f, grads, output = grad_ed_overlapping(noise_t, vae, host_ed)
print(np.mean(f.numpy()))
noise_t -= lr * grads[0].numpy() * ed_factor
noise_t = np.clip(noise_t, a_min=-5.0, a_max=5.0)

if j % 1000 == 0:
with open(RUN_FOLDER+'cb6_esp_optimizedEDED'+slr+'.p', 'wb') as file:
pickle.dump(output, file)

with open(RUN_FOLDER+'cb6_esp_optimized_final.p', 'wb') as file:
pickle.dump(output, file)
for ef in [0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 1.0]:

        # The ESP part will be multiplied by (1 - ed_factor)
ed_factor = ef
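        # so ed_factor = 1.0 optimises ED overlap only; 0.0 optimises ESP only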

# folder where to save the logs of this run
startdate = datetime.now().strftime('%Y-%m-%d')
RUN_FOLDER = startdate +'_'+ str(ed_factor)

        # just loop until we find a run-folder name that doesn't exist yet
n = 0
while os.path.exists(RUN_FOLDER+'_'+str(n)+'/'):
n += 1
RUN_FOLDER += '_'+str(n)+'/'
os.mkdir(RUN_FOLDER)

BATCH_SIZE = 48
# DATA_FOLDER = '/home/nvme/juanma/Data/ED/'
DATA_FOLDER = '/home/juanma/Data/' # in maddog2020 and dragonsoop

host_ed, host_esp = load_host_ed_esp(
DATA_FOLDER+'cb6ED.pkl', DATA_FOLDER+'cb6ESP.pkl', BATCH_SIZE,
expand_dims=True, thicken=True)
vae, z_dim = load_vae_model('logs/vae/2021-05-25/')
ed_to_esp = load_ED_to_ESP('logs/vae_ed_esp/2021-07-18')

noise_t = K.random_uniform(shape = (BATCH_SIZE, z_dim),
minval = -2.0, maxval = 2.0)
_, _, init_eds, init_esps = grad_esp_overlapping(noise_t, vae, ed_to_esp, host_esp)

with open(RUN_FOLDER+'cb6_esp_opt_initial_g_ed.p', 'wb') as file:
pickle.dump(init_eds, file)

with open(RUN_FOLDER+'cb6_esp_opt_initial_g_esp.p', 'wb') as file:
pickle.dump(init_esps, file)

with open(RUN_FOLDER+'cb6_esp_opt_initial_hg.p', 'wb') as file:
pickle.dump(init_eds+host_ed, file)

# we will do five cycles of optimising
for factor in [1, 5, 10, 20, 50]:
lr = 0.05 / factor
slr = str(factor)

for j in tqdm.tqdm(range(int(5000/factor))):
# try to minimise overlapping ESP
f, grads, output, esps = grad_esp_overlapping(noise_t, vae, ed_to_esp, host_esp)
print(np.mean(f.numpy()))
noise_t -= lr * grads[0].numpy() * (1-ed_factor)
noise_t = np.clip(noise_t, a_min=-5.0, a_max=5.0)

if j % 1000 == 0:
with open(RUN_FOLDER+'cb6_esp_optimisedESPED'+slr+'.p', 'wb') as file:
pickle.dump(output, file)
with open(RUN_FOLDER+'cb6_esp_optimisedESPESP'+slr+'.p', 'wb') as file:
pickle.dump(esps, file)

# try to minimise overlapping ED
f, grads, output = grad_ed_overlapping(noise_t, vae, host_ed)
print(np.mean(f.numpy()))
noise_t -= lr * grads[0].numpy() * ed_factor
noise_t = np.clip(noise_t, a_min=-5.0, a_max=5.0)

if j % 1000 == 0:
with open(RUN_FOLDER+'cb6_esp_optimisedEDED'+slr+'.p', 'wb') as file:
pickle.dump(output, file)

        # one last forward pass, just to save the final ED and ESP outputs
f, grads, output, esps = grad_esp_overlapping(noise_t, vae, ed_to_esp, host_esp)
with open(RUN_FOLDER+'cb6_esped_optimised_final.p', 'wb') as file:
pickle.dump(output, file)

with open(RUN_FOLDER+'cb6_espesp_optimised_final.p', 'wb') as file:
pickle.dump(esps, file)

2 changes: 1 addition & 1 deletion env.sh
@@ -1,4 +1,4 @@
-CONDA_ENV_NAME=electrondensity
+CONDA_ENV_NAME=ed_tf12
export PROJECT_ROOT=`pwd`
export DATA_DIR='./data'
export LOGS_DIR='./logs'
25 changes: 25 additions & 0 deletions environmentTF12.yml
@@ -0,0 +1,25 @@
name: ed_tf12

channels:
- rdkit
- conda-forge
dependencies:
- cudatoolkit=11.8.0
- python=3.9
- ipython
- tqdm
- rdkit
- xtb=6.4.1
- requests
- argh
- matplotlib
- scipy
- cython
- traits=6.1.1
- traitsui=7.0.0
- mayavi
- pip
- pip:
- nvidia-cudnn-cu11==8.6.0.163
- tensorflow==2.12.*
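
With a standard conda workflow, this environment would be created with conda env create -f environmentTF12.yml; the resulting name ed_tf12 matches the new CONDA_ENV_NAME in env.sh above.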

108 changes: 108 additions & 0 deletions src/utils/generate_ed_from_xyz.py
@@ -0,0 +1,108 @@
##########################################################################################
#
# This script was used to obtain the electron densities and electrostatic potentials
# from single XYZ files. We used it to generate the EDs and ESPs of the hosts.
# To generate EDs for many files (for example the whole QM9 dataset), do not use
# this script; use the scripts we have in the Dataset folder instead.
#
# Author: Jarek & Juanma ([email protected])
#
##########################################################################################

import numpy as np
import shutil
import os
import pickle

from src.datasets.utils.esp import ESP
from src.datasets.utils.xtb import prepare_xtb_input, run_xtb
from src.datasets.utils.orbkit import electron_density_from_molden
import matplotlib.pyplot as plt



def load_xyz(file_path, read_energies=False):
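    # expected (multi-)conformer XYZ layout:
    #   <num_atoms>
    #   <comment line; parsed as the conformer energy when read_energies=True>
    #   <element> <x> <y> <z>   (num_atoms such lines)
    # repeated once per conformer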
with open(file_path, 'r') as file:
lines = file.readlines()
lines = [l.strip('\n') for l in lines]

idx = 0
confs = []
energies = []
while idx < len(lines):
num_atoms = int(lines[idx].strip(' '))
if read_energies:
conf_energy = lines[idx+1].strip(' ')
conf_energy = float(conf_energy)
energies.append(conf_energy)
idx += 2
elements = []
coords = []

for lidx in range(num_atoms):
parsed_line = [l for l in lines[lidx+idx].split(' ') if l!='']
atom = parsed_line[0]
atom_coords = [float(c) for c in parsed_line[1:]]
elements.append(atom)
coords.append(atom_coords)
confs.append([elements, np.array(coords)])
idx += num_atoms
if not read_energies:
return confs
else:
return confs, energies


if __name__ == '__main__':

# these values should be the same as the ones you used to generate the dataset
n_points = 64
step_size = 0.5
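    # i.e. a 64 x 64 x 64 voxel grid with 0.5 spacing between grid points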

# class to calculate the electrostatic potentials
esp = ESP(n_points, step_size)

    # directory where the xtb and orbkit outputs for this host will be written
output_dir = "cb6_output"
if not os.path.exists(output_dir):
os.mkdir(output_dir)

# replace the file here with whatever host you want to convert
elements, coords = load_xyz('cb6v2.xyz')[0]
# xtb_input_file_path = 'input.xtb'
xtb_input_file_path = os.path.join(output_dir, 'input.xtb')

    # reshape coords into the [element, x, y, z] rows that prepare_xtb_input expects
coords = [[e]+list(c) for e, c in zip(elements, coords)]

prepare_xtb_input(coords, xtb_input_file_path)
#xtb_exec_path = "/home/jarek/xtb-6.4.1/bin/xtb"
xtb_exec_path = shutil.which('xtb')
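    # molden=True makes xtb write a molden wavefunction file (read by orbkit below);
    # esp=True makes it write the xtb_esp.dat grid used for the ESP cube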
run_xtb(xtb_exec_path, xtb_input_file_path, output_dir, molden=True, esp=True)

molden_input = os.path.join(output_dir, 'molden.input')
rho = electron_density_from_molden(molden_input, n_points=n_points,
step_size=step_size)

# calculate esp cube from xtb
espxtb_input = os.path.join(output_dir, 'xtb_esp.dat')
molecule_esp = esp.calculate_espcube_from_xtb(espxtb_input)

with open('cb6ED.pkl', 'wb') as file:
pickle.dump(rho, file)

with open('cb6ESP.pkl', 'wb') as file:
pickle.dump(molecule_esp, file)

# plt.imsave('test.png', np.tanh(rho[32]))