Skip to content

Commit

Permalink
add experimental results
Browse files Browse the repository at this point in the history
  • Loading branch information
hypnopump committed May 22, 2021
1 parent eb7ab95 commit e9b2edc
Show file tree
Hide file tree
Showing 12 changed files with 346 additions and 557 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ Special emphasis is placed on reusability and ease of use within diverse pipelin

length | sota | **us (cpu)** | Nx | us (gpu) | us (hybrid) |
---------|--------|--------------|-------|----------|-------------|
~114 | 2.4s | **5.64ms** | ~420 | 22.02ms | 19.65ms |
~300 | 3.5s | **8.29ms** | ~422 | 26.46ms | 23.60ms |
~500 | 7.5s | **11.49ms** | ~652 | 31.47ms | 24.96ms |
~1000 | 18.66s | **16.80ms** | ~1112 | 43.09ms | 29.09ms |
~114 | 2.4s | **5.33ms** | ~446 | 19.93ms | 18.41ms |
~300 | 3.5s | **8.66ms** | ~400 | 25.47ms | 22.54ms |
~500 | 7.5s | **11.22ms** | ~651 | 30.79ms | 24.05ms |
~1000 | 18.66s | **18.11ms** | ~1030 | 44.29ms | 30.69ms |

* **Profiler Trace (CPU)**:
<center><img src="notebooks/experiments_manual/profiler_capture.png"></center>
Expand Down
Binary file removed notebooks/experiments/100_info.joblib
Binary file not shown.
Binary file removed notebooks/experiments/200_info.joblib
Binary file not shown.
Binary file removed notebooks/experiments/300_info.joblib
Binary file not shown.
Binary file removed notebooks/experiments/400_info.joblib
Binary file not shown.
Binary file removed notebooks/experiments/500_info.joblib
Binary file not shown.
Binary file removed notebooks/experiments/600_info.joblib
Binary file not shown.
Binary file removed notebooks/experiments/700_info.joblib
Binary file not shown.
Binary file removed notebooks/experiments/800_info.joblib
Binary file not shown.
Binary file removed notebooks/experiments/900_info.joblib
Binary file not shown.
751 changes: 323 additions & 428 deletions notebooks/experiments/logs_experiment.txt

Large diffs are not rendered by default.

144 changes: 19 additions & 125 deletions notebooks/integrated_test.py
Original file line number Diff line number Diff line change
@@ -1,95 +1,3 @@
<<<<<<< HEAD
##########################
# Clone repos with utils #
##########################

# !git clone https://github.com/hypnopump/geometric-vector-perceptron

import os
import sys
import time
import timeit
import logging

# science
import numpy as np
import torch
import sidechainnet
from sidechainnet.utils.sequence import ProteinVocabulary as VOCAB
VOCAB = VOCAB()

# process
import joblib

# custom
import mp_nerf

BASE_FOLDER = "experiments/"
# Location of the cached protein selection that this script both reads and
# (when the cache is absent) writes.
CACHE_PATH = BASE_FOLDER[:-1]+"_manual/analyzed_prots.joblib"

logging.basicConfig(level=logging.DEBUG,
                    format="%(asctime)s %(levelname)s %(threadName)s %(name)s %(message)s",
                    # datefmt='%m-%d %H:%M',
                    filename=BASE_FOLDER+"logs_experiment.txt",
                    filemode="a")
logger = logging.getLogger()
sep = "\n\n=======\n\n"


# begin tests
# NOTE: the benchmark loop intentionally stays at module level (not inside a
# function) because the timeit statement below resolves `scaffolds`, `device`
# and `mp_nerf` through `globals()`.
if __name__ == "__main__":

    logger.info("Loading data"+"\n")
    lengths = [100, 200, 300, 400, 500, 600, 700, 800, 900]# [::-1]

    if os.path.isfile(CACHE_PATH):
        # Fast path: reuse the previously stored selection. This replaces the
        # former `"a"+9` trick that forced a TypeError into a bare `except:`.
        stored = joblib.load(CACHE_PATH)
        logger.info("Data has been loaded"+"\n"+sep)
    else:
        # No cache yet: select one entry per target length from the
        # sidechainnet dataloaders and store the selection for later runs.
        dataloaders_ = sidechainnet.load(casp_version=7, with_pytorch="dataloaders", batch_size=2)
        logger.info("Data has been loaded"+"\n"+sep)
        stored = [ mp_nerf.utils.get_prot(dataloader_=dataloaders_,
                                          vocab_=VOCAB,
                                          min_len=desired_len+5,
                                          max_len=desired_len+60) for desired_len in lengths ]
        os.makedirs(os.path.dirname(CACHE_PATH), exist_ok=True)
        joblib.dump(stored, CACHE_PATH)

    logger.info("Assessing lengths of: "+str([len(x[0]) for x in stored])+"\n")

    # Always time the CPU; add CUDA only when it is actually available so the
    # CPU measurements are not repeated on machines without a GPU.
    devices = ["cpu"]
    if torch.cuda.is_available():
        devices.append(torch.device("cuda"))

    for device in devices:

        logger.info("Preparing speed tests: for device "+repr(device)+"\n")

        for i,desired_len in enumerate(lengths):

            # Each stored entry unpacks into these seven fields.
            seq, int_seq, true_coords, angles, padding_seq, mask, pid = stored[i]
            scaffolds = mp_nerf.proteins.build_scaffolds_from_scn_angles(seq, angles.to(device))

            logger.info("Assessing the speed of folding algorithm at length "+str(len(seq))+"\n")

            # Total wall time of 1000 fold calls, as reported by timeit.
            logger.info( str( timeit.timeit('mp_nerf.proteins.protein_fold(**scaffolds, device=device, hybrid=True)',
                                            globals=globals(), number=1000) )+" for 1000 calls" )

            logger.info("Saving the related information at {0}{1}_info.joblib\n".format(
                        BASE_FOLDER, desired_len))
            joblib.dump({"seq": seq,
                         "true_coords": true_coords,
                         "angles": angles,
                         "padding_seq": padding_seq,
                         "mask": mask,
                         "pid": pid,
                         "padding_stripped": True}, BASE_FOLDER+str(desired_len)+"_info.joblib")
            logger.info(sep)

    logger.info("Execution has finished\n")






=======
##########################
# Clone repos with utils #
##########################
Expand Down Expand Up @@ -128,42 +36,35 @@

# begin tests
if __name__ == "__main__":
"a"+9

logger.info("Loading data"+"\n")
# adapt desired lengths to previous experiments results
# lengths = [100, 200, 300, 400, 500, 600, 700, 800, 900]# [::-1]
lengths = [[131, 150],
[200, 250],
[331, 351],
[400, 450],
[500, 550],
[600, 650],
[700, 780],
[800, 900],
[905, 1070]]# [::-1]
lengths = [100, 200, 300, 400, 500, 600, 700, 800, 900]# [::-1]
try:
"a"+9
# skip
dataloaders_ = sidechainnet.load(casp_version=7, with_pytorch="dataloaders", batch_size=2)
logger.info("Data has been loaded"+"\n"+sep)
stored = [ ]
for i,(desired_len, upper_len) in enumerate(lengths[::-1]):
dataloaders_ = sidechainnet.load(casp_version=7, with_pytorch="dataloaders", batch_size=2)
stored.append(mp_nerf.utils.get_prot(dataloader_=dataloaders_,
vocab_=VOCAB,
min_len=desired_len,
max_len=upper_len))
stored = stored[::-1]

stored = [ mp_nerf.utils.get_prot(dataloader_=dataloaders_,
vocab_=VOCAB,
min_len=desired_len+5,
max_len=desired_len+60) for desired_len in lengths ]
joblib.dump(stored, BASE_FOLDER[:-1]+"_manual/analyzed_prots.joblib")
except:
stored = joblib.load(BASE_FOLDER[:-1]+"_manual/analyzed_prots.joblib")
logger.info("Data has been loaded"+"\n"+sep)

logger.info("Assessing lengths of: "+str([len(x[0]) for x in stored])+"\n")

cuda_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for device in ["cpu", cuda_device]:
run_opts = [(torch.device("cpu"), False)] # tuples of (device, hybrid)
# add possibility for different configs
if torch.cuda.is_available():
run_opts.append( (torch.device("cuda"), True))
run_opts.append( (torch.device("cuda"), False))


logger.info("Preparing speed tests: for device "+repr(device)+"\n")
for device,hybrid in run_opts:

logger.info("Preparing speed tests: for device "+repr(device)+" and hybrid_opt: "+str(hybrid)+"\n")

for i,desired_len in enumerate(lengths):

Expand All @@ -172,7 +73,7 @@

logger.info("Assessing the speed of folding algorithm at length "+str(len(seq))+"\n")

logger.info( str( timeit.timeit('mp_nerf.proteins.protein_fold(**scaffolds, device=device, hybrid=True)',
logger.info( str( timeit.timeit('mp_nerf.proteins.protein_fold(**scaffolds, device=device, hybrid=hybrid)',
globals=globals(), number=1000) )+" for 1000 calls" )

logger.info("Saving the related information at {0}{1}_info.joblib\n".format(
Expand All @@ -186,11 +87,4 @@
"padding_stripped": True}, BASE_FOLDER+str(desired_len)+"_info.joblib")
logger.info(sep)

logger.info("Execution has finished\n")






>>>>>>> 4cabbe55371d6a9a7edeab1db719fa0cf8312eae
logger.info("Execution has finished\n")

0 comments on commit e9b2edc

Please sign in to comment.