Skip to content

Commit

Permalink
addressing memory leakage and dataloading speed
Browse files Browse the repository at this point in the history
  • Loading branch information
Blazej Banaszewski committed Aug 5, 2024
1 parent 42f3c0f commit b9fb4a8
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 17 deletions.
30 changes: 14 additions & 16 deletions ckpts/minimol_v1/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,12 @@ accelerator:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches the args used in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"
l1000_vcap:
df: null
df_path: /home/blazejb/graphium/graphium/data/neurips2023/large-dataset/LINCS_L1000_VCAP_0-2_th4.csv
splits_path: /home/blazejb/graphium/graphium/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt
df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_VCAP_0-2_th4.csv
splits_path: graphium/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt
# df_path: graphium/data/neurips2023/foat_th4_reset_index/LINCS_L1000_VCAP_0-2_th4.csv
# splits_path: graphium/data/neurips2023/foat_th4_reset_index/LINCS_L1000_VCAP_0-2_th4_no_admet_test_random_split.pt
# wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz
Expand All @@ -29,8 +28,8 @@ datamodule:

l1000_mcf7:
df: null
df_path: /home/blazejb/graphium/graphium/data/neurips2023/large-dataset/LINCS_L1000_MCF7_0-2_th4.csv
splits_path: /home/blazejb/graphium/graphium/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt
df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_MCF7_0-2_th4.csv
splits_path: graphium/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt
# df_path: graphium/data/neurips2023/foat_th4_reset_index/LINCS_L1000_MCF7_0-2_th4.csv
# splits_path: graphium/data/neurips2023/foat_th4_reset_index/LINCS_L1000_MCF7_0-2_th4_no_admet_test_random_split.pt
# wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
Expand All @@ -43,8 +42,8 @@ datamodule:

pcba_1328:
df: null
df_path: /home/blazejb/graphium/graphium/data/neurips2023/large-dataset/PCBA_1328_1564k.parquet
splits_path: /home/blazejb/graphium/graphium/data/neurips2023/large-dataset/pcba_1328_random_splits.pt
df_path: graphium/data/neurips2023/large-dataset/PCBA_1328_1564k.parquet
splits_path: graphium/data/neurips2023/large-dataset/pcba_1328_random_splits.pt
# df_path: graphium/data/neurips2023/foat_th4_reset_index/PCBA_1328_1564k.parquet
# splits_path: graphium/data/neurips2023/foat_th4_reset_index/PCBA_1328_1564k_no_admet_test_random_split.pt
# wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet
Expand All @@ -57,8 +56,8 @@ datamodule:

pcqm4m_g25:
df: null
df_path: /home/blazejb/graphium/graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet
splits_path: /home/blazejb/graphium/graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt
df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet
splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt
# df_path: graphium/data/neurips2023/foat_th4_reset_index/PCQM4M_G25_N4.parquet
# splits_path: graphium/data/neurips2023/foat_th4_reset_index/PCQM4M_G25_N4_no_admet_test_random_split.pt
# wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
Expand All @@ -74,8 +73,8 @@ datamodule:

pcqm4m_n4:
df: null
df_path: /home/blazejb/graphium/graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet
splits_path: /home/blazejb/graphium/graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt
df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet
splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt
# df_path: graphium/data/neurips2023/foat_th4_reset_index/PCQM4M_G25_N4.parquet
# splits_path: graphium/data/neurips2023/foat_th4_reset_index/PCQM4M_G25_N4_no_admet_test_random_split.pt
# wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
Expand All @@ -94,8 +93,8 @@ datamodule:
batch_size_training: 2048
batch_size_inference: 2048
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_n_jobs: -1
featurization_progress: False
featurization_backend: "loky"
dataloading_from: ram
processed_graph_data_path: ${constants.datacache_path}
Expand Down Expand Up @@ -130,8 +129,7 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

num_workers: 16 # -1 to use all
# num_workers: -1 # -1 to use all
num_workers: -1 # -1 to use all
persistent_workers: False # whether to use persistent workers at the start of each epoch.
# Setting persistent_workers to False might make the start of each epoch very long.

Expand Down Expand Up @@ -253,7 +251,7 @@ trainer:

architecture:
model_type: FullGraphMultiTaskNetwork
mup_base_path: ../ckpts/minimol_v1/base_shape.yaml
mup_base_path: ../minimol/ckpts/minimol_v1/base_shape.yaml
mup_load_or_save: load
pre_nn: # Set as null to avoid a pre-nn network
out_dim: 128
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name='minimol',
version='1.2',
version='1.3.1',
packages=find_packages(),
include_package_data=True,
package_data={
Expand Down

0 comments on commit b9fb4a8

Please sign in to comment.