Commit

vapavlo committed May 21, 2024
2 parents e7e13bc + 2351fb7 commit f29adb2
Showing 176 changed files with 6,699 additions and 3,575 deletions.
18 changes: 0 additions & 18 deletions .devcontainer/devcontainer.json

This file was deleted.

129 changes: 0 additions & 129 deletions .devcontainer/pyproject.toml

This file was deleted.

3 changes: 3 additions & 0 deletions .dockerignore
@@ -0,0 +1,3 @@
/logs
/datasets/graph
.ruff_cache
25 changes: 10 additions & 15 deletions .pre-commit-config.yaml
@@ -15,21 +15,16 @@ repos:
- id: check-added-large-files
args:
- --maxkb=2048
# - id: trailing-whitespace
- id: requirements-txt-fixer

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.4
hooks:
- id: ruff
#types_or: [ python, pyi, jupyter ]
#types_or: [ python, pyi ]
args: [ --fix ]
- id: ruff-format
#types_or: [ python, pyi, jupyter ]
#types_or: [ python, pyi ]
# - repo: https://github.com/astral-sh/ruff-pre-commit
# rev: v0.4.4
# hooks:
# - id: ruff
# args: [ --fix ]
# - id: ruff-format

- repo: https://github.com/numpy/numpydoc
rev: v1.6.0
hooks:
- id: numpydoc-validation
# - repo: https://github.com/numpy/numpydoc
# rev: v1.6.0
# hooks:
# - id: numpydoc-validation
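
After updating the hook versions, the suite can be exercised locally; these are standard pre-commit commands rather than anything defined by this repository:

pre-commit install
pre-commit run --all-files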
9 changes: 5 additions & 4 deletions .devcontainer/Dockerfile → Dockerfile
100755 → 100644
@@ -7,10 +7,11 @@ COPY . .
RUN pip install --upgrade pip

RUN pip install -e '.[all]'
RUN pip install --no-dependencies git+https://github.com/pyt-team/TopoNetX.git
RUN pip install --no-dependencies git+https://github.com/pyt-team/TopoModelX.git
RUN pip install git+https://github.com/pyt-team/TopoNetX.git
RUN pip install git+https://github.com/pyt-team/TopoModelX.git
RUN pip install git+https://github.com/pyt-team/TopoEmbedX.git

RUN pip install torch_geometric==2.4.0
RUN pip install torch==2.0.1 --extra-index-url https://download.pytorch.org/whl/cu115
RUN pip install torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-2.0.1+cu115.html
RUN pip install torch-cluster -f https://data.pyg.org/whl/torch-2.0.0+cu115.html
RUN pip install lightning>=2.0.0
RUN pip install numpy pre-commit jupyterlab notebook ipykernel
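
A typical way to build and enter the resulting image; the tag topobenchmarkx is an assumption, and --gpus all presumes the NVIDIA container toolkit is installed:

docker build -t topobenchmarkx .
docker run -it --gpus all topobenchmarkx /bin/bash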
11 changes: 11 additions & 0 deletions conda.sh
@@ -0,0 +1,11 @@
# #!/bin/bash

mkdir -p ~/miniconda3
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
rm -rf ~/miniconda3/miniconda.sh

~/miniconda3/bin/conda init bash

#conda create -n topox python=3.11.3
#conda activate topox
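
Following the commented hints above, a plausible next step after the script finishes is to create and activate the environment:

conda create -n topox python=3.11.3
conda activate topox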
3 changes: 2 additions & 1 deletion configs/dataset/MUTAG.yaml
@@ -15,6 +15,7 @@ parameters:
num_features:
- 7 # initial node features
- 4 # initial edge features

num_classes: 2
task: classification
loss_type: cross_entropy
@@ -26,7 +27,7 @@ parameters:
train_prop: 0.5 # for "random" strategy splitting

# Lifting parameters
max_dim_if_lifted: 2
max_dim_if_lifted: 3 # This is the maximum dimension of the simplicial complex in the dataset
preserve_edge_attr_if_lifted: False

# Dataloader parameters
2 changes: 1 addition & 1 deletion configs/dataset/PROTEINS_TU.yaml
@@ -19,7 +19,7 @@ parameters:
monitor_metric: accuracy
task_level: graph
data_seed: 9
split_type: k-fold #'k-fold' # either "k-fold" or "random" strategies
split_type: random #'k-fold' # either "k-fold" or "random" strategies
k: 10 # for "k-fold" Cross-Validation
train_prop: 0.5 # for "random" strategy splitting

10 changes: 7 additions & 3 deletions configs/dataset/ZINC.yaml
@@ -1,7 +1,10 @@
_target_: topobenchmarkx.io.load.loaders.GraphLoader

# USE python train.py dataset.transforms.one_hot_node_degree_features.degrees_fields=x to run this config

defaults:
- transforms/data_manipulations: node_feat_to_float
- transforms/data_manipulations: node_degrees
- transforms/[email protected]_hot_node_degree_features: one_hot_node_degree_features
- transforms: ${get_default_transform:graph,${model}}

# Data definition
@@ -13,14 +16,15 @@ parameters:
data_split_dir: ${paths.data_dir}data_splits/${dataset.parameters.data_name}

# Dataset parameters
num_features: 1 # here basically I specify the initial num features in mutang at x aka x_0
num_features: 21 # torch_geometric ZINC dataset has 21 atom types
max_node_degree: 20 # Use it to one_hot encode node degrees. Additional parameter to run dataset.transforms.one_hot_node_degree_features.degrees_fields=x
num_classes: 1
task: regression
loss_type: mse
monitor_metric: mae
task_level: graph
data_seed: 0
split_type: 'fixed' # either k-fold or test
split_type: 'fixed' # ZINC accept only <fixed> split
#k: 10 # for k-Fold Cross-Validation

# Dataloader parameters
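
Putting the usage note at the top of this file together with the new degree parameters, a ZINC run might look like the following; the model choice here is illustrative:

python train.py dataset=ZINC dataset.transforms.one_hot_node_degree_features.degrees_fields=x model=graph/gcn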
2 changes: 1 addition & 1 deletion configs/dataset/coauthorship_citeseer.yaml
@@ -20,7 +20,7 @@ parameters:
monitor_metric: accuracy
task_level: node
data_seed: 0
split_type: k-fold #'k-fold' # either k-fold or test
split_type: random #'k-fold' # either k-fold or test
k: 10 # for k-Fold Cross-Validation

# Dataloader parameters
2 changes: 1 addition & 1 deletion configs/dataset/coauthorship_cora.yaml
@@ -19,7 +19,7 @@ parameters:
monitor_metric: accuracy
task_level: node
data_seed: 0
split_type: k-fold #'k-fold' # either k-fold or test
split_type: random #'k-fold' # either k-fold or test
k: 10 # for k-Fold Cross-Validation

# Dataloader parameters
2 changes: 1 addition & 1 deletion configs/dataset/manual_dataset.yaml
@@ -19,7 +19,7 @@ parameters:
monitor_metric: accuracy
task_level: node
data_seed: 0
split_type: k-fold #'k-fold' # either k-fold or test
split_type: random #'k-fold' # either k-fold or test
k: 10 # for k-Fold Cross-Validation

# Dataloader parameters
@@ -1,5 +1,5 @@
_target_: topobenchmarkx.transforms.data_transform.DataTransform
transform_name: "NodeDegrees"
transform_type: "data manipulation"
selected_fields: ["edge_index", "incidence"] #"incidence"
selected_fields: ["edge_index"] # "incidence"

@@ -4,5 +4,5 @@ transform_type: "data manipulation"

degrees_fields: "node_degrees"
features_fields: "x"
max_degrees: ${dataset.parameters.max_node_degree}
max_degree: ${dataset.parameters.max_node_degree}
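
For intuition, the kind of one-hot degree encoding this transform configures can be sketched as below; this is an illustrative stand-in (the names one_hot_node_degrees and max_degree mirror the config), not the DataTransform implementation from topobenchmarkx:

import torch.nn.functional as F
from torch_geometric.utils import degree

def one_hot_node_degrees(data, max_degree=20):
    # Node degrees computed from edge_index, capped at max_degree
    deg = degree(data.edge_index[0], num_nodes=data.num_nodes).long()
    deg = deg.clamp(max=max_degree)
    # Degree k becomes a one-hot vector of length max_degree + 1 stored in x
    data.x = F.one_hot(deg, num_classes=max_degree + 1).float()
    return data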

@@ -1,7 +1,6 @@
_target_: topobenchmarkx.transforms.data_transform.DataTransform
transform_type: 'lifting'
transform_name: "CellCyclesLifting"
k_value: 1
complex_dim: ${oc.select:dataset.parameters.max_dim_if_lifted,3}
max_cell_length: 6
max_cell_length: 10
preserve_edge_attr: ${oc.select:dataset.parameters.preserve_edge_attr_if_lifted,False}
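
As a rough illustration of what a cycle-based lifting bounded by max_cell_length does, short cycles of the graph become candidate 2-cells; this sketch is an assumption about the mechanism, not the CellCyclesLifting code:

import networkx as nx

def cycles_as_cells(G, max_cell_length=10):
    # Keep only cycles short enough to be attached as 2-cells
    return [cycle for cycle in nx.cycle_basis(G) if len(cycle) <= max_cell_length]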
2 changes: 1 addition & 1 deletion configs/dataset/us_country_demos.yaml
@@ -17,7 +17,7 @@ parameters:
num_features: 6
num_classes: 1
task: regression
task_variable: 'Election' # options: ['Election', 'MedianIncome', 'MigraRate', 'BirthRate', 'DeathRate', 'BachelorRate', 'UnemploymentRate']
task_variable: 'MedianIncome' # options: ['Election', 'MedianIncome', 'MigraRate', 'BirthRate', 'DeathRate', 'BachelorRate', 'UnemploymentRate']
force_reload: True
loss_type: mse
monitor_metric: mae
2 changes: 1 addition & 1 deletion configs/logger/wandb.yaml
@@ -7,7 +7,7 @@ wandb:
offline: False
id: null # pass correct id to resume experiment!
anonymous: null # enable anonymous logging
project: "topox_10fold_sweep"
project: "None"
log_model: False # upload lightning ckpts
prefix: "" # a string to put at the beginning of metric keys
# entity: "" # set to name of your wandb team
Empty file removed configs/loss/default.yaml
31 changes: 19 additions & 12 deletions configs/model/cell/can.yaml
@@ -1,9 +1,14 @@
_target_: topobenchmarkx.models.network_module.NetworkModule
_target_: topobenchmarkx.models.TopologicalNetworkModule

model_name: can
model_domain: cell

feature_encoder:
_target_: topobenchmarkx.models.encoders.default_encoders.BaseFeatureEncoder
_target_: topobenchmarkx.models.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset}} #${dataset.parameters.num_features}
out_channels: 32
out_channels: 128
proj_dropout: 0.0
selected_dimensions:
- 0
- 1
@@ -17,30 +22,32 @@ backbone:
heads: 1 # For now we stuck to out_channels//heads, keep heads = 1
concat: True
skip_connection: True
n_layers: 1
n_layers: 4
att_lift: False

backbone_wrapper:
_target_: topobenchmarkx.models.wrappers.default_wrapper.CANWrapper
_target_: topobenchmarkx.models.wrappers.CANWrapper
_partial_: true
wrapper_name: CANWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.selected_dimensions}}

readout:
_target_: topobenchmarkx.models.readouts.readout.AbstractReadOut
readout_name: PropagateSignalDown # Use <None> in case readout is not needed
_target_: topobenchmarkx.models.readouts.${model.readout.readout_name}
readout_name: PropagateSignalDown # Use <NoReadOut> in case readout is not needed Options: PropagateSignalDown
hidden_dim: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.selected_dimensions}}

head_model:
_target_: topobenchmarkx.models.head_model.models.DefaultHead
task_level: ${dataset.parameters.task_level}
in_channels: ${parameter_multiplication:${model.backbone.out_channels},${model.backbone.heads}}
_target_: topobenchmarkx.models.head_models.${model.head_model.head_model_name}
head_model_name: ZeroCellModel
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
pooling_type: sum
task_level: ${dataset.parameters.task_level}
pooling_type: sum

loss:
_target_: topobenchmarkx.models.losses.loss.DefaultLoss
_target_: topobenchmarkx.models.losses.DefaultLoss
task: ${dataset.parameters.task}
loss_type: ${dataset.parameters.loss_type}
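
These model configs rely on Hydra's _target_ instantiation and ${...} interpolation. A minimal sketch of how such a config gets resolved, assuming the project's custom resolvers (infer_in_channels, infere_list_length, get_default_transform) have already been registered by the training entry point:

from hydra import compose, initialize
from hydra.utils import instantiate

with initialize(version_base=None, config_path="configs"):
    cfg = compose(config_name="train", overrides=["model=cell/can", "dataset=MUTAG"])
    model = instantiate(cfg.model)  # builds the module tree from the _target_ fields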

28 changes: 17 additions & 11 deletions configs/model/cell/cwn_dcm.yaml → configs/model/cell/cccn.yaml
@@ -1,41 +1,47 @@
_target_: topobenchmarkx.models.network_module.NetworkModule
_target_: topobenchmarkx.models.TopologicalNetworkModule

model_name: cwn_dcm
model_domain: cell

feature_encoder:
_target_: topobenchmarkx.models.encoders.default_encoders.BaseFeatureEncoder
_target_: topobenchmarkx.models.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset}} #${dataset.parameters.num_features}
out_channels: 32
proj_dropout: 0.0
proj_dropout: 0.
selected_dimensions:
- 0
- 1

backbone:
_target_: custom_models.cell.cwn_dcm.CWNDCM
_target_: custom_models.cell.cccn.CCCN
in_channels: ${model.feature_encoder.out_channels}
n_layers: 1
n_layers: 4
dropout: 0.0

backbone_wrapper:
_target_: topobenchmarkx.models.wrappers.default_wrapper.CWNDCMWrapper
_target_: topobenchmarkx.models.wrappers.CCCNWrapper
_partial_: true
wrapper_name: CCCNWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.selected_dimensions}}

readout:
_target_: topobenchmarkx.models.readouts.readout.AbstractReadOut
readout_name: PropagateSignalDown # Use <None> in case readout is not needed
_target_: topobenchmarkx.models.readouts.${model.readout.readout_name}
readout_name: PropagateSignalDown # Use <NoReadOut> in case readout is not needed Options: PropagateSignalDown
hidden_dim: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.selected_dimensions}}

head_model:
_target_: topobenchmarkx.models.head_model.models.DefaultHead
task_level: ${dataset.parameters.task_level}
_target_: topobenchmarkx.models.head_models.${model.head_model.head_model_name}
head_model_name: ZeroCellModel
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum

loss:
_target_: topobenchmarkx.models.losses.loss.DefaultLoss
_target_: topobenchmarkx.models.losses.DefaultLoss
task: ${dataset.parameters.task}
loss_type: ${dataset.parameters.loss_type}

23 changes: 15 additions & 8 deletions configs/model/cell/ccxn.yaml
@@ -1,9 +1,14 @@
_target_: topobenchmarkx.models.network_module.NetworkModule
_target_: topobenchmarkx.models.TopologicalNetworkModule

model_name: ccxn
model_domain: cell

feature_encoder:
_target_: topobenchmarkx.models.encoders.default_encoders.BaseFeatureEncoder
_target_: topobenchmarkx.models.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset}} #${dataset.parameters.num_features}
out_channels: 32
proj_dropout: 0.0

backbone:
_target_: topomodelx.nn.cell.ccxn.CCXN
@@ -17,26 +22,28 @@ backbone_additional_params:
hidden_channels: ${model.feature_encoder.out_channels}

backbone_wrapper:
_target_: topobenchmarkx.models.wrappers.default_wrapper.CCXNWrapper
_target_: topobenchmarkx.models.wrappers.CCXNWrapper
_partial_: true
wrapper_name: CCXNWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.in_channels}}

readout:
_target_: topobenchmarkx.models.readouts.readout.AbstractReadOut
readout_name: PropagateSignalDown # Use <None> in case readout is not needed
_target_: topobenchmarkx.models.readouts.${model.readout.readout_name}
readout_name: PropagateSignalDown # Use <NoReadOut> in case readout is not needed Options: PropagateSignalDown
hidden_dim: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.in_channels}}

head_model:
_target_: topobenchmarkx.models.head_model.models.DefaultHead
task_level: ${dataset.parameters.task_level}
_target_: topobenchmarkx.models.head_models.${model.head_model.head_model_name}
head_model_name: ZeroCellModel
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum

loss:
_target_: topobenchmarkx.models.losses.loss.DefaultLoss
_target_: topobenchmarkx.models.losses.DefaultLoss
task: ${dataset.parameters.task}
loss_type: ${dataset.parameters.loss_type}

27 changes: 16 additions & 11 deletions configs/model/cell/cwn.yaml
@@ -1,9 +1,13 @@
_target_: topobenchmarkx.models.network_module.NetworkModule
_target_: topobenchmarkx.models.TopologicalNetworkModule

model_name: cwn
model_domain: cell

feature_encoder:
_target_: topobenchmarkx.models.encoders.default_encoders.BaseFeatureEncoder
_target_: topobenchmarkx.models.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset}} #${dataset.parameters.num_features}
out_channels: 32
out_channels: 64
proj_dropout: 0.0

backbone:
@@ -12,30 +16,31 @@ backbone:
in_channels_1: ${model.feature_encoder.out_channels}
in_channels_2: ${model.feature_encoder.out_channels}
hid_channels: ${model.feature_encoder.out_channels}
n_layers: 1
n_layers: 4

backbone_wrapper:
_target_: topobenchmarkx.models.wrappers.default_wrapper.CWNWrapper
_target_: topobenchmarkx.models.wrappers.CWNWrapper
_partial_: true
wrapper_name: CWNWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.in_channels}}


readout:
_target_: topobenchmarkx.models.readouts.readout.AbstractReadOut
readout_name: PropagateSignalDown # Use <None> in case readout is not needed
_target_: topobenchmarkx.models.readouts.${model.readout.readout_name}
readout_name: NoReadOut # Use <NoReadOut> in case readout is not needed Options: PropagateSignalDown
hidden_dim: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.in_channels}}

head_model:
_target_: topobenchmarkx.models.head_model.models.DefaultHead
task_level: ${dataset.parameters.task_level}
_target_: topobenchmarkx.models.head_models.${model.head_model.head_model_name}
head_model_name: ZeroCellModel
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum

loss:
_target_: topobenchmarkx.models.losses.loss.DefaultLoss
_target_: topobenchmarkx.models.losses.DefaultLoss
task: ${dataset.parameters.task}
loss_type: ${dataset.parameters.loss_type}

24 changes: 15 additions & 9 deletions configs/model/graph/gat.yaml
@@ -1,9 +1,14 @@
_target_: topobenchmarkx.models.network_module.NetworkModule
_target_: topobenchmarkx.models.TopologicalNetworkModule

model_name: gat
model_domain: graph

feature_encoder:
_target_: topobenchmarkx.models.encoders.default_encoders.BaseFeatureEncoder
_target_: topobenchmarkx.models.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset}}
out_channels: 32
proj_dropout: 0.0

backbone:
_target_: torch_geometric.nn.models.GAT
@@ -17,27 +22,28 @@ backbone:
concat: true

backbone_wrapper:
_target_: topobenchmarkx.models.wrappers.default_wrapper.GNNWrapper
_target_: topobenchmarkx.models.wrappers.GNNWrapper
_partial_: true
wrapper_name: GNNWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.in_channels}}

readout:
_target_: topobenchmarkx.models.readouts.readout.AbstractReadOut
readout_name: PropagateSignalDown # Use <None> in case readout is not needed
_target_: topobenchmarkx.models.readouts.${model.readout.readout_name}
readout_name: PropagateSignalDown # Use <NoReadOut> in case readout is not needed Options: PropagateSignalDown
hidden_dim: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.in_channels}}

head_model:
_target_: topobenchmarkx.models.head_model.models.DefaultHead
task_level: ${dataset.parameters.task_level}
_target_: topobenchmarkx.models.head_models.${model.head_model.head_model_name}
head_model_name: ZeroCellModel
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum


loss:
_target_: topobenchmarkx.models.losses.loss.DefaultLoss
_target_: topobenchmarkx.models.losses.DefaultLoss
task: ${dataset.parameters.task}
loss_type: ${dataset.parameters.loss_type}

25 changes: 16 additions & 9 deletions configs/model/graph/gcn.yaml
@@ -1,39 +1,46 @@
_target_: topobenchmarkx.models.network_module.NetworkModule
_target_: topobenchmarkx.models.TopologicalNetworkModule

model_name: gcn
model_domain: graph

feature_encoder:
_target_: topobenchmarkx.models.encoders.default_encoders.BaseFeatureEncoder
_target_: topobenchmarkx.models.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset}} #${dataset.parameters.num_features}
out_channels: 64
proj_dropout: 0.0

backbone:
_target_: torch_geometric.nn.models.GCN
in_channels: ${model.feature_encoder.out_channels}
hidden_channels: ${model.feature_encoder.out_channels}
num_layers: 1
num_layers: 2
dropout: 0.0
act: relu

backbone_wrapper:
_target_: topobenchmarkx.models.wrappers.default_wrapper.GNNWrapper
_target_: topobenchmarkx.models.wrappers.GNNWrapper
_partial_: true
wrapper_name: GNNWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.in_channels}}

readout:
_target_: topobenchmarkx.models.readouts.readout.AbstractReadOut
readout_name: PropagateSignalDown # Use <None> in case readout is not needed
_target_: topobenchmarkx.models.readouts.${model.readout.readout_name}
readout_name: PropagateSignalDown # Use <NoReadOut> in case readout is not needed Options: PropagateSignalDown
hidden_dim: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.in_channels}}

head_model:
_target_: topobenchmarkx.models.head_model.models.DefaultHead
task_level: ${dataset.parameters.task_level}
_target_: topobenchmarkx.models.head_models.${model.head_model.head_model_name}
head_model_name: ZeroCellModel
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum

loss:
_target_: topobenchmarkx.models.losses.loss.DefaultLoss
_target_: topobenchmarkx.models.losses.DefaultLoss
task: ${dataset.parameters.task}
loss_type: ${dataset.parameters.loss_type}

24 changes: 15 additions & 9 deletions configs/model/graph/gin.yaml
@@ -1,9 +1,14 @@
_target_: topobenchmarkx.models.network_module.NetworkModule
_target_: topobenchmarkx.models.TopologicalNetworkModule

model_name: gin
model_domain: graph

feature_encoder:
_target_: topobenchmarkx.models.encoders.default_encoders.BaseFeatureEncoder
_target_: topobenchmarkx.models.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset}} #${dataset.parameters.num_features}
out_channels: 32
proj_dropout: 0.0

backbone:
_target_: torch_geometric.nn.models.GIN
@@ -14,27 +19,28 @@ backbone:
act: relu

backbone_wrapper:
_target_: topobenchmarkx.models.wrappers.default_wrapper.GNNWrapper
_target_: topobenchmarkx.models.wrappers.GNNWrapper
_partial_: true
wrapper_name: GNNWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.in_channels}}

readout:
_target_: topobenchmarkx.models.readouts.readout.AbstractReadOut
readout_name: PropagateSignalDown # Use <None> in case readout is not needed
_target_: topobenchmarkx.models.readouts.${model.readout.readout_name}
readout_name: PropagateSignalDown # Use <NoReadOut> in case readout is not needed Options: PropagateSignalDown
hidden_dim: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.in_channels}}

head_model:
_target_: topobenchmarkx.models.head_model.models.DefaultHead
task_level: ${dataset.parameters.task_level}
_target_: topobenchmarkx.models.head_models.${model.head_model.head_model_name}
head_model_name: ZeroCellModel
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum


loss:
_target_: topobenchmarkx.models.losses.loss.DefaultLoss
_target_: topobenchmarkx.models.losses.DefaultLoss
task: ${dataset.parameters.task}
loss_type: ${dataset.parameters.loss_type}

21 changes: 13 additions & 8 deletions configs/model/hypergraph/alldeepset.yaml
@@ -1,11 +1,14 @@
_target_: topobenchmarkx.models.network_module.NetworkModule
_target_: topobenchmarkx.models.TopologicalNetworkModule

model_name: alldeepset
model_domain: hypergraph

feature_encoder:
_target_: topobenchmarkx.models.encoders.default_encoders.BaseFeatureEncoder
_target_: topobenchmarkx.models.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset}} #${dataset.parameters.num_features}
out_channels: 32
proj_dropout: 0.0

backbone:
_target_: topomodelx.nn.hypergraph.allset.AllSet
@@ -24,26 +27,28 @@ backbone:
#num_features: ${model.backbone.hidden_channels}

backbone_wrapper:
_target_: topobenchmarkx.models.wrappers.default_wrapper.HypergraphWrapper
_target_: topobenchmarkx.models.wrappers.HypergraphWrapper
_partial_: true
wrapper_name: HypergraphWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.in_channels}}

readout:
_target_: topobenchmarkx.models.readouts.readout.AbstractReadOut
readout_name: None # Use <None> in case readout is not needed
_target_: topobenchmarkx.models.readouts.${model.readout.readout_name}
readout_name: NoReadOut # Use <NoReadOut> in case readout is not needed Options: PropagateSignalDown
hidden_dim: None
num_cell_dimensions: None

head_model:
_target_: topobenchmarkx.models.head_model.models.DefaultHead
task_level: ${dataset.parameters.task_level}
_target_: topobenchmarkx.models.head_models.${model.head_model.head_model_name}
head_model_name: ZeroCellModel
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum

loss:
_target_: topobenchmarkx.models.losses.loss.DefaultLoss
_target_: topobenchmarkx.models.losses.DefaultLoss
task: ${dataset.parameters.task}
loss_type: ${dataset.parameters.loss_type}

26 changes: 15 additions & 11 deletions configs/model/hypergraph/allsettransformer.yaml
@@ -1,44 +1,48 @@
_target_: topobenchmarkx.models.network_module.NetworkModule
_target_: topobenchmarkx.models.TopologicalNetworkModule

model_name: allsettransformer
model_domain: hypergraph

feature_encoder:
_target_: topobenchmarkx.models.encoders.default_encoders.BaseFeatureEncoder
_target_: topobenchmarkx.models.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset}} #${dataset.parameters.num_features}
out_channels: 32
out_channels: 128
proj_dropout: 0.0

backbone:
_target_: topomodelx.nn.hypergraph.allset_transformer.AllSetTransformer
in_channels: ${model.feature_encoder.out_channels}
hidden_channels: ${model.feature_encoder.out_channels}
n_layers: 1
n_layers: 4
heads: 4
dropout: 0.
mlp_num_layers: 1
mlp_dropout: 0.

backbone_wrapper:
_target_: topobenchmarkx.models.wrappers.default_wrapper.HypergraphWrapper
_target_: topobenchmarkx.models.wrappers.HypergraphWrapper
_partial_: true
wrapper_name: HypergraphWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.in_channels}}

readout:
_target_: topobenchmarkx.models.readouts.readout.AbstractReadOut
readout_name: None # Use <None> in case readout is not needed
_target_: topobenchmarkx.models.readouts.${model.readout.readout_name}
readout_name: NoReadOut # Use <NoReadOut> in case readout is not needed Options: PropagateSignalDown
hidden_dim: None
num_cell_dimensions: None

head_model:
_target_: topobenchmarkx.models.head_model.models.DefaultHead
task_level: ${dataset.parameters.task_level}
_target_: topobenchmarkx.models.head_models.${model.head_model.head_model_name}
head_model_name: ZeroCellModel
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum


loss:
_target_: topobenchmarkx.models.losses.loss.DefaultLoss
_target_: topobenchmarkx.models.losses.DefaultLoss
task: ${dataset.parameters.task}
loss_type: ${dataset.parameters.loss_type}

28 changes: 16 additions & 12 deletions configs/model/hypergraph/edgnn.yaml
@@ -1,45 +1,49 @@
_target_: topobenchmarkx.models.network_module.NetworkModule
_target_: topobenchmarkx.models.TopologicalNetworkModule

model_name: edgnn
model_domain: hypergraph

feature_encoder:
_target_: topobenchmarkx.models.encoders.default_encoders.BaseFeatureEncoder
_target_: topobenchmarkx.models.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset}} #${dataset.parameters.num_features}
out_channels: 32
out_channels: 128
proj_dropout: 0.0

backbone:
_target_: custom_models.hypergraph.edgnn.EDGNN
num_features: ${model.feature_encoder.out_channels} # ${dataset.parameters.num_features}
input_dropout: 0.2
dropout: 0.2
input_dropout: 0.
dropout: 0.
activation: relu
MLP_num_layers: 0
MLP_num_layers: 1
All_num_layers: 1
edconv_type: EquivSet
aggregate: 'add'

backbone_wrapper:
_target_: topobenchmarkx.models.wrappers.default_wrapper.HypergraphWrapper
_target_: topobenchmarkx.models.wrappers.HypergraphWrapper
_partial_: true
wrapper_name: HypergraphWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.in_channels}}

readout:
_target_: topobenchmarkx.models.readouts.readout.AbstractReadOut
readout_name: None # Use <None> in case readout is not needed
_target_: topobenchmarkx.models.readouts.${model.readout.readout_name}
readout_name: NoReadOut # Use <NoReadOut> in case readout is not needed Options: PropagateSignalDown, NoReadOut
hidden_dim: None
num_cell_dimensions: None

head_model:
_target_: topobenchmarkx.models.head_model.models.DefaultHead
task_level: ${dataset.parameters.task_level}
_target_: topobenchmarkx.models.head_models.${model.head_model.head_model_name}
head_model_name: ZeroCellModel
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum

loss:
_target_: topobenchmarkx.models.losses.loss.DefaultLoss
_target_: topobenchmarkx.models.losses.DefaultLoss
task: ${dataset.parameters.task}
loss_type: ${dataset.parameters.loss_type}

26 changes: 19 additions & 7 deletions configs/model/hypergraph/unignn.yaml
@@ -1,4 +1,14 @@
_target_: topobenchmarkx.models.network_module.NetworkModule
_target_: topobenchmarkx.models.TopologicalNetworkModule

model_name: unignn2
model_domain: hypergraph

feature_encoder:
_target_: topobenchmarkx.models.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset}} #${dataset.parameters.num_features}
out_channels: 32
proj_dropout: 0.0

backbone:
_target_: topomodelx.nn.hypergraph.unigcn.UniGCN
@@ -7,26 +17,28 @@ backbone:
n_layers: 1

backbone_wrapper:
_target_: topobenchmarkx.models.wrappers.default_wrapper.HypergraphWrapper
_target_: topobenchmarkx.models.wrappers.HypergraphWrapper
_partial_: true
wrapper_name: HypergraphWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.in_channels}}

readout:
_target_: topobenchmarkx.models.readouts.readout.AbstractReadOut
readout_name: None # Use <None> in case readout is not needed
_target_: topobenchmarkx.models.readouts.${model.readout.readout_name}
readout_name: NoReadOut # Use <NoReadOut> in case readout is not needed Options: PropagateSignalDown
hidden_dim: None
num_cell_dimensions: None

head_model:
_target_: topobenchmarkx.models.head_model.models.DefaultHead
task_level: ${dataset.parameters.task_level}
_target_: topobenchmarkx.models.head_models.${model.head_model.head_model_name}
head_model_name: ZeroCellModel
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum

loss:
_target_: topobenchmarkx.models.losses.loss.DefaultLoss
_target_: topobenchmarkx.models.losses.DefaultLoss
task: ${dataset.parameters.task}
loss_type: ${dataset.parameters.loss_type}

31 changes: 18 additions & 13 deletions configs/model/hypergraph/unignn2.yaml
@@ -1,43 +1,48 @@
_target_: topobenchmarkx.models.network_module.NetworkModule
_target_: topobenchmarkx.models.TopologicalNetworkModule

model_name: unignn2
model_domain: hypergraph

feature_encoder:
_target_: topobenchmarkx.models.encoders.default_encoders.BaseFeatureEncoder
_target_: topobenchmarkx.models.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset}} #${dataset.parameters.num_features}
out_channels: 32
out_channels: 128
proj_dropout: 0.0

backbone:
_target_: topomodelx.nn.hypergraph.unigcnii.UniGCNII
in_channels: ${model.feature_encoder.out_channels} # ${dataset.parameters.num_features}
in_channels: ${model.feature_encoder.out_channels}
hidden_channels: ${model.feature_encoder.out_channels}
n_layers: 1
n_layers: 4
alpha: 0.5
beta: 0.5
input_drop: 0.2
layer_drop: 0.2
input_drop: 0.0
layer_drop: 0.0

backbone_wrapper:
_target_: topobenchmarkx.models.wrappers.default_wrapper.HypergraphWrapper
_target_: topobenchmarkx.models.wrappers.HypergraphWrapper
_partial_: true
wrapper_name: HypergraphWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.in_channels}}

readout:
_target_: topobenchmarkx.models.readouts.readout.AbstractReadOut
readout_name: None # Use <None> in case readout is not needed
_target_: topobenchmarkx.models.readouts.${model.readout.readout_name}
readout_name: NoReadOut # Use <NoReadOut> in case readout is not needed Options: PropagateSignalDown
hidden_dim: None
num_cell_dimensions: None

head_model:
_target_: topobenchmarkx.models.head_model.models.DefaultHead
task_level: ${dataset.parameters.task_level}
_target_: topobenchmarkx.models.head_models.${model.head_model.head_model_name}
head_model_name: ZeroCellModel
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum

loss:
_target_: topobenchmarkx.models.losses.loss.DefaultLoss
_target_: topobenchmarkx.models.losses.DefaultLoss
task: ${dataset.parameters.task}
loss_type: ${dataset.parameters.loss_type}

24 changes: 15 additions & 9 deletions configs/model/simplicial/san.yaml
@@ -1,9 +1,14 @@
_target_: topobenchmarkx.models.network_module.NetworkModule
_target_: topobenchmarkx.models.TopologicalNetworkModule

model_name: san
model_domain: simplicial

feature_encoder:
_target_: topobenchmarkx.models.encoders.default_encoders.BaseFeatureEncoder
_target_: topobenchmarkx.models.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset}} #${dataset.parameters.num_features}
out_channels: 64
proj_dropout: 0.0
selected_dimensions:
- 0
- 1
@@ -18,27 +23,28 @@ backbone:
epsilon_harmonic: 1e-1

backbone_wrapper:
_target_: topobenchmarkx.models.wrappers.default_wrapper.SANWrapper
_target_: topobenchmarkx.models.wrappers.SANWrapper
_partial_: true
wrapper_name: SANWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.selected_dimensions}}

readout:
_target_: topobenchmarkx.models.readouts.readout.AbstractReadOut
readout_name: PropagateSignalDown # Use <None> in case readout is not needed
_target_: topobenchmarkx.models.readouts.${model.readout.readout_name}
readout_name: PropagateSignalDown # Use <NoReadOut> in case readout is not needed Options: PropagateSignalDown
hidden_dim: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.selected_dimensions}}

head_model:
_target_: topobenchmarkx.models.head_model.models.DefaultHead
task_level: ${dataset.parameters.task_level}
_target_: topobenchmarkx.models.head_models.${model.head_model.head_model_name}
head_model_name: ZeroCellModel
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum


loss:
_target_: topobenchmarkx.models.losses.loss.DefaultLoss
_target_: topobenchmarkx.models.losses.DefaultLoss
task: ${dataset.parameters.task}
loss_type: ${dataset.parameters.loss_type}

25 changes: 16 additions & 9 deletions configs/model/simplicial/sccn.yaml
@@ -1,38 +1,45 @@
_target_: topobenchmarkx.models.network_module.NetworkModule
_target_: topobenchmarkx.models.TopologicalNetworkModule

model_name: sccnn
model_domain: simplicial

feature_encoder:
_target_: topobenchmarkx.models.encoders.default_encoders.BaseFeatureEncoder
_target_: topobenchmarkx.models.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset}} # ${dataset.parameters.num_features}
out_channels: 32
proj_dropout: 0.0

backbone:
_target_: topomodelx.nn.simplicial.sccn.SCCN
channels: ${model.feature_encoder.out_channels}
max_rank: 1
max_rank: 2
n_layers: 1
update_func: "sigmoid"

backbone_wrapper:
_target_: topobenchmarkx.models.wrappers.default_wrapper.SCCNNWrapper
_target_: topobenchmarkx.models.wrappers.SCCNWrapper
_partial_: true
wrapper_name: SCCNWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.in_channels}}

readout:
_target_: topobenchmarkx.models.readouts.readout.AbstractReadOut
readout_name: PropagateSignalDown # Use <None> in case readout is not needed
_target_: topobenchmarkx.models.readouts.${model.readout.readout_name}
readout_name: PropagateSignalDown # Use <NoReadOut> in case readout is not needed Options: PropagateSignalDown
hidden_dim: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.in_channels}}

head_model:
_target_: topobenchmarkx.models.head_model.models.DefaultHead
task_level: ${dataset.parameters.task_level}
_target_: topobenchmarkx.models.head_models.${model.head_model.head_model_name}
head_model_name: ZeroCellModel
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum

loss:
_target_: topobenchmarkx.models.losses.loss.DefaultLoss
_target_: topobenchmarkx.models.losses.DefaultLoss
task: ${dataset.parameters.task}
loss_type: ${dataset.parameters.loss_type}

23 changes: 15 additions & 8 deletions configs/model/simplicial/sccnn.yaml
@@ -1,9 +1,14 @@
_target_: topobenchmarkx.models.network_module.NetworkModule
_target_: topobenchmarkx.models.TopologicalNetworkModule

model_name: sccnn
model_domain: simplicial

feature_encoder:
_target_: topobenchmarkx.models.encoders.default_encoders.BaseFeatureEncoder
_target_: topobenchmarkx.models.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset}} #${dataset.parameters.num_features}
out_channels: 32
proj_dropout: 0.0
selected_dimensions:
- 0
- 1
@@ -26,27 +31,29 @@ backbone:
n_layers: 1

backbone_wrapper:
_target_: topobenchmarkx.models.wrappers.default_wrapper.SCCNNWrapper
_target_: topobenchmarkx.models.wrappers.SCCNNWrapper
_partial_: true
wrapper_name: SCCNNWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.selected_dimensions}}

readout:
_target_: topobenchmarkx.models.readouts.readout.AbstractReadOut
readout_name: PropagateSignalDown # Use <None> in case readout is not needed
_target_: topobenchmarkx.models.readouts.${model.readout.readout_name}
readout_name: PropagateSignalDown # Use <NoReadOut> in case readout is not needed Options: PropagateSignalDown
hidden_dim: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.selected_dimensions}}

head_model:
_target_: topobenchmarkx.models.head_model.models.DefaultHead
task_level: ${dataset.parameters.task_level}
_target_: topobenchmarkx.models.head_models.${model.head_model.head_model_name}
head_model_name: ZeroCellModel
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum


loss:
_target_: topobenchmarkx.models.losses.loss.DefaultLoss
_target_: topobenchmarkx.models.losses.DefaultLoss
task: ${dataset.parameters.task}
loss_type: ${dataset.parameters.loss_type}

26 changes: 16 additions & 10 deletions configs/model/simplicial/sccnn_custom.yaml
@@ -1,15 +1,19 @@
_target_: topobenchmarkx.models.network_module.NetworkModule
_target_: topobenchmarkx.models.TopologicalNetworkModule

model_name: sccnn_custom
model_domain: simplicial

feature_encoder:
_target_: topobenchmarkx.models.encoders.default_encoders.BaseFeatureEncoder
_target_: topobenchmarkx.models.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset}} #${dataset.parameters.num_features}
out_channels: 64
out_channels: 32
proj_dropout: 0.0
selected_dimensions:
- 0
- 1
- 2


backbone:
_target_: custom_models.simplicial.sccnn.SCCNNCusctom
in_channels_all:
@@ -27,26 +31,28 @@ backbone:
n_layers: 1

backbone_wrapper:
_target_: topobenchmarkx.models.wrappers.default_wrapper.SCCNNWrapper
_target_: topobenchmarkx.models.wrappers.SCCNNWrapper
_partial_: true
wrapper_name: SCCNNWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.selected_dimensions}}

readout:
_target_: topobenchmarkx.models.readouts.readout.AbstractReadOut
readout_name: PropagateSignalDown # Use <None> in case readout is not needed
_target_: topobenchmarkx.models.readouts.${model.readout.readout_name}
readout_name: PropagateSignalDown # Use <NoReadOut> in case readout is not needed Options: PropagateSignalDown
hidden_dim: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.selected_dimensions}}

head_model:
_target_: topobenchmarkx.models.head_model.models.DefaultHead
task_level: ${dataset.parameters.task_level}
_target_: topobenchmarkx.models.head_models.${model.head_model.head_model_name}
head_model_name: ZeroCellModel
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum

loss:
_target_: topobenchmarkx.models.losses.loss.DefaultLoss
_target_: topobenchmarkx.models.losses.DefaultLoss
task: ${dataset.parameters.task}
loss_type: ${dataset.parameters.loss_type}

23 changes: 15 additions & 8 deletions configs/model/simplicial/scn.yaml
@@ -1,9 +1,14 @@
_target_: topobenchmarkx.models.network_module.NetworkModule
_target_: topobenchmarkx.models.TopologicalNetworkModule

mdoel_name: scn
model_domain: simplicial

feature_encoder:
_target_: topobenchmarkx.models.encoders.default_encoders.BaseFeatureEncoder
_target_: topobenchmarkx.models.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset}} #${dataset.parameters.num_features}
out_channels: 32
proj_dropout: 0.0
selected_dimensions:
- 0
- 1
@@ -17,26 +22,28 @@ backbone:
n_layers: 1

backbone_wrapper:
_target_: topobenchmarkx.models.wrappers.default_wrapper.SCNWrapper
_target_: topobenchmarkx.models.wrappers.SCNWrapper
_partial_: true
wrapper_name: SCNWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.selected_dimensions}}

readout:
_target_: topobenchmarkx.models.readouts.readout.AbstractReadOut
readout_name: PropagateSignalDown # Use <None> in case readout is not needed
_target_: topobenchmarkx.models.readouts.${model.readout.readout_name}
readout_name: PropagateSignalDown # Use <NoReadOut> in case readout is not needed Options: PropagateSignalDown
hidden_dim: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_list_length:${model.feature_encoder.selected_dimensions}}

head_model:
_target_: topobenchmarkx.models.head_model.models.DefaultHead
task_level: ${dataset.parameters.task_level}
_target_: topobenchmarkx.models.head_models.${model.head_model.head_model_name}
head_model_name: ZeroCellModel
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum

loss:
_target_: topobenchmarkx.models.losses.loss.DefaultLoss
_target_: topobenchmarkx.models.losses.DefaultLoss
task: ${dataset.parameters.task}
loss_type: ${dataset.parameters.loss_type}

4 changes: 2 additions & 2 deletions configs/train.yaml
@@ -4,8 +4,8 @@
# order of defaults determines the order in which configs override each other
defaults:
- _self_
- dataset: PROTEINS_TU #us_country_demos
- model: hypergraph/allsettransformer #hypergraph/unignn2 #allsettransformer
- dataset: MUTAG # us_country_demos
- model: cell/can #hypergraph/unignn2 #allsettransformer
- evaluator: default
- callbacks: default
- logger: wandb # set logger here or use command line (e.g. `python train.py logger=tensorboard`)
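
As the logger comment illustrates, any of these defaults can be overridden from the command line, for example:

python train.py dataset=MUTAG model=cell/can
python train.py dataset=PROTEINS_TU model=hypergraph/allsettransformer logger=tensorboard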
@@ -34,7 +34,7 @@ def forward(self, xe, Lu, Ld):
return z_h + z_s + z_i


class CWNDCM(nn.Module):
class CCCN(nn.Module):
def __init__(self, in_channels, n_layers=2, dropout=0.0, last_act=False):
super().__init__()
self.d = dropout
39 changes: 21 additions & 18 deletions custom_models/cell/cin.py
@@ -1,9 +1,8 @@
"""CWN class."""

import torch
import torch.nn.functional as F
from topomodelx.nn.cell.cwn_layer import CWNLayer
import torch.nn as nn
import torch.nn.functional as F
from topomodelx.base.conv import Conv
from torch_geometric.nn.models import MLP

@@ -65,7 +64,8 @@ def forward(
neighborhood_2_to_1,
neighborhood_0_to_1,
):
"""Forward computation through projection, convolutions, linear layers and average pooling.
"""Forward computation through projection, convolutions, linear layers
and average pooling.
Parameters
----------
@@ -192,15 +192,21 @@ def __init__(
self.conv_1_to_1 = (
conv_1_to_1
if conv_1_to_1 is not None
else _CWNDefaultFirstConv(in_channels_1, in_channels_2, out_channels)
else _CWNDefaultFirstConv(
in_channels_1, in_channels_2, out_channels
)
)
self.conv_0_to_1 = (
conv_0_to_1
if conv_0_to_1 is not None
else _CWNDefaultSecondConv(in_channels_0, in_channels_1, out_channels)
else _CWNDefaultSecondConv(
in_channels_0, in_channels_1, out_channels
)
)
self.aggregate_fn = (
aggregate_fn if aggregate_fn is not None else _CWNDefaultAggregate()
aggregate_fn
if aggregate_fn is not None
else _CWNDefaultAggregate()
)
self.update_fn = (
update_fn
@@ -325,11 +331,10 @@ def forward(


class _CWNDefaultFirstConv(nn.Module):
r"""
Default implementation of the first convolutional step in CWNLayer.
r"""Default implementation of the first convolutional step in CWNLayer.
The self.forward method of this module must be treated as
a protocol for the first convolutional step in CWN layer.
The self.forward method of this module must be treated as a protocol for
the first convolutional step in CWN layer.
"""

def __init__(
@@ -383,11 +388,10 @@ def forward(self, x_1, x_2, neighborhood_1_to_1, neighborhood_2_to_1):


class _CWNDefaultSecondConv(nn.Module):
r"""
Default implementation of the second convolutional step in CWNLayer.
r"""Default implementation of the second convolutional step in CWNLayer.
The self.forward method of this module must be treated as
a protocol for the second convolutional step in CWN layer.
The self.forward method of this module must be treated as a protocol for
the second convolutional step in CWN layer.
"""

def __init__(self, in_channels_0, out_channels) -> None:
@@ -417,11 +421,10 @@ def forward(self, x_0, neighborhood_0_to_1):


class _CWNDefaultAggregate(nn.Module):
r"""
Default implementation of an aggregation step in CWNLayer.
r"""Default implementation of an aggregation step in CWNLayer.
The self.forward method of this module must be treated as
a protocol for the aggregation step in CWN layer.
The self.forward method of this module must be treated as a protocol for
the aggregation step in CWN layer.
"""

def __init__(self) -> None:
39 changes: 28 additions & 11 deletions custom_models/hypergraph/edgnn.py
@@ -46,7 +46,9 @@ def __init__(
self.lins.append(nn.Linear(in_channels, hidden_channels))
self.normalizations.append(nn.BatchNorm1d(hidden_channels))
for _ in range(num_layers - 2):
self.lins.append(nn.Linear(hidden_channels, hidden_channels))
self.lins.append(
nn.Linear(hidden_channels, hidden_channels)
)
self.normalizations.append(nn.BatchNorm1d(hidden_channels))
self.lins.append(nn.Linear(hidden_channels, out_channels))
elif Normalization == "ln":
@@ -65,7 +67,9 @@ def __init__(
self.lins.append(nn.Linear(in_channels, hidden_channels))
self.normalizations.append(nn.LayerNorm(hidden_channels))
for _ in range(num_layers - 2):
self.lins.append(nn.Linear(hidden_channels, hidden_channels))
self.lins.append(
nn.Linear(hidden_channels, hidden_channels)
)
self.normalizations.append(nn.LayerNorm(hidden_channels))
self.lins.append(nn.Linear(hidden_channels, out_channels))
else:
@@ -78,7 +82,9 @@ def __init__(
self.lins.append(nn.Linear(in_channels, hidden_channels))
self.normalizations.append(nn.Identity())
for _ in range(num_layers - 2):
self.lins.append(nn.Linear(hidden_channels, hidden_channels))
self.lins.append(
nn.Linear(hidden_channels, hidden_channels)
)
self.normalizations.append(nn.Identity())
self.lins.append(nn.Linear(hidden_channels, out_channels))

@@ -88,7 +94,7 @@ def reset_parameters(self):
for lin in self.lins:
lin.reset_parameters()
for normalization in self.normalizations:
if not (normalization.__class__.__name__ == "Identity"):
if normalization.__class__.__name__ != "Identity":
normalization.reset_parameters()

def forward(self, x):
@@ -245,7 +251,9 @@ def forward(self, X, vertex, edges, X0):


class JumpLinkConv(nn.Module):
def __init__(self, in_features, out_features, mlp_layers=2, aggr="add", alpha=0.5):
def __init__(
self, in_features, out_features, mlp_layers=2, aggr="add", alpha=0.5
):
super().__init__()
self.W = MLP(
in_features,
@@ -339,7 +347,10 @@ def forward(self, X, vertex, edges, X0):
) # [E, C], reduce is 'mean' here as default

deg_e = torch_scatter.scatter(
torch.ones(Xve.shape[0], device=Xve.device), edges, dim=-2, reduce="sum"
torch.ones(Xve.shape[0], device=Xve.device),
edges,
dim=-2,
reduce="sum",
)
Xe = torch.cat([Xe, torch.log(deg_e)[..., None]], -1)

@@ -350,7 +361,10 @@ def forward(self, X, vertex, edges, X0):
) # [N, C]

deg_v = torch_scatter.scatter(
torch.ones(Xev.shape[0], device=Xev.device), vertex, dim=-2, reduce="sum"
torch.ones(Xev.shape[0], device=Xev.device),
vertex,
dim=-2,
reduce="sum",
)
X = self.W3(torch.cat([Xv, X, X0, torch.log(deg_v)[..., None]], -1))

@@ -374,7 +388,7 @@ def __init__(
normalization="None",
AllSet_input_norm=False,
):
"""EDGNN
"""EDGNN.
Args:
num_features (int): number of input features
@@ -390,7 +404,6 @@ def __init__(
aggregate (str, optional): aggregation method. Defaults to 'add'.
normalization (str, optional): normalization method. Defaults to 'None'.
AllSet_input_norm (bool, optional): whether to normalize input features. Defaults to False.
"""
super().__init__()
act = {"Id": nn.Identity(), "relu": nn.ReLU(), "prelu": nn.PReLU()}
@@ -402,8 +415,12 @@ def __init__(
self.hidden_channels = self.in_channels

self.mlp1_layers = MLP_num_layers
self.mlp2_layers = MLP_num_layers if MLP2_num_layers < 0 else MLP2_num_layers
self.mlp3_layers = MLP_num_layers if MLP3_num_layers < 0 else MLP3_num_layers
self.mlp2_layers = (
MLP_num_layers if MLP2_num_layers < 0 else MLP2_num_layers
)
self.mlp3_layers = (
MLP_num_layers if MLP3_num_layers < 0 else MLP3_num_layers
)
self.nlayer = All_num_layers
self.edconv_type = edconv_type

76 changes: 51 additions & 25 deletions custom_models/simplicial/sccnn.py
@@ -1,11 +1,9 @@
"""SCCNN implementation for complex classification."""

import torch
from topomodelx.nn.simplicial.sccnn_layer import SCCNNLayer
from torch.nn.parameter import Parameter



class SCCNNCusctom(torch.nn.Module):
"""SCCNN implementation for complex classification.
@@ -28,7 +26,6 @@ class SCCNNCusctom(torch.nn.Module):
Update function for the simplicial complex convolution.
n_layers: int
Number of layers.
"""

def __init__(
@@ -44,9 +41,15 @@ def __init__(
super().__init__()
# first layer
# we use an MLP to map the features on simplices of different dimensions to the same dimension
self.in_linear_0 = torch.nn.Linear(in_channels_all[0], hidden_channels_all[0])
self.in_linear_1 = torch.nn.Linear(in_channels_all[1], hidden_channels_all[1])
self.in_linear_2 = torch.nn.Linear(in_channels_all[2], hidden_channels_all[2])
self.in_linear_0 = torch.nn.Linear(
in_channels_all[0], hidden_channels_all[0]
)
self.in_linear_1 = torch.nn.Linear(
in_channels_all[1], hidden_channels_all[1]
)
self.in_linear_2 = torch.nn.Linear(
in_channels_all[2], hidden_channels_all[2]
)

self.layers = torch.nn.ModuleList(
SCCNNLayer(
@@ -100,6 +103,7 @@ def forward(self, x_all, laplacian_all, incidence_all):
# Layer
"""Simplicial Complex Convolutional Neural Network Layer."""


class SCCNNLayer(torch.nn.Module):
r"""Layer of a Simplicial Complex Convolutional Neural Network.
@@ -215,7 +219,9 @@ def __init__(

self.weight_0 = Parameter(
torch.Tensor(
self.in_channels_0, self.out_channels_0, 1 + conv_order + 1 + conv_order
self.in_channels_0,
self.out_channels_0,
1 + conv_order + 1 + conv_order,
)
)

@@ -326,7 +332,9 @@ def chebyshev_conv(self, conv_operator, conv_order, x):
Output tensor. x[:, :, k] = (conv_operator@....@conv_operator) @ x.
"""
num_simplices, num_channels = x.shape
X = torch.empty(size=(num_simplices, num_channels, conv_order)).to(x.device)
X = torch.empty(size=(num_simplices, num_channels, conv_order)).to(
x.device
)

if self.aggr_norm:
X[:, :, 0] = torch.mm(conv_operator, x)
@@ -388,7 +396,9 @@ def forward(self, x_all, laplacian_all, incidence_all):
x_0, x_1, x_2 = x_all

if self.sc_order == 2:
laplacian_0, laplacian_down_1, laplacian_up_1, laplacian_2 = laplacian_all
laplacian_0, laplacian_down_1, laplacian_up_1, laplacian_2 = (
laplacian_all
)
elif self.sc_order > 2:
(
laplacian_0,
@@ -407,7 +417,6 @@ def forward(self, x_all, laplacian_all, incidence_all):
# torch.eye(num_edges).to(x_0.device),
# torch.eye(num_triangles).to(x_0.device),
# )

"""
Convolution in the node space
"""
@@ -429,7 +438,9 @@ def forward(self, x_all, laplacian_all, incidence_all):
x_1_to_0_laplacian = self.chebyshev_conv(
laplacian_0, self.conv_order, x_1_to_0_upper
)
x_1_to_0 = torch.cat([x_1_to_0_upper.unsqueeze(2), x_1_to_0_laplacian], dim=2)
x_1_to_0 = torch.cat(
[x_1_to_0_upper.unsqueeze(2), x_1_to_0_laplacian], dim=2
)
# -------------------

x_0_all = torch.cat((x_0_to_0, x_1_to_0), 2)
@@ -460,13 +471,19 @@ def forward(self, x_all, laplacian_all, incidence_all):
x_0_1_lower = torch.mm(b1.T, x_0)

# Calculate lowwer chebyshev_conv
x_0_1_down = self.chebyshev_conv(laplacian_down_1, self.conv_order, x_0_1_lower)
x_0_1_down = self.chebyshev_conv(
laplacian_down_1, self.conv_order, x_0_1_lower
)

# Calculate upper chebyshev_conv (Note: in case of signed incidence should be always zero)
x_0_1_up = self.chebyshev_conv(laplacian_up_1, self.conv_order, x_0_1_lower)
x_0_1_up = self.chebyshev_conv(
laplacian_up_1, self.conv_order, x_0_1_lower
)

# Concatenate output of filters
x_0_to_1 = torch.cat([x_0_1_lower.unsqueeze(2), x_0_1_down, x_0_1_up], dim=2)
x_0_to_1 = torch.cat(
[x_0_1_lower.unsqueeze(2), x_0_1_down, x_0_1_up], dim=2
)
# -------------------

# x_2_to_1 = torch.mm(b2, x_2)
@@ -477,20 +494,23 @@ def forward(self, x_all, laplacian_all, incidence_all):
x_2_1_upper = torch.mm(b2, x_2)

# Calculate lowwer chebyshev_conv (Note: In case of signed incidence should be always zero)
x_2_1_down = self.chebyshev_conv(laplacian_down_1, self.conv_order, x_2_1_upper)
x_2_1_down = self.chebyshev_conv(
laplacian_down_1, self.conv_order, x_2_1_upper
)

# Calculate upper chebyshev_conv
x_2_1_up = self.chebyshev_conv(laplacian_up_1, self.conv_order, x_2_1_upper)
x_2_1_up = self.chebyshev_conv(
laplacian_up_1, self.conv_order, x_2_1_upper
)

x_2_to_1 = torch.cat([x_2_1_upper.unsqueeze(2), x_2_1_down, x_2_1_up], dim=2)
x_2_to_1 = torch.cat(
[x_2_1_upper.unsqueeze(2), x_2_1_down, x_2_1_up], dim=2
)

# -------------------
x_1_all = torch.cat((x_0_to_1, x_1_to_1, x_2_to_1), 2)

"""
convolution in the face (triangle) space, depending on the SC order,
the exact form maybe a little different
"""
"""Convolution in the face (triangle) space, depending on the SC order,
the exact form maybe a little different."""
# -------------------Logic to obtain update for 2-cells --------
# x_identity_2 = torch.unsqueeze(identity_2 @ x_2, 2)

@@ -516,10 +536,16 @@ def forward(self, x_all, laplacian_all, incidence_all):
# x_1_to_2 = torch.cat((x_1_to_2_identity, x_1_to_2), 2)

x_1_2_lower = torch.mm(b2.T, x_1)
x_1_2_down = self.chebyshev_conv(laplacian_down_2, self.conv_order, x_1_2_lower)
x_1_2_down = self.chebyshev_conv(laplacian_up_2, self.conv_order, x_1_2_lower)
x_1_2_down = self.chebyshev_conv(
    laplacian_down_2, self.conv_order, x_1_2_lower
)
# Fixed: keep the upper-Laplacian filter in its own variable instead of
# silently overwriting x_1_2_down.
x_1_2_up = self.chebyshev_conv(
    laplacian_up_2, self.conv_order, x_1_2_lower
)

x_1_to_2 = torch.cat([x_1_2_lower.unsqueeze(2), x_1_2_down, x_1_2_down], dim=2)
x_1_to_2 = torch.cat(
    [x_1_2_lower.unsqueeze(2), x_1_2_down, x_1_2_up], dim=2
)

# Note: executing this part would require simplices of order k+1 (order 3 in this case)
# x_3_2_upper = x_1_to_2 = torch.mm(b2, x_3)
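
For orientation while reading this diff, the propagation primitive being reformatted above reduces to a short recursion. The following minimal sketch (a simplification assuming a dense operator and no aggregation normalization, not the repository's exact implementation) shows what chebyshev_conv computes:

import torch

def chebyshev_conv(conv_operator: torch.Tensor, conv_order: int, x: torch.Tensor) -> torch.Tensor:
    """Stack conv_order successive applications of conv_operator to x.

    Returns X of shape (num_simplices, num_channels, conv_order), where
    X[:, :, k] is conv_operator applied (k + 1) times to x.
    """
    num_simplices, num_channels = x.shape
    X = torch.empty(num_simplices, num_channels, conv_order)
    X[:, :, 0] = conv_operator @ x
    for k in range(1, conv_order):
        X[:, :, k] = conv_operator @ X[:, :, k - 1]
    return X

# Toy usage: 4 simplices, 2 channels, 3 hops.
L = torch.eye(4) - torch.ones(4, 4) / 4
print(chebyshev_conv(L, conv_order=3, x=torch.randn(4, 2)).shape)  # torch.Size([4, 2, 3])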
34 changes: 0 additions & 34 deletions env.bash

This file was deleted.

20 changes: 20 additions & 0 deletions env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash -l

pip install --upgrade pip
pip install -e '.[all]'

pip install --no-dependencies git+https://github.com/pyt-team/TopoNetX.git
pip install --no-dependencies git+https://github.com/pyt-team/TopoModelX.git
pip install --no-dependencies git+https://github.com/pyt-team/TopoEmbedX.git

# Note that not all combinations of torch and CUDA are available
# See https://github.com/pyg-team/pyg-lib to check the configuration that works for you
TORCH="2.3.0" # available options: 1.12.0, 1.13.0, 2.0.0, 2.1.0, 2.2.0, or 2.3.0
CUDA="cu121" # if available, select the CUDA version suitable for your system
# available options: cpu, cu102, cu113, cu116, cu117, cu118, or cu121
pip install torch==${TORCH} --extra-index-url https://download.pytorch.org/whl/${CUDA}
pip install lightning torch_geometric==2.4.0
pip install pyg-lib torch-scatter torch-sparse torch-cluster -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html
pytest

pre-commit install
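
Once env.sh completes, a quick import check helps confirm the stack resolved. A minimal sketch (assuming the repositories installed above expose these import names):

import torch
import torch_geometric
import toponetx  # from pyt-team/TopoNetX

print("torch:", torch.__version__)
print("torch_geometric:", torch_geometric.__version__)
print("CUDA available:", torch.cuda.is_available())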
10 changes: 10 additions & 0 deletions format_and_lint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/sh

# Run ruff to check for issues and fix them
ruff check . --fix

# Run docformatter to reformat docstrings and comments
docformatter --in-place --recursive --wrap-summaries 79 --wrap-descriptions 79 .

# Run black to format the code
black .
147 changes: 147 additions & 0 deletions hp_scripts/main_exp/cellular/CAN.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# Description: Main experiment script for the CAN model.
# ----Node regression datasets: US County Demographics----
task_variables=( 'Election' 'MedianIncome' 'MigraRate' 'BirthRate' 'DeathRate' 'BachelorRate' 'UnemploymentRate' )

for task_variable in ${task_variables[*]}
do
python train.py \
dataset=us_country_demos \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.task_variable=$task_variable \
model=cell/can \
model.feature_encoder.out_channels=32,64,128 \
model.feature_encoder.proj_dropout=0.25,0.5 \
model.backbone.n_layers=1,2,3,4 \
model.optimizer.lr="0.01,0.001" \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
trainer.max_epochs=1000 \
trainer.min_epochs=500 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=50 \
logger.wandb.project=TopoBenchmarkX_Cellular \
tags="[MainExperiment]" \
--multirun

done

# ----Cocitation datasets----
datasets=( 'cocitation_cora' 'cocitation_citeseer' 'cocitation_pubmed' )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
dataset.parameters.data_seed=0,3,5,7,9 \
model=cell/can \
model.feature_encoder.out_channels=32,64,128 \
model.feature_encoder.proj_dropout=0.25,0.5 \
model.backbone.n_layers=1,2 \
model.optimizer.lr="0.01,0.001" \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=25 \
logger.wandb.project=TopoBenchmarkX_Cellular \
tags="[MainExperiment]" \
--multirun
done

# # ----Graph regression dataset----
# # Train on ZINC dataset
# python train.py \
# dataset=ZINC \
# seed=42,3,5,23,150 \
# model=cell/can \
# model.optimizer.lr=0.01,0.001 \
# model.optimizer.weight_decay=0 \
# model.feature_encoder.out_channels=32,64,128 \
# model.backbone.n_layers=2,4 \
# model.feature_encoder.proj_dropout=0.25,0.5 \
# dataset.parameters.batch_size=128,256 \
# dataset.transforms.one_hot_node_degree_features.degrees_fields=x \
# dataset.parameters.data_seed=0 \
# dataset.transforms.graph2cell_lifting.max_cell_length=10 \
# model.readout.readout_name="NoReadOut,PropagateSignalDown" \
# logger.wandb.project=TopoBenchmarkX_Cellular \
# trainer.max_epochs=500 \
# trainer.min_epochs=50 \
# callbacks.early_stopping.min_delta=0.005 \
# trainer.check_val_every_n_epoch=5 \
# callbacks.early_stopping.patience=10 \
# tags="[MainExperiment]" \
# --multirun

# ----TU graph datasets----
# MUTAG has very few samples, so we use a smaller batch size
# Train on MUTAG dataset
python train.py \
dataset=MUTAG \
model=cell/can \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.batch_size=32,64 \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
logger.wandb.project=TopoBenchmarkX_Cellular \
callbacks.early_stopping.patience=25 \
tags="[MainExperiment]" \
--multirun
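
Each comma-separated Hydra value above multiplies the sweep, since --multirun takes the Cartesian product of all options. A back-of-the-envelope count for the MUTAG run, sketched under the assumption that every listed option combines freely:

from itertools import product

# Option lists mirror the MUTAG command above (single-valued flags omitted).
grid = {
    "model.optimizer.lr": [0.01, 0.001],
    "model.feature_encoder.out_channels": [32, 64, 128],
    "model.backbone.n_layers": [1, 2, 3, 4],
    "model.feature_encoder.proj_dropout": [0.25, 0.5],
    "dataset.parameters.data_seed": [0, 3, 5, 7, 9],
    "dataset.parameters.batch_size": [32, 64],
    "model.readout.readout_name": ["NoReadOut", "PropagateSignalDown"],
}
print(len(list(product(*grid.values()))))  # 2*3*4*2*5*2*2 = 960 runs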

# Train the rest of the TU graph datasets
datasets=( 'PROTEINS_TU' 'NCI1' 'NCI109' 'IMDB-BINARY' 'IMDB-MULTI' )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
model=cell/can \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.batch_size=128,256 \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
logger.wandb.project=TopoBenchmarkX_Cellular \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=5 \
callbacks.early_stopping.patience=10 \
--multirun
done

# ----Heterophilic datasets----

datasets=( roman_empire minesweeper )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
model=cell/can \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.batch_size=1 \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
logger.wandb.project=TopoBenchmarkX_Cellular \
trainer.max_epochs=1000 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=50 \
tags="[MainExperiment]" \
--multirun
done
147 changes: 147 additions & 0 deletions hp_scripts/main_exp/cellular/CCCN.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# Description: Main experiment script for the CCCN model.
# ----Node regression datasets: US County Demographics----
task_variables=( 'Election' 'MedianIncome' 'MigraRate' 'BirthRate' 'DeathRate' 'BachelorRate' 'UnemploymentRate' )

for task_variable in ${task_variables[*]}
do
python train.py \
dataset=us_country_demos \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.task_variable=$task_variable \
model=cell/cccn \
model.feature_encoder.out_channels=32,64,128 \
model.feature_encoder.proj_dropout=0.25,0.5 \
model.backbone.n_layers=1,2,3,4 \
model.optimizer.lr="0.01,0.001" \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
trainer.max_epochs=1000 \
trainer.min_epochs=500 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=50 \
logger.wandb.project=TopoBenchmarkX_Cellular \
tags="[MainExperiment]" \
--multirun

done

# ----Cocitation datasets----
datasets=( 'cocitation_cora' 'cocitation_citeseer' 'cocitation_pubmed' )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
dataset.parameters.data_seed=0,3,5,7,9 \
model=cell/cccn \
model.feature_encoder.out_channels=32,64,128 \
model.feature_encoder.proj_dropout=0.25,0.5 \
model.backbone.n_layers=1,2 \
model.optimizer.lr="0.01,0.001" \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=25 \
logger.wandb.project=TopoBenchmarkX_Cellular \
tags="[MainExperiment]" \
--multirun
done

# # ----Graph regression dataset----
# # Train on ZINC dataset
# python train.py \
# dataset=ZINC \
# seed=42,3,5,23,150 \
# model=cell/cccn \
# model.optimizer.lr=0.01,0.001 \
# model.optimizer.weight_decay=0 \
# model.feature_encoder.out_channels=32,64,128 \
# model.backbone.n_layers=2,4 \
# model.feature_encoder.proj_dropout=0.25,0.5 \
# dataset.parameters.batch_size=128,256 \
# dataset.transforms.one_hot_node_degree_features.degrees_fields=x \
# dataset.parameters.data_seed=0 \
# dataset.transforms.graph2cell_lifting.max_cell_length=10 \
# model.readout.readout_name="NoReadOut,PropagateSignalDown" \
# logger.wandb.project=TopoBenchmarkX_Cellular \
# trainer.max_epochs=500 \
# trainer.min_epochs=50 \
# callbacks.early_stopping.min_delta=0.005 \
# trainer.check_val_every_n_epoch=5 \
# callbacks.early_stopping.patience=10 \
# tags="[MainExperiment]" \
# --multirun

# ----TU graph datasets----
# MUTAG has very few samples, so we use a smaller batch size
# Train on MUTAG dataset
python train.py \
dataset=MUTAG \
model=cell/cccn \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.batch_size=32,64 \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
logger.wandb.project=TopoBenchmarkX_Cellular \
callbacks.early_stopping.patience=25 \
tags="[MainExperiment]" \
--multirun

# Train the rest of the TU graph datasets
datasets=( 'PROTEINS_TU' 'NCI1' 'NCI109' 'IMDB-BINARY' 'IMDB-MULTI' )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
model=cell/cccn \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.batch_size=128,256 \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
logger.wandb.project=TopoBenchmarkX_Cellular \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=5 \
callbacks.early_stopping.patience=10 \
--multirun
done

# ----Heterophilic datasets----

datasets=( roman_empire minesweeper )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
model=cell/cccn \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.batch_size=1 \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
logger.wandb.project=TopoBenchmarkX_Cellular \
trainer.max_epochs=1000 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=50 \
tags="[MainExperiment]" \
--multirun
done
147 changes: 147 additions & 0 deletions hp_scripts/main_exp/cellular/CCXN.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# Description: Main experiment script for the CCXN model.
# ----Node regression datasets: US County Demographics----
task_variables=( 'Election' 'MedianIncome' 'MigraRate' 'BirthRate' 'DeathRate' 'BachelorRate' 'UnemploymentRate' )

for task_variable in ${task_variables[*]}
do
python train.py \
dataset=us_country_demos \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.task_variable=$task_variable \
model=cell/ccxn \
model.feature_encoder.out_channels=32,64,128 \
model.feature_encoder.proj_dropout=0.25,0.5 \
model.backbone.n_layers=1,2,3,4 \
model.optimizer.lr="0.01,0.001" \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
trainer.max_epochs=1000 \
trainer.min_epochs=500 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=50 \
logger.wandb.project=TopoBenchmarkX_Cellular \
tags="[MainExperiment]" \
--multirun

done

# ----Cocitation datasets----
datasets=( 'cocitation_cora' 'cocitation_citeseer' 'cocitation_pubmed' )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
dataset.parameters.data_seed=0,3,5,7,9 \
model=cell/ccxn \
model.feature_encoder.out_channels=32,64,128 \
model.feature_encoder.proj_dropout=0.25,0.5 \
model.backbone.n_layers=1,2 \
model.optimizer.lr="0.01,0.001" \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=25 \
logger.wandb.project=TopoBenchmarkX_Cellular \
tags="[MainExperiment]" \
--multirun
done

# # ----Graph regression dataset----
# # Train on ZINC dataset
# python train.py \
# dataset=ZINC \
# seed=42,3,5,23,150 \
# model=cell/ccxn \
# model.optimizer.lr=0.01,0.001 \
# model.optimizer.weight_decay=0 \
# model.feature_encoder.out_channels=32,64,128 \
# model.backbone.n_layers=2,4 \
# model.feature_encoder.proj_dropout=0.25,0.5 \
# dataset.parameters.batch_size=128,256 \
# dataset.transforms.one_hot_node_degree_features.degrees_fields=x \
# dataset.parameters.data_seed=0 \
# dataset.transforms.graph2cell_lifting.max_cell_length=10 \
# model.readout.readout_name="NoReadOut,PropagateSignalDown" \
# logger.wandb.project=TopoBenchmarkX_Cellular \
# trainer.max_epochs=500 \
# trainer.min_epochs=50 \
# callbacks.early_stopping.min_delta=0.005 \
# trainer.check_val_every_n_epoch=5 \
# callbacks.early_stopping.patience=10 \
# tags="[MainExperiment]" \
# --multirun

# ----TU graph datasets----
# MUTAG has very few samples, so we use a smaller batch size
# Train on MUTAG dataset
python train.py \
dataset=MUTAG \
model=cell/ccxn \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.batch_size=32,64 \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
logger.wandb.project=TopoBenchmarkX_Cellular \
callbacks.early_stopping.patience=25 \
tags="[MainExperiment]" \
--multirun

# Train the rest of the TU graph datasets
datasets=( 'PROTEINS_TU' 'NCI1' 'NCI109' 'IMDB-BINARY' 'IMDB-MULTI' )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
model=cell/ccxn \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.batch_size=128,256 \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
logger.wandb.project=TopoBenchmarkX_Cellular \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=5 \
callbacks.early_stopping.patience=10 \
--multirun
done

# ----Heterophilic datasets----

datasets=( roman_empire minesweeper )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
model=cell/ccxn \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.batch_size=1 \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
logger.wandb.project=TopoBenchmarkX_Cellular \
trainer.max_epochs=1000 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=50 \
tags="[MainExperiment]" \
--multirun
done
147 changes: 147 additions & 0 deletions hp_scripts/main_exp/cellular/CWN.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# Description: Main experiment script for the CWN model.
# ----Node regression datasets: US County Demographics----
task_variables=( 'Election' 'MedianIncome' 'MigraRate' 'BirthRate' 'DeathRate' 'BachelorRate' 'UnemploymentRate' )

for task_variable in ${task_variables[*]}
do
python train.py \
dataset=us_country_demos \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.task_variable=$task_variable \
model=cell/cwn \
model.feature_encoder.out_channels=32,64,128 \
model.feature_encoder.proj_dropout=0.25,0.5 \
model.backbone.n_layers=1,2,3,4 \
model.optimizer.lr="0.01,0.001" \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
trainer.max_epochs=1000 \
trainer.min_epochs=500 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=50 \
logger.wandb.project=TopoBenchmarkX_Cellular \
tags="[MainExperiment]" \
--multirun

done

# ----Cocitation datasets----
datasets=( 'cocitation_cora' 'cocitation_citeseer' 'cocitation_pubmed' )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
dataset.parameters.data_seed=0,3,5,7,9 \
model=cell/cwn \
model.feature_encoder.out_channels=32,64,128 \
model.feature_encoder.proj_dropout=0.25,0.5 \
model.backbone.n_layers=1,2 \
model.optimizer.lr="0.01,0.001" \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=25 \
logger.wandb.project=TopoBenchmarkX_Cellular \
tags="[MainExperiment]" \
--multirun
done

# # ----Graph regression dataset----
# # Train on ZINC dataset
# python train.py \
# dataset=ZINC \
# seed=42,3,5,23,150 \
# model=cell/cwn \
# model.optimizer.lr=0.01,0.001 \
# model.optimizer.weight_decay=0 \
# model.feature_encoder.out_channels=32,64,128 \
# model.backbone.n_layers=2,4 \
# model.feature_encoder.proj_dropout=0.25,0.5 \
# dataset.parameters.batch_size=128,256 \
# dataset.transforms.one_hot_node_degree_features.degrees_fields=x \
# dataset.parameters.data_seed=0 \
# dataset.transforms.graph2cell_lifting.max_cell_length=10 \
# model.readout.readout_name="NoReadOut,PropagateSignalDown" \
# logger.wandb.project=TopoBenchmarkX_Cellular \
# trainer.max_epochs=500 \
# trainer.min_epochs=50 \
# callbacks.early_stopping.min_delta=0.005 \
# trainer.check_val_every_n_epoch=5 \
# callbacks.early_stopping.patience=10 \
# tags="[MainExperiment]" \
# --multirun

# ----TU graph datasets----
# MUTAG has very few samples, so we use a smaller batch size
# Train on MUTAG dataset
python train.py \
dataset=MUTAG \
model=cell/cwn \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.batch_size=32,64 \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
logger.wandb.project=TopoBenchmarkX_Cellular \
callbacks.early_stopping.patience=25 \
tags="[MainExperiment]" \
--multirun

# Train the rest of the TU graph datasets
datasets=( 'PROTEINS_TU' 'NCI1' 'NCI109' 'IMDB-BINARY' 'IMDB-MULTI' )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
model=cell/cwn \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.batch_size=128,256 \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
logger.wandb.project=TopoBenchmarkX_Cellular \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=5 \
callbacks.early_stopping.patience=10 \
--multirun
done

# ----Heterophilic datasets----

datasets=( roman_empire minesweeper )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
model=cell/cwn \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.batch_size=1 \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
logger.wandb.project=TopoBenchmarkX_Cellular \
trainer.max_epochs=1000 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=50 \
tags="[MainExperiment]" \
--multirun
done
103 changes: 103 additions & 0 deletions hp_scripts/main_exp/cellular/left_out.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# ----Graph regression dataset----
# Train on ZINC dataset

# CWN
python train.py \
dataset=ZINC \
seed=42,3,5,23,150 \
model=cell/cwn \
model.optimizer.lr=0.01,0.001 \
model.optimizer.weight_decay=0 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=2,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.batch_size=128,256 \
dataset.transforms.one_hot_node_degree_features.degrees_fields=x \
dataset.parameters.data_seed=0 \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
logger.wandb.project=TopoBenchmarkX_Cellular \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
callbacks.early_stopping.min_delta=0.005 \
trainer.check_val_every_n_epoch=5 \
callbacks.early_stopping.patience=10 \
tags="[MainExperiment]" \
--multirun

# CCXN
python train.py \
dataset=ZINC \
seed=42,3,5,23,150 \
model=cell/ccxn \
model.optimizer.lr=0.01,0.001 \
model.optimizer.weight_decay=0 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=2,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.batch_size=128,256 \
dataset.transforms.one_hot_node_degree_features.degrees_fields=x \
dataset.parameters.data_seed=0 \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
logger.wandb.project=TopoBenchmarkX_Cellular \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
callbacks.early_stopping.min_delta=0.005 \
trainer.check_val_every_n_epoch=5 \
callbacks.early_stopping.patience=10 \
tags="[MainExperiment]" \
--multirun

# CCCN
python train.py \
dataset=ZINC \
seed=42,3,5,23,150 \
model=cell/cccn \
model.optimizer.lr=0.01,0.001 \
model.optimizer.weight_decay=0 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=2,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.batch_size=128,256 \
dataset.transforms.one_hot_node_degree_features.degrees_fields=x \
dataset.parameters.data_seed=0 \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
logger.wandb.project=TopoBenchmarkX_Cellular \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
callbacks.early_stopping.min_delta=0.005 \
trainer.check_val_every_n_epoch=5 \
callbacks.early_stopping.patience=10 \
tags="[MainExperiment]" \
--multirun


# CAN

python train.py \
dataset=ZINC \
seed=42,3,5,23,150 \
model=cell/can \
model.optimizer.lr=0.01,0.001 \
model.optimizer.weight_decay=0 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=2,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.batch_size=128,256 \
dataset.transforms.one_hot_node_degree_features.degrees_fields=x \
dataset.parameters.data_seed=0 \
dataset.transforms.graph2cell_lifting.max_cell_length=10 \
model.readout.readout_name="NoReadOut,PropagateSignalDown" \
logger.wandb.project=TopoBenchmarkX_Cellular \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
callbacks.early_stopping.min_delta=0.005 \
trainer.check_val_every_n_epoch=5 \
callbacks.early_stopping.patience=10 \
tags="[MainExperiment]" \
--multirun


# REDDIT BINARY for all
136 changes: 136 additions & 0 deletions hp_scripts/main_exp/graph/gat.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# Description: Main experiment script for the GAT model.
# ----Node regression datasets: US County Demographics----
task_variables=( 'Election' 'MedianIncome' 'MigraRate' 'BirthRate' 'DeathRate' 'BachelorRate' 'UnemploymentRate' )

for task_variable in ${task_variables[*]}
do
python train.py \
dataset=us_country_demos \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.task_variable=$task_variable \
model=graph/gat \
model.feature_encoder.out_channels="32,64,128" \
model.feature_encoder.proj_dropout="0,0.25,0.5" \
model.backbone.num_layers="1,2,3,4" \
model.optimizer.lr="0.01,0.001" \
trainer.max_epochs=1000 \
trainer.min_epochs=500 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=50 \
logger.wandb.project=TopoBenchmarkX_Graph \
tags="[MainExperiment]" \
--multirun

done

# ----Cocitation datasets----
datasets=( 'cocitation_cora' 'cocitation_citeseer' 'cocitation_pubmed' )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
dataset.parameters.data_seed=0,3,5,7,9 \
model=graph/gat \
model.feature_encoder.out_channels="32,64,128" \
model.feature_encoder.proj_dropout="0,0.25,0.5" \
model.backbone.num_layers="1,2" \
model.optimizer.lr="0.01,0.001" \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=25 \
logger.wandb.project=TopoBenchmarkX_Graph \
tags="[MainExperiment]" \
--multirun
done

# ----Graph regression dataset----
# Train on ZINC dataset
python train.py \
dataset=ZINC \
seed=42,3,5,23,150 \
model=graph/gat \
model.optimizer.lr=0.01,0.001 \
model.optimizer.weight_decay=0 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.num_layers=2,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.batch_size=128,256 \
dataset.transforms.one_hot_node_degree_features.degrees_fields=x \
dataset.parameters.data_seed=0 \
logger.wandb.project=TopoBenchmarkX_Graph \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
callbacks.early_stopping.min_delta=0.005 \
trainer.check_val_every_n_epoch=5 \
callbacks.early_stopping.patience=10 \
tags="[MainExperiment]" \
--multirun

# ----Heterophilic datasets----

datasets=( roman_empire amazon_ratings tolokers questions minesweeper )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
model=graph/gat \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.num_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5 \
dataset.parameters.batch_size=128,256 \
logger.wandb.project=TopoBenchmarkX_Graph \
trainer.max_epochs=1000 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=50 \
tags="[MainExperiment]" \
--multirun
done

# ----TU graph datasets----
# MUTAG has very few samples, so we use a smaller batch size
# Train on MUTAG dataset
python train.py \
dataset=MUTAG \
model=graph/gat \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.num_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5 \
dataset.parameters.batch_size=32,64 \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
logger.wandb.project=TopoBenchmarkX_Graph \
callbacks.early_stopping.patience=25 \
tags="[MainExperiment]" \
--multirun

# Train the rest of the TU graph datasets
datasets=( 'PROTEINS_TU' 'NCI1' 'NCI109' 'REDDIT-BINARY' 'IMDB-BINARY' 'IMDB-MULTI' )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
model=graph/gat \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.num_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5 \
dataset.parameters.batch_size=128,256 \
logger.wandb.project=TopoBenchmarkX_Graph \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=5 \
callbacks.early_stopping.patience=10 \
--multirun
done

136 changes: 136 additions & 0 deletions hp_scripts/main_exp/graph/gcn.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# Description: Main experiment script for the GCN model.
# ----Node regression datasets: US County Demographics----
task_variables=( 'Election' 'MedianIncome' 'MigraRate' 'BirthRate' 'DeathRate' 'BachelorRate' 'UnemploymentRate' )

for task_variable in ${task_variables[*]}
do
python train.py \
dataset=us_country_demos \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.task_variable=$task_variable \
model=graph/gcn \
model.feature_encoder.out_channels="32,64,128" \
model.feature_encoder.proj_dropout="0,0.25,0.5" \
model.backbone.num_layers="1,2,3,4" \
model.optimizer.lr="0.01,0.001" \
trainer.max_epochs=1000 \
trainer.min_epochs=500 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=50 \
logger.wandb.project=TopoBenchmarkX_Graph \
tags="[MainExperiment]" \
--multirun

done

# ----Cocitation datasets----
datasets=( 'cocitation_cora' 'cocitation_citeseer' 'cocitation_pubmed' )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
dataset.parameters.data_seed=0,3,5,7,9 \
model=graph/gcn \
model.feature_encoder.out_channels="32,64,128" \
model.feature_encoder.proj_dropout="0,0.25,0.5" \
model.backbone.num_layers="1,2" \
model.optimizer.lr="0.01,0.001" \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=25 \
logger.wandb.project=TopoBenchmarkX_Graph \
tags="[MainExperiment]" \
--multirun
done

# ----Graph regression dataset----
# Train on ZINC dataset
python train.py \
dataset=ZINC \
seed=42,3,5,23,150 \
model=graph/gcn \
model.optimizer.lr=0.01,0.001 \
model.optimizer.weight_decay=0 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.num_layers=2,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.batch_size=128,256 \
dataset.transforms.one_hot_node_degree_features.degrees_fields=x \
dataset.parameters.data_seed=0 \
logger.wandb.project=TopoBenchmarkX_Graph \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
callbacks.early_stopping.min_delta=0.005 \
trainer.check_val_every_n_epoch=5 \
callbacks.early_stopping.patience=10 \
tags="[MainExperiment]" \
--multirun
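
Several ZINC runs in these scripts set dataset.transforms.one_hot_node_degree_features.degrees_fields=x. A hedged sketch of what a one-hot degree-feature transform computes (the function name and signature here are illustrative assumptions, not the repository's code):

import torch
from torch_geometric.utils import degree

def one_hot_degree_features(edge_index: torch.Tensor, num_nodes: int, max_degree: int) -> torch.Tensor:
    # Node degrees, clamped so rare high-degree nodes share the last bucket.
    deg = degree(edge_index[0], num_nodes=num_nodes, dtype=torch.long)
    deg = deg.clamp(max=max_degree)
    return torch.nn.functional.one_hot(deg, num_classes=max_degree + 1).float()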

# ----Heterophilic datasets----

datasets=( roman_empire amazon_ratings tolokers questions minesweeper )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
model=graph/gcn \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.num_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5 \
dataset.parameters.batch_size=1 \
logger.wandb.project=TopoBenchmarkX_Graph \
trainer.max_epochs=1000 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=50 \
tags="[MainExperiment]" \
--multirun
done

# ----TU graph datasets----
# MUTAG has very few samples, so we use a smaller batch size
# Train on MUTAG dataset
python train.py \
dataset=MUTAG \
model=graph/gcn \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.num_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5 \
dataset.parameters.batch_size=32,64 \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
logger.wandb.project=TopoBenchmarkX_Graph \
callbacks.early_stopping.patience=25 \
tags="[MainExperiment]" \
--multirun

# Train the rest of the TU graph datasets
datasets=( 'PROTEINS_TU' 'NCI1' 'NCI109' 'REDDIT-BINARY' 'IMDB-BINARY' 'IMDB-MULTI' )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
model=graph/gcn \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.num_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5 \
dataset.parameters.batch_size=128,256 \
logger.wandb.project=TopoBenchmarkX_Graph \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=5 \
callbacks.early_stopping.patience=10 \
--multirun
done

136 changes: 136 additions & 0 deletions hp_scripts/main_exp/graph/gin.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# Description: Main experiment script for the GIN model.
# ----Node regression datasets: US County Demographics----
task_variables=( 'Election' 'MedianIncome' 'MigraRate' 'BirthRate' 'DeathRate' 'BachelorRate' 'UnemploymentRate' )

for task_variable in ${task_variables[*]}
do
python train.py \
dataset=us_country_demos \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.task_variable=$task_variable \
model=graph/gin \
model.feature_encoder.out_channels="32,64,128" \
model.feature_encoder.proj_dropout="0,0.25,0.5" \
model.backbone.num_layers="1,2,3,4" \
model.optimizer.lr="0.01,0.001" \
trainer.max_epochs=1000 \
trainer.min_epochs=500 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=50 \
logger.wandb.project=TopoBenchmarkX_Graph \
tags="[MainExperiment]" \
--multirun

done

# ----Cocitation datasets----
datasets=( 'cocitation_cora' 'cocitation_citeseer' 'cocitation_pubmed' )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
dataset.parameters.data_seed=0,3,5,7,9 \
model=graph/gin \
model.feature_encoder.out_channels="32,64,128" \
model.feature_encoder.proj_dropout="0,0.25,0.5" \
model.backbone.num_layers="1,2" \
model.optimizer.lr="0.01,0.001" \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=25 \
logger.wandb.project=TopoBenchmarkX_Graph \
tags="[MainExperiment]" \
--multirun
done

# ----Graph regression dataset----
# Train on ZINC dataset
python train.py \
dataset=ZINC \
seed=42,3,5,23,150 \
model=graph/gin \
model.optimizer.lr=0.01,0.001 \
model.optimizer.weight_decay=0 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.num_layers=2,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.batch_size=128,256 \
dataset.transforms.one_hot_node_degree_features.degrees_fields=x \
dataset.parameters.data_seed=0 \
logger.wandb.project=TopoBenchmarkX_Graph \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
callbacks.early_stopping.min_delta=0.005 \
trainer.check_val_every_n_epoch=5 \
callbacks.early_stopping.patience=10 \
tags="[MainExperiment]" \
--multirun

# ----Heterophilic datasets----

datasets=( roman_empire amazon_ratings tolokers questions minesweeper )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
model=graph/gin \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.num_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5 \
dataset.parameters.batch_size=128,256 \
logger.wandb.project=TopoBenchmarkX_Graph \
trainer.max_epochs=1000 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=50 \
tags="[MainExperiment]" \
--multirun
done

# ----TU graph datasets----
# MUTAG has very few samples, so we use a smaller batch size
# Train on MUTAG dataset
python train.py \
dataset=MUTAG \
model=graph/gin \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.num_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5 \
dataset.parameters.batch_size=32,64 \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
logger.wandb.project=TopoBenchmarkX_Graph \
callbacks.early_stopping.patience=25 \
tags="[MainExperiment]" \
--multirun

# Train the rest of the TU graph datasets
datasets=( 'PROTEINS_TU' 'NCI1' 'NCI109' 'REDDIT-BINARY' 'IMDB-BINARY' 'IMDB-MULTI' )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
model=graph/gin \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.num_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5 \
dataset.parameters.batch_size=128,256 \
logger.wandb.project=TopoBenchmarkX_Graph \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=5 \
callbacks.early_stopping.patience=10 \
--multirun
done

136 changes: 136 additions & 0 deletions hp_scripts/main_exp/hypergraph/allsettransformer.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# Description: Main experiment script for the AllSetTransformer model.
# ----Node regression datasets: US County Demographics----
task_variables=( 'Election' 'MedianIncome' 'MigraRate' 'BirthRate' 'DeathRate' 'BachelorRate' 'UnemploymentRate' )

for task_variable in ${task_variables[*]}
do
python train.py \
dataset=us_country_demos \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.task_variable=$task_variable \
model=hypergraph/allsettransformer \
model.feature_encoder.out_channels=32,64,128 \
model.feature_encoder.proj_dropout=0.25,0.5 \
model.backbone.n_layers="1,2,3,4" \
model.optimizer.lr="0.01,0.001" \
trainer.max_epochs=1000 \
trainer.min_epochs=500 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=50 \
logger.wandb.project=TopoBenchmarkX_Hypergraph \
tags="[MainExperiment]" \
--multirun

done

# ----Cocitation datasets----
datasets=( 'cocitation_cora' 'cocitation_citeseer' 'cocitation_pubmed' )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
dataset.parameters.data_seed=0,3,5,7,9 \
model=hypergraph/allsettransformer \
model.feature_encoder.out_channels="32,64,128" \
model.feature_encoder.proj_dropout=0.25,0.5 \
model.backbone.n_layers="1,2" \
model.optimizer.lr="0.01,0.001" \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=25 \
logger.wandb.project=TopoBenchmarkX_Hypergraph \
tags="[MainExperiment]" \
--multirun
done

# ----Graph regression dataset----
# Train on ZINC dataset
python train.py \
dataset=ZINC \
seed=42,3,5,23,150 \
model=hypergraph/allsettransformer \
model.optimizer.lr=0.01,0.001 \
model.optimizer.weight_decay=0 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=2,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.batch_size=128,256 \
dataset.transforms.one_hot_node_degree_features.degrees_fields=x \
dataset.parameters.data_seed=0 \
logger.wandb.project=TopoBenchmarkX_Hypergraph \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
callbacks.early_stopping.min_delta=0.005 \
trainer.check_val_every_n_epoch=5 \
callbacks.early_stopping.patience=10 \
tags="[MainExperiment]" \
--multirun


# ----TU graph datasets----
# MUTAG has very few samples, so we use a smaller batch size
# Train on MUTAG dataset
python train.py \
dataset=MUTAG \
model=hypergraph/allsettransformer \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5 \
dataset.parameters.batch_size=32,64 \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
logger.wandb.project=TopoBenchmarkX_Hypergraph \
callbacks.early_stopping.patience=25 \
tags="[MainExperiment]" \
--multirun

# Train the rest of the TU graph datasets
datasets=( 'PROTEINS_TU' 'NCI1' 'NCI109' 'REDDIT-BINARY' 'IMDB-BINARY' 'IMDB-MULTI' )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
model=hypergraph/allsettransformer \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5 \
dataset.parameters.batch_size=128,256 \
logger.wandb.project=TopoBenchmarkX_Hypergraph \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=5 \
callbacks.early_stopping.patience=10 \
--multirun
done

# ----Heterophilic datasets----

datasets=( roman_empire amazon_ratings tolokers minesweeper )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
model=hypergraph/allsettransformer \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.n_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5 \
dataset.parameters.batch_size=128,256 \
logger.wandb.project=TopoBenchmarkX_Hypergraph \
trainer.max_epochs=1000 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=50 \
tags="[MainExperiment]" \
--multirun
done
135 changes: 135 additions & 0 deletions hp_scripts/main_exp/hypergraph/edgnn.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# Description: Main experiment script for the EDGNN model.
# ----Node regression datasets: US County Demographics----
task_variables=( 'Election' 'MedianIncome' 'MigraRate' 'BirthRate' 'DeathRate' 'BachelorRate' 'UnemploymentRate' )

for task_variable in ${task_variables[*]}
do
python train.py \
dataset=us_country_demos \
dataset.parameters.data_seed=0,3,5,7,9 \
dataset.parameters.task_variable=$task_variable \
model=hypergraph/edgnn \
model.feature_encoder.out_channels=32,64,128 \
model.feature_encoder.proj_dropout=0.25,0.5 \
model.backbone.All_num_layers=1,2,3,4 \
model.optimizer.lr="0.01,0.001" \
trainer.max_epochs=1000 \
trainer.min_epochs=500 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=50 \
logger.wandb.project=TopoBenchmarkX_Hypergraph \
tags="[MainExperiment]" \
--multirun

done

# ----Cocitation datasets----
datasets=( 'cocitation_cora' 'cocitation_citeseer' 'cocitation_pubmed' )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
dataset.parameters.data_seed=0,3,5,7,9 \
model=hypergraph/edgnn \
model.feature_encoder.out_channels=32,64,128 \
model.feature_encoder.proj_dropout=0.25,0.5 \
model.backbone.All_num_layers="1,2" \
model.optimizer.lr="0.01,0.001" \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=25 \
logger.wandb.project=TopoBenchmarkX_Hypergraph \
tags="[MainExperiment]" \
--multirun
done

# ----Graph regression dataset----
# Train on ZINC dataset
python train.py \
dataset=ZINC \
seed=42,3,5,23,150 \
model=hypergraph/edgnn \
model.optimizer.lr=0.01,0.001 \
model.optimizer.weight_decay=0 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.All_num_layers=2,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.batch_size=128,256 \
dataset.transforms.one_hot_node_degree_features.degrees_fields=x \
dataset.parameters.data_seed=0 \
logger.wandb.project=TopoBenchmarkX_Hypergraph \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
callbacks.early_stopping.min_delta=0.005 \
trainer.check_val_every_n_epoch=5 \
callbacks.early_stopping.patience=10 \
tags="[MainExperiment]" \
--multirun

# ----TU graph datasets----
# MUTAG has very few samples, so we use a smaller batch size
# Train on MUTAG dataset
python train.py \
dataset=MUTAG \
model=hypergraph/edgnn \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.All_num_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5 \
dataset.parameters.batch_size=32,64 \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
logger.wandb.project=TopoBenchmarkX_Hypergraph \
callbacks.early_stopping.patience=25 \
tags="[MainExperiment]" \
--multirun

# Train the rest of the TU graph datasets
datasets=( 'PROTEINS_TU' 'NCI1' 'NCI109' 'REDDIT-BINARY' 'IMDB-BINARY' 'IMDB-MULTI' )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
model=hypergraph/edgnn \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.All_num_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5 \
dataset.parameters.batch_size=128,256 \
logger.wandb.project=TopoBenchmarkX_Hypergraph \
trainer.max_epochs=500 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=5 \
callbacks.early_stopping.patience=10 \
--multirun
done

# ----Heterophilic datasets----

datasets=( roman_empire amazon_ratings tolokers minesweeper )

for dataset in ${datasets[*]}
do
python train.py \
dataset=$dataset \
model=hypergraph/edgnn \
model.optimizer.lr=0.01,0.001 \
model.feature_encoder.out_channels=32,64,128 \
model.backbone.All_num_layers=1,2,3,4 \
model.feature_encoder.proj_dropout=0.25,0.5 \
dataset.parameters.data_seed=0,3,5 \
dataset.parameters.batch_size=128,256 \
logger.wandb.project=TopoBenchmarkX_Hypergraph \
trainer.max_epochs=1000 \
trainer.min_epochs=50 \
trainer.check_val_every_n_epoch=1 \
callbacks.early_stopping.patience=50 \
tags="[MainExperiment]" \
--multirun
done