Skip to content

Commit

Permalink
Merge pull request #44 from pyt-team/dev
Browse files Browse the repository at this point in the history
Numpy style docstrings
  • Loading branch information
gbg141 authored Jun 3, 2024
2 parents 213977d + 3a56925 commit 31d147b
Show file tree
Hide file tree
Showing 96 changed files with 2,147 additions and 948 deletions.
2 changes: 1 addition & 1 deletion configs/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
# this file is needed here to include configs when building project as a package
"""This file is needed here to include configs when building project as a package."""
57 changes: 57 additions & 0 deletions configs/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# @package _global_

# specify here default configuration
# order of defaults determines the order in which configs override each other
defaults:
- _self_
- dataset: graph/ZINC
- model: simplicial/sccn
- transforms: no_transform
- optimizer: adam
- scheduler: step_lr
- loss: default
- evaluator: default
- callbacks: default
- logger: wandb # set logger here or use command line (e.g. `python train.py logger=tensorboard`)
- trainer: cpu
- paths: default
- extras: default
- hydra: default

# experiment configs allow for version control of specific hyperparameters
# e.g. best hyperparameters for given model and datamodule
- experiment: null

# config for hyperparameter optimization
- hparams_search: null

# optional local config for machine/user specific settings
# it's optional since it doesn't need to exist and is excluded from version control
- optional local: default

# debugging config (enable through command line, e.g. `python train.py debug=default`)
- debug: null

# evaluator: ${dataset.parameters.task}
# callbacks: ${dataset.parameters.task}

# task name, determines output directory path
task_name: "train"

# tags to help you identify your experiments
# you can overwrite this in experiment configs
# overwrite from command line with `python train.py tags="[first_tag, second_tag]"`
tags: ["dev"]

# set False to skip model training
train: True

# evaluate on test set, using best model weights achieved during training
# lightning chooses best weights based on the metric specified in checkpoint callback
test: True

# simply provide checkpoint path to resume training
ckpt_path: null

# seed for random number generators in pytorch, numpy and python.random
seed: 42
1 change: 1 addition & 0 deletions notebooks/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Empty file to make the notebooks folder a package."""
7 changes: 4 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ dependencies=[
"jupyterlab",
"rich",
"rootutils",
#"toponetx @ git+https://github.com/pyt-team/TopoNetX.git",
#"topomodelx @ git+https://github.com/pyt-team/TopoModelX.git",
#"topoembedx @ git+https://github.com/pyt-team/TopoEmbedX.git",
"toponetx @ git+https://github.com/pyt-team/TopoNetX.git",
"topomodelx @ git+https://github.com/pyt-team/TopoModelX.git",
"topoembedx @ git+https://github.com/pyt-team/TopoEmbedX.git",
"lightning",
]

Expand All @@ -73,6 +73,7 @@ test = [
"coverage",
"jupyter",
"mypy",
"pytest-mock"
]

dev = ["TopoBenchmarkX[test, lint]"]
Expand Down
14 changes: 8 additions & 6 deletions test/data/dataload/test_Dataloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,26 @@
import hydra
import rootutils
import torch
from hydra import compose, initialize
from hydra import compose, initialize_config_dir

from topobenchmarkx.data.preprocess.preprocessor import PreProcessor
from topobenchmarkx.dataloader import TBXDataloader
from topobenchmarkx.dataloader.utils import to_data_list

from topobenchmarkx.run import initialize_hydra

rootutils.setup_root("./", indicator=".project-root", pythonpath=True)


class TestCollateFunction:
"""Test collate_fn."""

def setup_method(self):
initialize(
version_base="1.3", config_path="../../../configs", job_name="job"
)
cfg = compose(config_name="run.yaml", overrides=["dataset=graph/ZINC"])

# initialize_config_dir(
# version_base="1.3", config_dir=str(rootutils.find_root() / "configs"), job_name="job"
# )
# cfg = compose(config_name="test.yaml", overrides=["dataset=graph/ZINC", "model=simplicial/sccn", "transforms=dataset_defaults/ZINC"])
cfg = initialize_hydra()
graph_loader = hydra.utils.instantiate(cfg.dataset, _recursive_=False)

datasets, dataset_dir = graph_loader.loader.load()
Expand Down
8 changes: 5 additions & 3 deletions test/data/preprocess/test_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@
class TestPreProcessor:

@pytest.fixture(autouse=True)
def setup_method(self, mocker):
def setup_method(self, mocker_fixture):
mocker = mocker_fixture

# Setup test parameters
self.dataset = MagicMock(spec=torch_geometric.data.Dataset)
self.data_dir = "/fake/path"
self.data_dir = "fake/path"
self.transforms_config = DictConfig(
{"transform": {"transform_name": "CellCycleLifting"}}
)
Expand Down Expand Up @@ -61,7 +62,8 @@ def test_init(self):
assert self.preprocessor.transforms_applied == False
assert self.preprocessor.data_list == ["0", "0", "0"]

def test_init_with_transform(self, mocker):
def test_init_with_transform(self, mocker_fixture):
mocker = mocker_fixture
val_processed_paths = ["/some/path"]
params = [
{"assert_args": ("created_property", "processed_data_dir")},
Expand Down
4 changes: 4 additions & 0 deletions topobenchmarkx/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""TopobenchmarkX: A library for benchmarking of topological models."""

# Import submodules
from . import (
data,
Expand All @@ -10,6 +12,7 @@
transforms,
utils,
)
from .run import initialize_hydra

__all__ = [
"data",
Expand All @@ -21,6 +24,7 @@
"dataloader",
"datasets",
"model",
"initialize_hydra",
]


Expand Down
2 changes: 2 additions & 0 deletions topobenchmarkx/__main__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""TopoBenchmarkX.__main__ module."""

from .run import main

if __name__ == "__main__":
Expand Down
6 changes: 2 additions & 4 deletions topobenchmarkx/data/load/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,7 @@ def __init__(self, parameters: DictConfig):
def load(
self,
) -> torch_geometric.data.Dataset:
"""
Load cell complex dataset.
"""Load cell complex dataset.
Returns
-------
Expand Down Expand Up @@ -196,8 +195,7 @@ def __init__(self, parameters: DictConfig):
def load(
self,
) -> torch_geometric.data.Dataset:
"""
Load simplicial dataset.
"""Load simplicial dataset.
Returns
-------
Expand Down
26 changes: 8 additions & 18 deletions topobenchmarkx/data/preprocess/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@


class PreProcessor(torch_geometric.data.InMemoryDataset):
"""
Preprocessor for datasets.
"""Preprocessor for datasets.
Parameters
----------
Expand Down Expand Up @@ -63,8 +62,7 @@ def __init__(self, dataset, data_dir, transforms_config=None, **kwargs):

@property
def processed_dir(self) -> str:
"""
Return the path to the processed directory.
"""Return the path to the processed directory.
Returns
-------
Expand All @@ -78,8 +76,7 @@ def processed_dir(self) -> str:

@property
def processed_file_names(self) -> str:
"""
Return the name of the processed file.
"""Return the name of the processed file.
Returns
-------
Expand All @@ -91,8 +88,7 @@ def processed_file_names(self) -> str:
def instantiate_pre_transform(
self, data_dir, transforms_config
) -> torch_geometric.transforms.Compose:
"""
Instantiate the pre-transforms.
"""Instantiate the pre-transforms.
Parameters
----------
Expand Down Expand Up @@ -122,8 +118,7 @@ def instantiate_pre_transform(
def set_processed_data_dir(
self, pre_transforms_dict, data_dir, transforms_config
) -> None:
"""
Set the processed data directory.
"""Set the processed data directory.
Parameters
----------
Expand All @@ -147,9 +142,7 @@ def set_processed_data_dir(
)

def save_transform_parameters(self) -> None:
"""
Save the transform parameters.
"""
"""Save the transform parameters."""
# Check if root/params_dict.json exists, if not, save it
path_transform_parameters = os.path.join(
self.processed_data_dir, "path_transform_parameters_dict.json"
Expand All @@ -172,9 +165,7 @@ def save_transform_parameters(self) -> None:
)

def process(self) -> None:
"""
Method that processes the data.
"""
"""Method that processes the data."""
self.data_list = (
[self.pre_transform(d) for d in self.data_list]
if self.pre_transform is not None
Expand All @@ -192,8 +183,7 @@ def load_dataset_splits(
) -> tuple[
DataloadDataset, DataloadDataset | None, DataloadDataset | None
]:
"""
Load the dataset splits.
"""Load the dataset splits.
Parameters
----------
Expand Down
2 changes: 2 additions & 0 deletions topobenchmarkx/dataloader/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""This module implements the dataloader for the topobenchmarkx package."""

from .dataload_dataset import DataloadDataset
from .dataloader import TBXDataloader

Expand Down
32 changes: 21 additions & 11 deletions topobenchmarkx/dataloader/dataload_dataset.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
"""Dataset class compatible with TBXDataloader."""

import torch_geometric


class DataloadDataset(torch_geometric.data.Dataset):
r"""Custom dataset to return all the values added to the dataset object.
"""Custom dataset to return all the values added to the dataset object.
Args:
data_lst (list[torch_geometric.data.Data]): List of torch_geometric.data.Data objects.
Parameters
----------
data_lst : list[torch_geometric.data.Data]
List of torch_geometric.data.Data objects.
"""

def __init__(self, data_lst):
Expand All @@ -16,22 +20,28 @@ def __repr__(self):
return f"{self.__class__.__name__}({len(self.data_lst)})"

def get(self, idx):
r"""Get data object from data list.
"""Get data object from data list.
Args:
idx (int): Index of the data object to get.
Parameters
----------
idx : int
Index of the data object to get.
Returns:
tuple: tuple containing a list of all the values for the data and the corresponding keys.
Returns
-------
tuple
Tuple containing a list of all the values for the data and the corresponding keys.
"""
data = self.data_lst[idx]
keys = list(data.keys())
return ([data[key] for key in keys], keys)

def len(self):
r"""Return the length of the dataset.
"""Return the length of the dataset.
Returns:
int: Length of the dataset.
Returns
-------
int
Length of the dataset.
"""
return len(self.data_lst)
Loading

0 comments on commit 31d147b

Please sign in to comment.