Skip to content

Commit

Permalink
Merge branch 'pyg-team:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
mova authored Nov 22, 2023
2 parents 7539d75 + 5b9a58b commit 37901d5
Show file tree
Hide file tree
Showing 22 changed files with 212 additions and 67 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Added

- Added `fsspec` as file system backend ([#8379](https://github.com/pyg-team/pytorch_geometric/pull/8379))
- Added support for floating-point average degree numbers in `FakeDataset` and `FakeHeteroDataset` ([#8404](https://github.com/pyg-team/pytorch_geometric/pull/8404))
- Added support for device conversions of `InMemoryDataset` ([#8402](https://github.com/pyg-team/pytorch_geometric/pull/8402))
- Added support for edge-level temporal sampling in `NeighborLoader` and `LinkNeighborLoader` ([#8372](https://github.com/pyg-team/pytorch_geometric/pull/8372))
Expand Down Expand Up @@ -36,6 +37,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Deprecated

- Deprecated `torch_geometric.data.makedirs` in favor of `os.makedirs` ([#8421](https://github.com/pyg-team/pytorch_geometric/pull/8421))
- Deprecated `DataParallel` in favor of `DistributedDataParallel` ([#8250](https://github.com/pyg-team/pytorch_geometric/pull/8250))

### Fixed
Expand Down
2 changes: 2 additions & 0 deletions conda/pyg/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@ requirements:
- {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }}
- {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }}
- psutil
- fsspec
- tqdm
- jinja2
- pyparsing
- numpy
- scipy
- aiohttp
- requests
- scikit-learn

Expand Down
2 changes: 2 additions & 0 deletions conda/pytorch-geometric/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@ requirements:
- {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }}
- {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }}
- psutil
- fsspec
- tqdm
- jinja2
- pyparsing
- numpy
- scipy
- aiohttp
- requests
- scikit-learn

Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ dependencies=[
"tqdm",
"numpy",
"scipy",
"fsspec",
"jinja2",
"aiohttp",
"requests",
"pyparsing",
"scikit-learn",
Expand Down
1 change: 1 addition & 0 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def load_dataset(root: str, name: str, *args, **kwargs) -> Dataset:

@pytest.fixture(scope='session')
def get_dataset() -> Callable:
# TODO Support root = 'memory://pyg_test_datasets'
root = osp.join('/', 'tmp', 'pyg_test_datasets')
yield functools.partial(load_dataset, root)
if osp.exists(root):
Expand Down
6 changes: 3 additions & 3 deletions test/transforms/test_to_superpixels.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import os
import os.path as osp

import torch

from torch_geometric.data import download_url, extract_gz
from torch_geometric.data.makedirs import makedirs
from torch_geometric.loader import DataLoader
from torch_geometric.testing import onlyOnline, withPackage
from torch_geometric.transforms import ToSLIC
Expand All @@ -27,8 +27,8 @@ def test_to_superpixels(tmp_path):
raw_folder = osp.join(tmp_path, 'MNIST', 'raw')
processed_folder = osp.join(tmp_path, 'MNIST', 'processed')

makedirs(raw_folder)
makedirs(processed_folder)
os.makedirs(raw_folder, exist_ok=True)
os.makedirs(processed_folder, exist_ok=True)
for resource in resources:
path = download_url(resource, raw_folder)
extract_gz(path, osp.join(tmp_path, raw_folder))
Expand Down
13 changes: 7 additions & 6 deletions torch_geometric/data/dataset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import copy
import os
import os.path as osp
import re
import sys
Expand All @@ -12,7 +13,7 @@
from torch import Tensor

from torch_geometric.data.data import BaseData
from torch_geometric.data.makedirs import makedirs
from torch_geometric.io import fs

IndexType = Union[slice, Tensor, np.ndarray, Sequence]

Expand Down Expand Up @@ -91,7 +92,7 @@ def __init__(
super().__init__()

if isinstance(root, str):
root = osp.expanduser(osp.normpath(root))
root = osp.expanduser(fs.normpath(root))

self.root = root
self.transform = transform
Expand Down Expand Up @@ -209,7 +210,7 @@ def _download(self):
if files_exist(self.raw_paths): # pragma: no cover
return

makedirs(self.raw_dir)
os.makedirs(self.raw_dir, exist_ok=True)
self.download()

@property
Expand Down Expand Up @@ -240,13 +241,13 @@ def _process(self):
if self.log and 'pytest' not in sys.modules:
print('Processing...', file=sys.stderr)

makedirs(self.processed_dir)
os.makedirs(self.processed_dir, exist_ok=True)
self.process()

path = osp.join(self.processed_dir, 'pre_transform.pt')
torch.save(_repr(self.pre_transform), path)
fs.torch_save(_repr(self.pre_transform), path)
path = osp.join(self.processed_dir, 'pre_filter.pt')
torch.save(_repr(self.pre_filter), path)
fs.torch_save(_repr(self.pre_filter), path)

if self.log and 'pytest' not in sys.modules:
print('Done!', file=sys.stderr)
Expand Down
11 changes: 7 additions & 4 deletions torch_geometric/data/download.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import os
import os.path as osp
import ssl
import sys
import urllib
from typing import Optional

from torch_geometric.data.makedirs import makedirs
import fsspec

from torch_geometric.io import fs


def download_url(
Expand All @@ -30,20 +33,20 @@ def download_url(

path = osp.join(folder, filename)

if osp.exists(path): # pragma: no cover
if fs.exists(path): # pragma: no cover
if log and 'pytest' not in sys.modules:
print(f'Using existing file {filename}', file=sys.stderr)
return path

if log and 'pytest' not in sys.modules:
print(f'Downloading {url}', file=sys.stderr)

makedirs(folder)
os.makedirs(folder, exist_ok=True)

context = ssl._create_unverified_context()
data = urllib.request.urlopen(url, context=context)

with open(path, 'wb') as f:
with fsspec.open(path, 'wb') as f:
# workaround for https://bugs.python.org/issue42853
while True:
chunk = data.read(10 * 1024 * 1024)
Expand Down
5 changes: 3 additions & 2 deletions torch_geometric/data/in_memory_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from torch_geometric.data.data import BaseData
from torch_geometric.data.dataset import Dataset, IndexType
from torch_geometric.data.separate import separate
from torch_geometric.io import fs


class InMemoryDataset(Dataset, ABC):
Expand Down Expand Up @@ -121,11 +122,11 @@ def get(self, idx: int) -> BaseData:
def save(cls, data_list: List[BaseData], path: str):
r"""Saves a list of data objects to the file path :obj:`path`."""
data, slices = cls.collate(data_list)
torch.save((data.to_dict(), slices), path)
fs.torch_save((data.to_dict(), slices), path)

def load(self, path: str, data_cls: Type[BaseData] = Data):
r"""Loads the dataset from the file path :obj:`path`."""
data, self.slices = torch.load(path)
data, self.slices = fs.torch_load(path)
if isinstance(data, dict): # Backward compatibility.
data = data_cls.from_dict(data)
self.data = data
Expand Down
17 changes: 9 additions & 8 deletions torch_geometric/data/makedirs.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
import errno
import os
import os.path as osp
from torch_geometric.deprecation import deprecated
from torch_geometric.io import fs


@deprecated("use 'os.makedirs(path, exist_ok=True)' instead")
def makedirs(path: str):
r"""Recursively creates a directory.
.. warning::
:meth:`makedirs` is deprecated and will be removed soon.
Please use :obj:`os.makedirs(path, exist_ok=True)` instead.
Args:
path (str): The path to create.
"""
try:
os.makedirs(osp.expanduser(osp.normpath(path)))
except OSError as e:
if e.errno != errno.EEXIST and osp.isdir(path):
raise e
fs.makedirs(path, exist_ok=True)
3 changes: 1 addition & 2 deletions torch_geometric/datasets/snap_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
extract_gz,
extract_tar,
)
from torch_geometric.data.makedirs import makedirs
from torch_geometric.utils import coalesce


Expand Down Expand Up @@ -222,7 +221,7 @@ def _download(self):
if osp.isdir(self.raw_dir) and len(os.listdir(self.raw_dir)) > 0:
return

makedirs(self.raw_dir)
os.makedirs(self.raw_dir, exist_ok=True)
self.download()

def download(self):
Expand Down
29 changes: 9 additions & 20 deletions torch_geometric/datasets/tu_dataset.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,8 @@
import os
import os.path as osp
import shutil
from typing import Callable, List, Optional

import torch

from torch_geometric.data import (
Data,
InMemoryDataset,
download_url,
extract_zip,
)
from torch_geometric.io import read_tu_data
from torch_geometric.data import Data, InMemoryDataset
from torch_geometric.io import fs, read_tu_data


class TUDataset(InMemoryDataset):
Expand Down Expand Up @@ -138,7 +129,7 @@ def __init__(
super().__init__(root, transform, pre_transform, pre_filter,
force_reload=force_reload)

out = torch.load(self.processed_paths[0])
out = fs.torch_load(self.processed_paths[0])
if not isinstance(out, tuple) or len(out) != 3:
raise RuntimeError(
"The 'data' object was created by an older version of PyG. "
Expand Down Expand Up @@ -192,12 +183,10 @@ def processed_file_names(self) -> str:

def download(self):
url = self.cleaned_url if self.cleaned else self.url
folder = osp.join(self.root, self.name)
path = download_url(f'{url}/{self.name}.zip', folder)
extract_zip(path, folder)
os.unlink(path)
shutil.rmtree(self.raw_dir)
os.rename(osp.join(folder, self.name), self.raw_dir)
fs.cp(f'{url}/{self.name}.zip', self.raw_dir, extract=True)
for filename in fs.ls(osp.join(self.raw_dir, self.name)):
fs.mv(filename, osp.join(self.raw_dir, osp.basename(filename)))
fs.rm(osp.join(self.raw_dir, self.name))

def process(self):
self.data, self.slices, sizes = read_tu_data(self.raw_dir, self.name)
Expand All @@ -214,8 +203,8 @@ def process(self):
self.data, self.slices = self.collate(data_list)
self._data_list = None # Reset cache.

torch.save((self._data.to_dict(), self.slices, sizes),
self.processed_paths[0])
fs.torch_save((self._data.to_dict(), self.slices, sizes),
self.processed_paths[0])

def __repr__(self) -> str:
return f'{self.name}({len(self)})'
3 changes: 1 addition & 2 deletions torch_geometric/graphgym/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from typing import Any

import torch_geometric.graphgym.register as register
from torch_geometric.data.makedirs import makedirs

try: # Define global config object
from yacs.config import CfgNode as CN
Expand Down Expand Up @@ -483,7 +482,7 @@ def dump_cfg(cfg):
Args:
cfg (CfgNode): Configuration node
"""
makedirs(cfg.out_dir)
os.makedirs(cfg.out_dir, exist_ok=True)
cfg_file = os.path.join(cfg.out_dir, cfg.cfg_dest)
with open(cfg_file, 'w') as f:
cfg.dump(stream=f)
Expand Down
6 changes: 3 additions & 3 deletions torch_geometric/graphgym/logger.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import logging
import math
import os
import sys
import time
from typing import Any, Dict, Optional

import torch

from torch_geometric.data.makedirs import makedirs
from torch_geometric.graphgym import register
from torch_geometric.graphgym.config import cfg
from torch_geometric.graphgym.imports import Callback, pl
Expand All @@ -18,7 +18,7 @@ def set_printing():
"""Set up printing options."""
logging.root.handlers = []
logging_cfg = {'level': logging.INFO, 'format': '%(message)s'}
makedirs(cfg.run_dir)
os.makedirs(cfg.run_dir, exist_ok=True)
h_file = logging.FileHandler('{}/logging.log'.format(cfg.run_dir))
h_stdout = logging.StreamHandler(sys.stdout)
if cfg.print == 'file':
Expand All @@ -41,7 +41,7 @@ def __init__(self, name='train', task_type=None):
self._time_total = 0 # won't be reset

self.out_dir = '{}/{}'.format(cfg.run_dir, name)
makedirs(self.out_dir)
os.makedirs(self.out_dir, exist_ok=True)
if cfg.tensorboard_each_run:
from tensorboardX import SummaryWriter
self.tb_writer = SummaryWriter(self.out_dir)
Expand Down
4 changes: 0 additions & 4 deletions torch_geometric/graphgym/utils/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,6 @@ def dict_list_to_tb(dict_list, writer):
dict_to_tb(dict, writer, dict['epoch'])


def makedirs(dir):
os.makedirs(dir, exist_ok=True)


def makedirs_rm_exist(dir):
"""Make a directory, remove any existing data.
Expand Down
Loading

0 comments on commit 37901d5

Please sign in to comment.