Skip to content

Commit

Permalink
Rename Bunch as Dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
tbonald committed Jul 16, 2024
1 parent 57f0b03 commit adaaf59
Show file tree
Hide file tree
Showing 12 changed files with 139 additions and 176 deletions.
3 changes: 2 additions & 1 deletion sknetwork/clustering/leiden.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ class Leiden(Louvain):
References
----------
* Traag, V. A., Waltman, L., & Van Eck, N. J. (2019).
`From Louvain to Leiden: guaranteeing well-connected communities`, Scientific reports.
`From Louvain to Leiden: guaranteeing well-connected communities`, Scientific reports.
"""

def __init__(self, resolution: float = 1, modularity: str = 'dugue', tol_optimization: float = 1e-3,
Expand Down
42 changes: 5 additions & 37 deletions sknetwork/clustering/tests/test_kcenters.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import unittest

from sknetwork.clustering import KCenters
from sknetwork.data import karate_club, painters, star_wars
from sknetwork.data.test_graphs import *


Expand All @@ -13,7 +12,7 @@ class TestKCentersClustering(unittest.TestCase):
def test_kcenters(self):
# Test undirected graph
n_clusters = 2
adjacency = karate_club()
adjacency = test_graph()
n_row = adjacency.shape[0]
kcenters = KCenters(n_clusters=n_clusters)
labels = kcenters.fit_predict(adjacency)
Expand All @@ -22,7 +21,7 @@ def test_kcenters(self):

# Test directed graph
n_clusters = 3
adjacency = painters()
adjacency = test_digraph()
n_row = adjacency.shape[0]
kcenters = KCenters(n_clusters=n_clusters, directed=True)
labels = kcenters.fit_predict(adjacency)
Expand All @@ -31,7 +30,7 @@ def test_kcenters(self):

# Test bipartite graph
n_clusters = 2
biadjacency = star_wars()
biadjacency = test_bigraph()
n_row, n_col = biadjacency.shape
kcenters = KCenters(n_clusters=n_clusters)
kcenters.fit(biadjacency)
Expand All @@ -40,41 +39,10 @@ def test_kcenters(self):
self.assertEqual(len(kcenters.labels_col_), n_col)
self.assertEqual(len(set(labels)), n_clusters)

def test_kcenters_centers(self):
# Test centers for undirected graphs
n_clusters = 2
adjacency = karate_club()
kcenters = KCenters(n_clusters=n_clusters)
kcenters.fit(adjacency)
centers = kcenters.centers_
self.assertEqual(n_clusters, len(set(centers)))

# Test centers for bipartite graphs
n_clusters = 2
biadjacency = star_wars()
n_row, n_col = biadjacency.shape
for position in ["row", "col", "both"]:
kcenters = KCenters(n_clusters=n_clusters, center_position=position)
kcenters.fit(biadjacency)
centers_row = kcenters.centers_row_
centers_col = kcenters.centers_col_
if position == "row":
self.assertEqual(n_clusters, len(set(centers_row)))
self.assertTrue(np.all(centers_row < n_row))
self.assertTrue(centers_col is None)
if position == "col":
self.assertEqual(n_clusters, len(set(centers_col)))
self.assertTrue(np.all((centers_col < n_col) & (0 <= centers_col)))
self.assertTrue(centers_row is None)
if position == "both":
self.assertEqual(n_clusters, len(set(centers_row)) + len(set(centers_col)))
self.assertTrue(np.all(centers_row < n_row))
self.assertTrue(np.all((centers_col < n_col) & (0 <= centers_col)))

def test_kcenters_error(self):
# Test value errors
adjacency = karate_club()
biadjacency = star_wars()
adjacency = test_graph()
biadjacency = test_bigraph()

# test n_clusters error
kcenters = KCenters(n_clusters=1)
Expand Down
2 changes: 1 addition & 1 deletion sknetwork/data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""data module"""
from sknetwork.data.base import Bunch
from sknetwork.data.base import *
from sknetwork.data.load import *
from sknetwork.data.models import *
from sknetwork.data.parse import from_edge_list, from_adjacency_list, from_csv, from_graphml
Expand Down
9 changes: 7 additions & 2 deletions sknetwork/data/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
"""


class Bunch(dict):
class Dataset(dict):
"""Container object for datasets.
Dictionary-like object that exposes its keys as attributes.
>>> dataset = Bunch(name='dataset')
>>> dataset = Dataset(name='dataset')
>>> dataset['name']
'dataset'
>>> dataset.name
Expand All @@ -26,3 +26,8 @@ def __getattr__(self, key):
return self[key]
except KeyError:
raise AttributeError(key)


# Bunch is the former name of Dataset, kept as an alias
Bunch = Dataset

39 changes: 18 additions & 21 deletions sknetwork/data/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,12 @@
from scipy import sparse

from sknetwork.data.parse import from_csv, load_labels, load_header, load_metadata
from sknetwork.data.base import Bunch
from sknetwork.data.base import Dataset
from sknetwork.utils.check import is_square
from sknetwork.log import Log

NETSET_URL = 'https://netset.telecom-paris.fr'

# former name of Dataset
Bunch = Bunch


def is_within_directory(directory, target):
"""Utility function."""
Expand Down Expand Up @@ -89,7 +86,7 @@ def clean_data_home(data_home: Optional[Union[str, Path]] = None):


def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]] = None,
verbose: bool = True) -> Optional[Bunch]:
verbose: bool = True) -> Optional[Dataset]:
"""Load a dataset from the `NetSet collection
<https://netset.telecom-paris.fr/>`_.
Expand All @@ -105,10 +102,10 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]
Returns
-------
dataset : :class:`Bunch`
dataset : :class:`Dataset`
Returned dataset.
"""
dataset = Bunch()
dataset = Dataset()
dataset_folder = NETSET_URL + '/datasets/'
folder_npz = NETSET_URL + '/datasets_npz/'

Expand Down Expand Up @@ -167,7 +164,7 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]


def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_numpy_bundle: bool = True,
verbose: bool = True) -> Bunch:
verbose: bool = True) -> Dataset:
"""Load a dataset from the `Konect database
<http://konect.cc/networks/>`_.
Expand All @@ -186,7 +183,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
Returns
-------
dataset : :class:`Bunch`
dataset : :class:`Dataset`
Object with the following attributes:
* `adjacency` or `biadjacency`: the adjacency/biadjacency matrix for the dataset
Expand Down Expand Up @@ -240,7 +237,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
logger.print_log('Loading from local bundle...')
return load_from_numpy_bundle(name + '_bundle', data_path)

dataset = Bunch()
dataset = Dataset()
path = data_konect / name / name
if not path.exists() or len(listdir(path)) == 0:
raise Exception("No data downloaded.")
Expand Down Expand Up @@ -269,7 +266,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
else:
dataset.meta.name = name
else:
dataset.meta = Bunch()
dataset.meta = Dataset()
dataset.meta.name = name

if auto_numpy_bundle:
Expand All @@ -280,12 +277,12 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
return dataset


def save_to_numpy_bundle(data: Bunch, bundle_name: str, data_home: Optional[Union[str, Path]] = None):
def save_to_numpy_bundle(data: Dataset, bundle_name: str, data_home: Optional[Union[str, Path]] = None):
"""Save a dataset in the specified data home to a collection of Numpy and Pickle files for faster subsequent loads.
Parameters
----------
data: Bunch
data: Dataset
Data to save.
bundle_name: str
Name to be used for the bundle folder.
Expand Down Expand Up @@ -317,7 +314,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
Returns
-------
data: Bunch
data: Dataset
Data.
"""
data_home = get_data_home(data_home)
Expand All @@ -326,7 +323,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
raise FileNotFoundError('No bundle at ' + str(data_path))
else:
files = listdir(data_path)
data = Bunch()
data = Dataset()
for file in files:
if len(file.split('.')) == 2:
file_name, file_extension = file.split('.')
Expand All @@ -340,21 +337,21 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
return data


def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Dataset]):
"""Save a dataset or a CSR matrix in the current directory to a collection of Numpy and Pickle files for faster
subsequent loads. Supported attribute types include sparse matrices, NumPy arrays, strings and Dataset objects.
Parameters
----------
folder : str or :class:`pathlib.Path`
Name of the bundle folder.
data : Union[sparse.csr_matrix, Bunch]
data : Union[sparse.csr_matrix, Dataset]
Data to save.
Example
-------
>>> from sknetwork.data import save
>>> dataset = Bunch()
>>> dataset = Dataset()
>>> dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
>>> dataset.names = np.array(['a', 'b', 'c'])
>>> save('dataset', dataset)
Expand All @@ -366,7 +363,7 @@ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
if folder.exists():
shutil.rmtree(folder)
if isinstance(data, sparse.csr_matrix):
dataset = Bunch()
dataset = Dataset()
if is_square(data):
dataset.adjacency = data
else:
Expand All @@ -388,13 +385,13 @@ def load(folder: Union[str, Path]):
Returns
-------
data: Bunch
data: Dataset
Data.
Example
-------
>>> from sknetwork.data import save
>>> dataset = Bunch()
>>> dataset = Dataset()
>>> dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
>>> dataset.names = np.array(['a', 'b', 'c'])
>>> save('dataset', dataset)
Expand Down
Loading

0 comments on commit adaaf59

Please sign in to comment.