
Commit

Merge branch 'issues/CSD-301-custom-types' into develop
diegoabt committed Dec 4, 2024
2 parents 574522d + 509eecf commit 045a76c
Showing 28 changed files with 374 additions and 299 deletions.
88 changes: 48 additions & 40 deletions probinet/evaluation/community_detection.py
@@ -3,10 +3,41 @@
 """
 
 from contextlib import suppress
+from typing import Set
 
 import numpy as np
 
 
+def calculate_metric(ground_truth: Set[int], detected: Set[int], metric: str) -> float:
+    """
+    Calculate a metric for evaluating community detection.
+
+    Parameters
+    ----------
+    ground_truth : Set[int]
+        The set of ground truth nodes.
+    detected : Set[int]
+        The set of detected nodes.
+    metric : str
+        The metric to use for evaluation ('f1' or 'jaccard').
+
+    Returns
+    -------
+    float
+        The calculated metric value.
+    """
+    if not len(ground_truth.intersection(detected)):
+        return 0.0
+    precision = len(ground_truth.intersection(detected)) / len(detected)
+    recall = len(ground_truth.intersection(detected)) / len(ground_truth)
+    if metric == "f1":
+        return 2 * (precision * recall) / (precision + recall)
+    elif metric == "jaccard":
+        return len(ground_truth.intersection(detected)) / len(
+            ground_truth.union(detected)
+        )
+
+
 def compute_community_detection_metric(
     U_infer: np.ndarray, U0: np.ndarray, metric: str = "f1", com: bool = False
 ) -> float:
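
A quick, editorial usage sketch of the new helper (not part of the commit); the numbers only illustrate the two supported metrics, and the import path follows the file location shown above:

from probinet.evaluation.community_detection import calculate_metric

ground_truth = {0, 1, 2, 3}
detected = {2, 3, 4}

# Precision = 2/3, recall = 2/4, so F1 = 2 * (2/3 * 1/2) / (2/3 + 1/2) ≈ 0.5714.
print(calculate_metric(ground_truth, detected, "f1"))
# Jaccard = |{2, 3}| / |{0, 1, 2, 3, 4}| = 0.4.
print(calculate_metric(ground_truth, detected, "jaccard"))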
@@ -15,52 +46,29 @@ def compute_community_detection_metric(
     """
     if metric not in {"f1", "jaccard"}:
         raise ValueError('The similarity measure can be either "f1" or "jaccard"!')
+
     K = U0.shape[1]
-    gt = {}
+    threshold = 1 / K
+    # Create the ground truth dictionary for each community in the original partition. The key is
+    # the community index and the value is the set of nodes in that community, i.e., the nodes
+    # with a value greater than the threshold in the corresponding column of the original partition.
+    gt = {i: set(np.argwhere(U0[:, i] > threshold).flatten()) for i in range(K)}
     d = {}
-    threshold = 1 / U0.shape[1]
     for i in range(K):
-        gt[i] = list(np.argwhere(U0[:, i] > threshold).flatten())
         if com:
             with suppress(IndexError):
-                d[i] = U_infer[i]
+                d[i] = set(U_infer[i])
         else:
-            d[i] = list(np.argwhere(U_infer[:, i] > threshold).flatten())
-    R = 0
-    for i in np.arange(K):
-        ground_truth = set(gt[i])
-        _max = -1
-        M = 0
-        for j in d.keys():
-            detected = set(d[j])
-            if len(ground_truth & detected) != 0:
-                precision = len(ground_truth & detected) / len(detected)
-                recall = len(ground_truth & detected) / len(ground_truth)
-                if metric == "f1":
-                    M = 2 * (precision * recall) / (precision + recall)
-                elif metric == "jaccard":
-                    M = len(ground_truth & detected) / len(ground_truth.union(detected))
-            if M > _max:
-                _max = M
-        R += _max
-    S = 0
-    for j in d.keys():
-        detected = set(d[j])
-        _max = -1
-        M = 0
-        for i in np.arange(K):
-            ground_truth = set(gt[i])
-            if len(ground_truth & detected) != 0:
-                precision = len(ground_truth & detected) / len(detected)
-                recall = len(ground_truth & detected) / len(ground_truth)
-                if metric == "f1":
-                    M = 2 * (precision * recall) / (precision + recall)
-                elif metric == "jaccard":
-                    M = len(ground_truth & detected) / len(ground_truth.union(detected))
-            if M > _max:
-                _max = M
-        S += _max
-    return np.round(R / (2 * len(gt)) + S / (2 * len(d)), 4)
+            d[i] = set(np.argwhere(U_infer[:, i] > threshold).flatten())
+
+    R = sum(
+        max(calculate_metric(gt[i], d[j], metric) for j in d.keys()) for i in range(K)
+    )
+    S = sum(
+        max(calculate_metric(gt[i], d[j], metric) for i in range(K)) for j in d.keys()
+    )
+    # Return the average of the two measures
+    return np.round(R / (2 * K) + S / (2 * len(d)), 4)
 
 
 def compute_permutation_matrix(U_infer: np.ndarray, U0: np.ndarray) -> np.ndarray:
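
For orientation, a hedged end-to-end sketch of the refactored scorer on toy membership matrices (shapes and values are invented for illustration and do not come from the repository's tests):

import numpy as np

from probinet.evaluation.community_detection import compute_community_detection_metric

# Two hard partitions of 4 nodes into K = 2 communities, in one-hot form.
U0 = np.array([[1, 0], [1, 0], [0, 1], [0, 1]], dtype=float)       # ground-truth memberships
U_infer = np.array([[1, 0], [1, 0], [1, 0], [0, 1]], dtype=float)  # one node misassigned

# With com=False, both matrices are thresholded at 1/K to build a node set per community,
# and the best-matching scores are averaged in both directions.
print(compute_community_detection_metric(U_infer, U0, metric="f1"))  # ≈ 0.7333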
6 changes: 3 additions & 3 deletions probinet/evaluation/expectation_computation.py
@@ -2,15 +2,15 @@
 Functions for computing expectations and related metrics.
 """
 
-from typing import Optional, Tuple, Union
+from typing import Optional, Tuple
 
 import numpy as np
 import pandas as pd
 from scipy.stats import poisson
 from sklearn import metrics
-from sparse import COO
 
 from ..model_selection.labeling import extract_true_label, predict_label
+from ..types import GraphDataType
 from ..utils.matrix_operations import transpose_matrix, transpose_tensor
 from ..utils.tools import check_symmetric
 
@@ -32,7 +32,7 @@ def calculate_conditional_expectation(
 
 
 def calculate_conditional_expectation_dyncrep(
-    B_to_T: Union[COO, np.ndarray],
+    B_to_T: GraphDataType,
     u: np.ndarray,
     v: np.ndarray,
     w: np.ndarray,
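
The new annotation presumably aliases the union that was previously spelled out inline; a minimal sketch of what probinet/types.py might declare (that module is not part of this diff, so treat this as an assumption):

from typing import Union

import numpy as np
from sparse import COO

# Hypothetical reconstruction: one alias for "dense or sparse graph data",
# so signatures such as calculate_conditional_expectation_dyncrep stay short.
GraphDataType = Union[COO, np.ndarray]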
3 changes: 1 addition & 2 deletions probinet/evaluation/likelihood.py
@@ -198,8 +198,7 @@ def likelihood_conditional(
     # Check for NaN values in the log-likelihood
     if np.isnan(l):
         log_and_raise_error(ValueError, "Likelihood is NaN!")
-    else:
-        return l
+    return l
 
 
 def PSloglikelihood(
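
Dropping the else is safe only if log_and_raise_error actually raises; a hedged sketch of that helper's assumed contract (its real definition lives elsewhere in the package and is not shown in this diff):

import logging

def log_and_raise_error(error_type: type, message: str) -> None:
    # Assumed behaviour: log the message, then raise, so callers never fall through.
    logging.error(message)
    raise error_type(message)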
20 changes: 8 additions & 12 deletions probinet/evaluation/link_prediction.py
@@ -12,23 +12,18 @@
     compute_expected_adjacency_tensor_multilayer,
 )
 
 
 @singledispatch
-def mask_or_flatten_array(mask: Union[np.ndarray, None], expected_adjacency: np.ndarray) -> np.ndarray:
+def mask_or_flatten_array(
+    mask: Union[np.ndarray, None], expected_adjacency: np.ndarray
+) -> np.ndarray:
     raise NotImplementedError(f"Unsupported type {type(mask)} for mask.")
 
 
-@mask_or_flatten_array.register(np.ndarray)
-def _(mask: np.ndarray, expected_adjacency: np.ndarray) -> np.ndarray:
-    return expected_adjacency[mask > 0]
-
-
-@singledispatch
-def mask_or_flatten_array(mask: None, expected_adjacency: np.ndarray) -> np.ndarray:
-    return expected_adjacency.flatten()
+@mask_or_flatten_array.register(type(None))
+def _(mask: None, expected_adjacency: np.ndarray) -> np.ndarray:
+    return expected_adjacency.flatten()
 
 
 @mask_or_flatten_array.register(np.ndarray)
 def _(mask: np.ndarray, expected_adjacency: np.ndarray) -> np.ndarray:
@@ -179,5 +174,6 @@ def calculate_f1_score(
     # Binarize the data
     data = (data0 > 0).astype("int")
 
-    return metrics.f1_score(mask_or_flatten_array(mask, data), mask_or_flatten_array(mask, Z_pred))
-
+    return metrics.f1_score(
+        mask_or_flatten_array(mask, data), mask_or_flatten_array(mask, Z_pred)
+    )
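
A short usage sketch of the consolidated singledispatch helper (values are illustrative; the ndarray overload is assumed to keep returning the masked entries, as in the removed code):

import numpy as np

from probinet.evaluation.link_prediction import mask_or_flatten_array

expected = np.array([[0.1, 0.9], [0.4, 0.6]])
mask = np.array([[1, 0], [0, 1]])

print(mask_or_flatten_array(None, expected))  # None overload: flatten -> [0.1 0.9 0.4 0.6]
print(mask_or_flatten_array(mask, expected))  # ndarray overload: masked entries -> [0.1 0.6]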
64 changes: 27 additions & 37 deletions probinet/input/loader.py
@@ -11,7 +11,6 @@
 import networkx as nx
 import numpy as np
 import pandas as pd
-from numpy import ndarray
 
 from .preprocessing import (
     create_adjacency_tensor_from_graph_list,
@@ -34,44 +33,35 @@ def build_adjacency_from_file(
     **_kwargs: Any,
 ) -> GraphData:
     """
     Import data, i.e. the adjacency matrix, from a given folder.
 
     Return the NetworkX graph and its numpy adjacency matrix.
 
     Parameters
     ----------
     path_to_file
         Path of the input file.
     ego
         Name of the column to consider as the source of the edge.
     alter
         Name of the column to consider as the target of the edge.
     force_dense
         If set to True, the algorithm is forced to consider a dense adjacency tensor.
     undirected
         If set to True, the algorithm considers an undirected graph.
     noselfloop
         If set to True, the algorithm removes the self-loops.
     sep
         Separator to use when reading the dataset.
     binary
         If set to True, the algorithm reads the graph with binary edges.
     header
         Row number to use as the column names, and the start of the data.
 
     Returns
     -------
-    A
-        List of MultiDiGraph NetworkX objects representing the layers of the network.
-    B
-        Graph adjacency tensor. If `force_dense` is True, returns a dense ndarray. Otherwise, returns a sparse COO tensor.
-    B_T
-        Transposed graph adjacency tensor. Returns None if `force_dense` is True.
-    data_T_vals
-        Array with values of entries A[j, i] if entry A[i, j] is non-zero. Returns None if
-        `force_dense` is True.
-    nodes
-        List of node IDs
+    GraphData
+        Named tuple containing the graph list, the adjacency tensor, the transposed tensor, the data values, and the nodes.
     """
 
     # Read adjacency file
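
A hedged call sketch for the loader (the file name, column labels, and GraphData attribute names other than adjacency_tensor are invented for illustration; the parameter names come from the docstring above):

from probinet.input.loader import build_adjacency_from_file

gdata = build_adjacency_from_file(
    path_to_file="example_network.csv",  # hypothetical edge list, one row per edge
    ego="source",
    alter="target",
    force_dense=True,
    undirected=False,
    noselfloop=True,
    binary=True,
    sep=",",
    header=0,
)
# GraphData is a named tuple; the adjacency tensor is accessed as gdata.adjacency_tensor
# (that attribute name also appears in cross_validation.py below).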
3 changes: 2 additions & 1 deletion probinet/input/preprocessing.py
@@ -12,6 +12,7 @@
 import scipy
 from sparse import COO
 
+from ..types import GraphDataType
 from ..utils import tools
 
 
@@ -162,7 +163,7 @@ def create_sparse_adjacency_tensor_from_graph_list(
     return data
 
 
-def preprocess_adjacency_tensor(A: np.ndarray) -> Union[COO, np.ndarray]:
+def preprocess_adjacency_tensor(A: np.ndarray) -> GraphDataType:
     """
     Pre-process input data tensor.
8 changes: 8 additions & 0 deletions probinet/main.py
@@ -29,6 +29,14 @@
 
 
 def parse_args():
+    """
+    Parse the command-line arguments.
+
+    Returns
+    -------
+    args : argparse.Namespace
+        Parsed arguments.
+    """
     parser = argparse.ArgumentParser(
         description="Script to run the CRep, JointCRep, DynCRep, MTCOV, and ACD algorithms.",
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
25 changes: 24 additions & 1 deletion probinet/model_selection/cross_validation.py
@@ -32,6 +32,10 @@
 
 
 class CrossValidation(ABC):
+    """
+    Abstract class to implement cross-validation for a given algorithm.
+    """
+
     def __init__(
         self, algorithm, model_parameters, cv_parameters, numerical_parameters=None
     ):
@@ -44,6 +48,9 @@ def __init__(
             setattr(self, key, value)
 
     def prepare_output_directory(self):
+        """
+        Prepare the output directory to save the results.
+        """
         if not os.path.exists(self.out_folder):
             os.makedirs(self.out_folder)
 
@@ -101,7 +108,23 @@ def load_data(self):
             sep=self.sep,
         )
 
-    def prepare_and_run(self, mask):
+    def prepare_and_run(self, mask: np.ndarray):
+        """
+        Prepare the data for training and run the algorithm.
+
+        Parameters
+        ----------
+        mask: np.ndarray
+            The mask to apply on the data.
+
+        Returns
+        -------
+        tuple
+            The outputs of the algorithm.
+        object
+            The algorithm object.
+        """
+        # Create a copy of the adjacency matrix B to use for training
         B_train = self.gdata.adjacency_tensor.copy()
 
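
A hedged sketch of the pattern prepare_and_run's docstring describes; everything past the B_train copy is an assumption about typical hold-out masking, since the rest of the method body is not shown in this diff:

import numpy as np

def apply_holdout_mask(adjacency_tensor: np.ndarray, mask: np.ndarray) -> np.ndarray:
    # Keep a training copy and zero out the held-out entries selected by the mask.
    B_train = adjacency_tensor.copy()
    B_train[mask > 0] = 0
    return B_train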
27 changes: 24 additions & 3 deletions probinet/model_selection/main.py
@@ -3,6 +3,7 @@
 """
 
 import logging
+from typing import Any, Optional
 
 import pandas as pd
 
@@ -15,9 +16,29 @@
 
 
 def cross_validation(
-    algorithm, model_parameters, cv_parameters, numerical_parameters=None
-):
-
+    algorithm: str,
+    model_parameters: dict[str, Any],
+    cv_parameters: dict[str, Any],
+    numerical_parameters: Optional[dict[str, Any]] = None,
+) -> pd.DataFrame:
+    """
+    Run cross-validation for a given algorithm.
+
+    Parameters
+    ----------
+    algorithm
+        String with the name of the algorithm to run.
+    model_parameters
+        Dictionary with the parameters for the algorithm.
+    cv_parameters
+        Dictionary with the parameters for the cross-validation.
+    numerical_parameters
+        Dictionary with the numerical parameters for the algorithm, like the number of iterations, etc.
+
+    Returns
+    -------
+    results_df
+        DataFrame with the results of the cross-validation.
+    """
     if numerical_parameters is None:
         numerical_parameters = {}
     cv_classes = {
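
A hedged usage sketch of the annotated entry point (dictionary contents are invented; only the argument names, the return type, and the algorithm names in main.py's description are taken from this diff):

from probinet.model_selection.main import cross_validation

results_df = cross_validation(
    algorithm="CRep",                     # one of the algorithms listed in main.py
    model_parameters={"K": 3},            # hypothetical model settings
    cv_parameters={"out_folder": "cv/"},  # hypothetical cross-validation settings
)
print(results_df.head())                  # the annotation promises a pandas DataFrame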
