Skip to content

Commit

Permalink
Refactored evaluate. Made evaluate available for pipelines. Simplifie…
Browse files Browse the repository at this point in the history
…d evaluate logic.
  • Loading branch information
Pringled committed Feb 17, 2025
1 parent f939695 commit aa07183
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 33 deletions.
4 changes: 2 additions & 2 deletions model2vec/inference/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@
for extra_dependency in get_package_extras("model2vec", _REQUIRED_EXTRA):
importable(extra_dependency, _REQUIRED_EXTRA)

from model2vec.inference.model import StaticModelPipeline
from model2vec.inference.model import StaticModelPipeline, evaluate_single_or_multi_label

__all__ = ["StaticModelPipeline"]
__all__ = ["StaticModelPipeline", "evaluate_single_or_multi_label"]
62 changes: 62 additions & 0 deletions model2vec/inference/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,24 @@
import re
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import TypeVar

import huggingface_hub
import numpy as np
import skops.io
from sklearn.metrics import classification_report
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MultiLabelBinarizer

from model2vec.hf_utils import _create_model_card
from model2vec.model import PathLike, StaticModel

_DEFAULT_TRUST_PATTERN = re.compile(r"sklearn\..+")
_DEFAULT_MODEL_FILENAME = "pipeline.skops"

LabelType = TypeVar("LabelType", list[str], list[list[str]])


class StaticModelPipeline:
def __init__(self, model: StaticModel, head: Pipeline) -> None:
Expand Down Expand Up @@ -169,6 +174,26 @@ def predict_proba(

return self.head.predict_proba(encoded)

def evaluate(
    self, X: list[str], y: LabelType, batch_size: int = 1024, threshold: float = 0.5, output_dict: bool = False
) -> str | dict[str, dict[str, float]]:
    """
    Evaluate the classifier on a given dataset using scikit-learn's classification report.

    Predictions are obtained from `self.predict` (with the progress bar enabled) and
    compared against `y` by `evaluate_single_or_multi_label`, using `self.classes_`
    as the set of known classes.

    :param X: The texts to predict on.
    :param y: The ground truth labels.
    :param batch_size: The batch size, forwarded to `predict`.
    :param threshold: The threshold for multilabel classification, forwarded to `predict`.
    :param output_dict: Whether to output the classification report as a dictionary.
    :return: A classification report.
    """
    predictions = self.predict(X, show_progress_bar=True, batch_size=batch_size, threshold=threshold)

    # Single- vs multi-label handling lives in the shared helper.
    return evaluate_single_or_multi_label(
        predictions=predictions, y=y, classes=self.classes_, output_dict=output_dict
    )


def _load_pipeline(
folder_or_repo_path: PathLike, token: str | None = None, trust_remote_code: bool = False
Expand Down Expand Up @@ -244,3 +269,40 @@ def save_pipeline(pipeline: StaticModelPipeline, folder_path: str | Path) -> Non
language=pipeline.model.language,
template_path="modelcards/classifier_template.md",
)


def _is_multi_label_shaped(y: LabelType) -> bool:
"""Check if the labels are in a multi-label shape."""
return isinstance(y, (list, tuple)) and len(y) > 0 and isinstance(y[0], (list, tuple, set))


def evaluate_single_or_multi_label(
    predictions: np.ndarray,
    y: LabelType,
    classes: np.ndarray,
    output_dict: bool = False,
) -> str | dict[str, dict[str, float]]:
    """
    Evaluate the classifier on a given dataset using scikit-learn's classification report.

    Multi-label targets (as detected by `_is_multi_label_shaped`) are binarized
    together with the predictions before scoring. Single-label targets are scored
    on their raw class values.

    :param predictions: The predictions.
    :param y: The ground truth labels.
    :param classes: The classes of the classifier.
    :param output_dict: Whether to output the classification report as a dictionary.
    :return: A classification report.
    """
    if _is_multi_label_shaped(y):
        # Turn both sides into indicator matrices whose columns follow `classes`.
        mlb = MultiLabelBinarizer(classes=classes)
        y = mlb.fit_transform(y)
        predictions = mlb.transform(predictions)
        # After binarization a "label" is a column index of the indicator matrix.
        labels = np.arange(len(classes))
    else:
        # Single-label targets are not encoded, so the report has to be keyed on the
        # class values themselves; positional indices would not match e.g. string labels.
        labels = np.asarray(classes)

    return classification_report(
        y,
        predictions,
        labels=labels,
        target_names=[str(c) for c in classes],
        output_dict=output_dict,
        zero_division=0,
    )
34 changes: 5 additions & 29 deletions model2vec/train/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer
from sklearn.preprocessing import MultiLabelBinarizer
from tokenizers import Tokenizer
from torch import nn
from tqdm import trange

from model2vec.inference import StaticModelPipeline
from model2vec.inference import StaticModelPipeline, evaluate_single_or_multi_label
from model2vec.train.base import FinetunableStaticModel, TextDataset

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -243,33 +243,9 @@ def evaluate(
"""
self.eval()
predictions = self.predict(X, show_progress_bar=True, batch_size=batch_size, threshold=threshold)

if not self.multilabel:
# Encode the labels using a LabelEncoder
label_encoder = LabelEncoder()
label_idx = label_encoder.fit_transform(self.classes_)
y = label_encoder.transform(y)
predictions = label_encoder.transform(predictions)
report = classification_report(
y,
predictions,
labels=label_idx,
target_names=[str(c) for c in self.classes_],
output_dict=output_dict,
zero_division=0,
)
else:
# Encode the labels using a MultiLabelBinarizer
mlb = MultiLabelBinarizer(classes=self.classes)
y = mlb.fit_transform(y)
predictions = mlb.transform(predictions)
report = classification_report(
y,
predictions,
target_names=[str(c) for c in mlb.classes_],
output_dict=output_dict,
zero_division=0,
)
report = evaluate_single_or_multi_label(
predictions=predictions, y=y, classes=self.classes, output_dict=output_dict
)

return report

Expand Down
20 changes: 18 additions & 2 deletions tests/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


def test_init_predict(mock_inference_pipeline: StaticModelPipeline) -> None:
"""Test successful initialization of StaticModelPipeline."""
"""Test successful init and predict with StaticModelPipeline."""
target: list[str] | list[list[str]]
if mock_inference_pipeline.multilabel:
if isinstance(mock_inference_pipeline.classes_[0], str):
Expand All @@ -26,11 +26,27 @@ def test_init_predict(mock_inference_pipeline: StaticModelPipeline) -> None:


def test_init_predict_proba(mock_inference_pipeline: StaticModelPipeline) -> None:
"""Test successful initialization of StaticModelPipeline."""
"""Test successful init and predict_proba with StaticModelPipeline."""
assert mock_inference_pipeline.predict_proba("dog").argmax() == 1
assert mock_inference_pipeline.predict_proba(["dog"]).argmax(1).tolist() == [1]


def test_init_evaluate(mock_inference_pipeline: StaticModelPipeline) -> None:
    """Test successful init and evaluate with StaticModelPipeline."""
    # Build a one-sample target matching the fixture's label shape and label type.
    target: list[str] | list[list[str]]
    if mock_inference_pipeline.multilabel:
        if isinstance(mock_inference_pipeline.classes_[0], str):
            target = [["a", "b"]]
        else:
            target = [[0, 1]]  # type: ignore
    else:
        if isinstance(mock_inference_pipeline.classes_[0], str):
            target = ["b"]
        else:
            target = [1]  # type: ignore
    report = mock_inference_pipeline.evaluate("dog", target)  # type: ignore
    # With the default output_dict=False the report is the textual classification report.
    assert isinstance(report, str)


def test_roundtrip_save(mock_inference_pipeline: StaticModelPipeline) -> None:
"""Test saving and loading the pipeline."""
with TemporaryDirectory() as temp_dir:
Expand Down

0 comments on commit aa07183

Please sign in to comment.