Commit b4792b5: add registry
Parry-Parry committed Oct 31, 2024
1 parent faac2a5
Showing 5 changed files with 157 additions and 56 deletions.
157 changes: 136 additions & 21 deletions rankers/train/loss/__init__.py
@@ -1,8 +1,142 @@
from collections import defaultdict
import torch.nn as nn
import torch.nn.functional as F
import torch
from torch import Tensor
import functools

class SingletonMeta(type):
"""
Metaclass to implement the Singleton design pattern.
Ensures only one instance of a class can be created.
"""
_instances = {}

def __call__(cls, *args, **kwargs):
"""
Controlled object creation to ensure only one instance exists.
"""
if cls not in cls._instances:
# If no instance exists, create one
cls._instances[cls] = super().__call__(*args, **kwargs)
return cls._instances[cls]

class LossFunctionRegistry(metaclass=SingletonMeta):
"""
A singleton registry for managing and retrieving loss functions by name.
Supports both built-in PyTorch losses and custom loss functions.
"""

def __init__(self):
# Check if the registry has already been initialized
if not hasattr(self, '_registry'):
# Dictionary to store registered loss functions
self._registry = {}

# Automatically register built-in PyTorch loss functions
self._register_builtin_losses()

def _register_builtin_losses(self):
"""
Automatically register common PyTorch loss functions.
"""
builtin_losses = {
# Basic losses
'mse': nn.MSELoss,
'l1': nn.L1Loss,
'cross_entropy': nn.CrossEntropyLoss,
'nll': nn.NLLLoss,
'binary_cross_entropy': nn.BCELoss,
'binary_cross_entropy_with_logits': nn.BCEWithLogitsLoss,

# Reduction variant losses
'mse_sum': functools.partial(nn.MSELoss, reduction='sum'),
'mse_none': functools.partial(nn.MSELoss, reduction='none'),
}

for name, loss_fn in builtin_losses.items():
self.register(name, loss_fn)

def register(self, name, loss_fn):
"""
Register a loss function with a given name.
Args:
name (str): Name to register the loss function under
loss_fn (callable): Loss function to register
"""
if not callable(loss_fn):
raise TypeError(f"Loss function {name} must be callable")

if name in self._registry:
print(f"Warning: Overwriting existing loss function '{name}'")

self._registry[name] = loss_fn

def get(self, name, **kwargs):
"""
Retrieve a loss function by name.
Args:
name (str): Name of the loss function
**kwargs: Additional arguments to pass to the loss function constructor
Returns:
callable: Instantiated loss function
Raises:
KeyError: If the loss function is not found in the registry
"""
if name not in self._registry:
available_losses = ", ".join(sorted(self._registry.keys()))
raise KeyError(f"Loss function '{name}' not found. Available losses: {available_losses}")

return self._registry[name](**kwargs)

@property
def available(self):
"""
List all available loss functions.
Returns:
list: Names of registered loss functions
"""
return list(self._registry.keys())

# Global singleton instance
LOSS_REGISTRY = LossFunctionRegistry()

def register_loss(name):
"""
Decorator to register a custom loss function.
Args:
name (str): Name to register the loss function under
Returns:
decorator function
"""
def decorator(loss_fn):
LOSS_REGISTRY.register(name, loss_fn)
return loss_fn
return decorator

class BaseLoss(nn.Module):
"""
Base class for Losses
Parameters
----------
reduction: str
the reduction type
"""
def __init__(self, reduction : str = 'mean') -> None:
super(BaseLoss, self).__init__()
self.reduction = reduction

def _reduce(self, a : torch.Tensor):
return reduce(a, self.reduction)

def forward(self, *args, **kwargs):
raise NotImplementedError

def reduce(a : torch.Tensor, reduction : str):
"""
@@ -28,25 +162,6 @@ def reduce(a : torch.Tensor, reduction : str):
return a.mean(dim=0).sum()
raise ValueError(f"Unknown reduction type: {reduction}")

class BaseLoss(nn.Module):
"""
Base class for Losses
Parameters
----------
reduction: str
the reduction type
"""
def __init__(self, reduction : str = 'mean') -> None:
super(BaseLoss, self).__init__()
self.reduction = reduction

def _reduce(self, a : torch.Tensor):
return reduce(a, self.reduction)

def forward(self, *args, **kwargs):
raise NotImplementedError

def normalize(a: Tensor, dim: int = -1):
"""
Normalizing a tensor along a given dimension.
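For context, a minimal usage sketch of the new registry (not part of the commit), assuming the package is importable as rankers; the names 'mse' and 'mse_sum' come from _register_builtin_losses above:

from rankers.train.loss import LOSS_REGISTRY

# Names registered so far: built-ins plus anything added via @register_loss.
print(LOSS_REGISTRY.available)

# get() instantiates the loss; extra kwargs are passed to the constructor.
mse = LOSS_REGISTRY.get('mse')          # equivalent to nn.MSELoss()
mse_sum = LOSS_REGISTRY.get('mse_sum')  # partial with reduction='sum'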
23 changes: 9 additions & 14 deletions rankers/train/loss/listwise.py
@@ -1,8 +1,9 @@
import torch
from torch import Tensor
from torch.nn import functional as F
from . import BaseLoss
from . import BaseLoss, register_loss

@register_loss('kl_div')
class KL_DivergenceLoss(BaseLoss):
"""KL Divergence loss"""

@@ -14,7 +15,7 @@ def __init__(self, reduction='batchmean', temperature=1.):
def forward(self, pred: Tensor, labels: Tensor) -> Tensor:
return self.kl_div(F.log_softmax(pred / self.temperature, dim=1), F.softmax(labels / self.temperature, dim=1))


@register_loss('ranknet')
class RankNetLoss(BaseLoss):
"""RankNet loss
https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf
@@ -38,7 +39,7 @@ def forward(self, pred: Tensor, labels: Tensor=None) -> Tensor:

return self.bce(pred_diff, labels)


@register_loss('distill_ranknet')
class DistillRankNetLoss(BaseLoss):
"""DistillRankNet loss
Very much a WIP from https://arxiv.org/pdf/2402.10769
@@ -64,6 +65,7 @@ def forward(self, pred: Tensor, labels: Tensor) -> Tensor:

return self._reduce(final_margin[labels])

@register_loss('listnet')
class ListNetLoss(BaseLoss):
"""ListNet loss
"""
@@ -78,6 +80,7 @@ def forward(self, pred: Tensor, labels: Tensor) -> Tensor:
labels = F.softmax(labels / self.temperature, dim=1)
return self._reduce(-torch.sum(labels * F.log_softmax(pred + self.epsilon / self.temperature, dim=1), dim=-1))

@register_loss('poly1')
class Poly1SoftmaxLoss(BaseLoss):
"""Poly1 softmax loss with automatic softmax handling and reduction."""

@@ -137,6 +140,7 @@ def get_ndcg(
ndcg = dcg / (idcg.clamp(min=1e-12))
return ndcg

@register_loss('approx_ndcg')
class ApproxNDCGLoss(BaseLoss):
def __init__(self, reduction: str = 'mean', temperature=1., scale_gains: bool = True) -> None:
super().__init__(reduction)
@@ -159,6 +163,7 @@ def get_mrr(ranks: torch.Tensor, labels: torch.Tensor, k: int | None = None) ->
mrr = mrr.max(dim=-1)[0]
return mrr

@register_loss('approx_mrr')
class ApproxMRRLoss(BaseLoss):
def __init__(self, reduction: str = 'mean', temperature=1.) -> None:
super().__init__(reduction)
@@ -179,14 +184,4 @@ def forward(self, pred: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
'Poly1SoftmaxLoss',
'ApproxNDCGLoss',
'ApproxMRRLoss',
]

LISTWISE_LOSSES = {
'kl_div': KL_DivergenceLoss,
'ranknet': RankNetLoss,
'distill_ranknet': DistillRankNetLoss,
'listnet': ListNetLoss,
'poly1': Poly1SoftmaxLoss,
'approx_ndcg': ApproxNDCGLoss,
'approx_mrr': ApproxMRRLoss,
}
]
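As an illustration of the decorator path (not part of the commit), a custom listwise loss can subclass BaseLoss and register itself; NegScoreLoss and the name 'neg_score' below are hypothetical:

import torch
from rankers.train.loss import BaseLoss, register_loss, LOSS_REGISTRY

@register_loss('neg_score')  # hypothetical name, registered alongside 'listnet' etc.
class NegScoreLoss(BaseLoss):
    """Toy loss: negated score of the first (positive) document."""
    def forward(self, pred: torch.Tensor, labels: torch.Tensor = None) -> torch.Tensor:
        return self._reduce(-pred[:, 0])

loss_fn = LOSS_REGISTRY.get('neg_score', reduction='mean')
print(loss_fn(torch.randn(4, 8)))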
20 changes: 7 additions & 13 deletions rankers/train/loss/pairwise.py
@@ -1,10 +1,11 @@
import torch
from torch import Tensor
import torch.nn.functional as F
from . import BaseLoss
from . import BaseLoss, register_loss

residual = lambda x : x[:, 0].unsqueeze(1) - x[:, 1:]

@register_loss('margin_mse')
class MarginMSELoss(BaseLoss):
"""Margin MSE loss with residual calculation."""

@@ -13,7 +14,7 @@ def forward(self, pred: Tensor, labels: Tensor) -> Tensor:
residual_label = labels[:, 0].unsqueeze(1) - labels[:, 1:]
return F.mse_loss(residual_pred, residual_label, reduction=self.reduction)


@register_loss('hinge')
class HingeLoss(BaseLoss):
"""Hinge loss with sigmoid activation and residual calculation."""

@@ -26,7 +27,7 @@ def forward(self, pred: Tensor, labels: Tensor) -> Tensor:
label_residuals = torch.sign(residual(F.sigmoid(labels)))
return self._reduce(F.relu(self.margin - (label_residuals * pred_residuals)))


@register_loss('clear')
class ClearLoss(BaseLoss):
"""Clear loss with margin and residual calculation."""

@@ -38,6 +39,7 @@ def forward(self, pred: Tensor, labels: Tensor) -> Tensor:
margin_b = self.margin - residual(labels)
return self._reduce(F.relu(margin_b - residual(pred)))

@register_loss('lce')
class LCELoss(BaseLoss):
"""LCE loss: Cross Entropy for NCE with localised examples."""
def forward(self, pred: Tensor, labels: Tensor=None) -> Tensor:
@@ -47,7 +49,7 @@ def forward(self, pred: Tensor, labels: Tensor=None) -> Tensor:
labels = torch.zeros(pred.size(0), dtype=torch.long, device=pred.device)
return F.cross_entropy(pred, labels, reduction=self.reduction)


@register_loss('contrastive')
class ContrastiveLoss(BaseLoss):
"""Contrastive loss with log_softmax and negative log likelihood."""

@@ -66,12 +68,4 @@ def forward(self, pred: Tensor, labels : Tensor = None) -> Tensor:
'ClearLoss',
'LCELoss',
'ContrastiveLoss',
]

PAIRWISE_LOSSES = {
'margin_mse': MarginMSELoss,
'hinge': HingeLoss,
'clear': ClearLoss,
'lce': LCELoss,
'contrastive': ContrastiveLoss,
}
]
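Because every class here subclasses BaseLoss, constructor kwargs forwarded by the registry apply uniformly; a small sketch (not part of the commit), assuming the package is importable:

import torch
from rankers.train.loss import LOSS_REGISTRY

# BaseLoss accepts `reduction`, so get() can forward it to any subclass.
margin_mse = LOSS_REGISTRY.get('margin_mse', reduction='sum')
pred = torch.randn(4, 3)    # column 0: positive document, columns 1+: negatives
labels = torch.randn(4, 3)
print(margin_mse(pred, labels))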
7 changes: 2 additions & 5 deletions rankers/train/loss/pointwise.py
@@ -1,8 +1,9 @@
import torch
from torch import Tensor
from torch.nn import functional as F
from . import BaseLoss
from . import BaseLoss, register_loss

@register_loss('pointwise_mse')
class PointwiseMSELoss(BaseLoss):
"""Pointwise MSE loss"""

@@ -11,10 +12,6 @@ def forward(self, pred: Tensor, labels: Tensor) -> Tensor:
flattened_labels = labels.view(-1)
return F.mse_loss(flattened_pred, flattened_labels, reduction=self.reduction)

POINTWISE_LOSSES = {
'mse': PointwiseMSELoss,
}

__all__ = [
'PointwiseMSELoss',
]
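Note the registered name here is 'pointwise_mse', which avoids colliding with the built-in 'mse' entry; re-registering a taken name is allowed but overwrites the old entry with a printed warning (see register() in __init__.py). A quick sketch, assuming the package is importable:

import torch.nn as nn
from rankers.train.loss import LOSS_REGISTRY

# Prints: Warning: Overwriting existing loss function 'mse'
LOSS_REGISTRY.register('mse', nn.SmoothL1Loss)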
6 changes: 3 additions & 3 deletions rankers/train/trainer.py
@@ -9,7 +9,7 @@
from datasets import Dataset
from transformers.trainer_utils import EvalLoopOutput, speed_metrics
from transformers.integrations.deepspeed import deepspeed_init
from . import loss
from .loss import LOSS_REGISTRY

logger = logging.getLogger(__name__)

@@ -21,8 +21,8 @@ class RankerTrainer(Trainer):
def __init__(self, *args, loss_fn=None, **kwargs) -> None:
super(RankerTrainer, self).__init__(*args, **kwargs)
if isinstance(loss_fn, str):
if loss_fn not in loss.__all__: raise ValueError(f"Unknown loss: {loss_fn}")
self.loss = getattr(loss, loss_fn)()
if loss_fn not in LOSS_REGISTRY.available: raise ValueError(f"Unknown loss: {loss_fn}, choices are {LOSS_REGISTRY.available}")
self.loss = LOSS_REGISTRY.get(loss_fn)
else:
self.loss = loss_fn
self.tokenizer = self.data_collator.tokenizer
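With the registry wired into the trainer, an unknown loss name now fails fast with the list of valid choices; LOSS_REGISTRY.get raises the same information as a KeyError. A minimal sketch of the error path, assuming the package is installed ('no_such_loss' is a hypothetical name):

from rankers.train.loss import LOSS_REGISTRY

try:
    LOSS_REGISTRY.get('no_such_loss')
except KeyError as err:
    print(err)  # Loss function 'no_such_loss' not found. Available losses: ...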
