MobileTeleSystems · blondered · Jan 13, 2025 · Dec 11, 2024 · Dec 11, 2024 · Dec 12, 2024
diff --git a/.readthedocs.yml b/.readthedocs.yml
@@ -8,7 +8,7 @@ build:
     pre_build:
       - cp -r examples docs/source/
     post_install:
-      - pip install --no-cache-dir poetry
+      - pip install --no-cache-dir poetry==1.8.5
       - poetry export -f requirements.txt -o requirements.txt -E all --without-hashes
       - pip install --no-cache-dir -r requirements.txt
 

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## Unreleased
+
+### Added
+- `ImplicitBPRWrapperModel` model ([#232](https://github.com/MobileTeleSystems/RecTools/pull/232))
 
 ## [0.9.0] - 11.12.2024
 

diff --git a/rectools/metrics/intersection.py b/rectools/metrics/intersection.py
@@ -12,7 +12,8 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 
-from typing import Dict, Hashable, Optional, Union
+from collections.abc import Hashable
+from typing import Dict, Optional, Union
 
 import attr
 import numpy as np

diff --git a/rectools/models/__init__.py b/rectools/models/__init__.py
@@ -38,6 +38,7 @@
 
 from .ease import EASEModel
 from .implicit_als import ImplicitALSWrapperModel
+from .implicit_bpr import ImplicitBPRWrapperModel
 from .implicit_knn import ImplicitItemKNNWrapperModel
 from .popular import PopularModel
 from .popular_in_category import PopularInCategoryModel
@@ -59,6 +60,7 @@
 __all__ = (
     "EASEModel",
     "ImplicitALSWrapperModel",
+    "ImplicitBPRWrapperModel",
     "ImplicitItemKNNWrapperModel",
     "LightFMWrapperModel",
     "PopularModel",

diff --git a/rectools/models/implicit_als.py b/rectools/models/implicit_als.py
@@ -22,15 +22,15 @@
 from implicit.cpu.als import AlternatingLeastSquares as CPUAlternatingLeastSquares
 from implicit.gpu.als import AlternatingLeastSquares as GPUAlternatingLeastSquares
 from implicit.utils import check_random_state
-from pydantic import BeforeValidator, ConfigDict, PlainSerializer, SerializationInfo, WrapSerializer
+from pydantic import BeforeValidator, ConfigDict, SerializationInfo, WrapSerializer
 from scipy import sparse
 from tqdm.auto import tqdm
 
 from rectools.dataset import Dataset, Features
 from rectools.exceptions import NotFittedError
 from rectools.models.base import ModelConfig
 from rectools.utils.misc import get_class_or_function_full_path, import_object
-from rectools.utils.serialization import RandomState
+from rectools.utils.serialization import DType, RandomState
 
 from .rank import Distance
 from .vector import Factors, VectorModel
@@ -68,10 +68,6 @@ def _serialize_alternating_least_squares_class(
     ),
 ]
 
-DType = tpe.Annotated[
-    np.dtype, BeforeValidator(func=np.dtype), PlainSerializer(func=lambda dtp: dtp.name, when_used="json")
-]
-
 
 class AlternatingLeastSquaresConfig(tpe.TypedDict):
     """Config for implicit `AlternatingLeastSquares` model."""

diff --git a/rectools/models/implicit_bpr.py b/rectools/models/implicit_bpr.py
@@ -0,0 +1,224 @@
+import typing as tp
+from copy import deepcopy
+
+import numpy as np
+import typing_extensions as tpe
+from implicit.bpr import BayesianPersonalizedRanking
+
+# pylint: disable=no-name-in-module
+from implicit.cpu.bpr import BayesianPersonalizedRanking as CPUBayesianPersonalizedRanking
+from implicit.gpu.bpr import BayesianPersonalizedRanking as GPUBayesianPersonalizedRanking
+
+# pylint: enable=no-name-in-module
+from pydantic import BeforeValidator, ConfigDict, SerializationInfo, WrapSerializer
+
+from rectools.dataset.dataset import Dataset
+from rectools.exceptions import NotFittedError
+from rectools.models.base import ModelConfig
+from rectools.models.rank import Distance
+from rectools.models.vector import Factors, VectorModel
+from rectools.utils.misc import get_class_or_function_full_path, import_object
+from rectools.utils.serialization import DType, RandomState
+
+BPR_STRING = "BayesianPersonalizedRanking"
+
+AnyBayesianPersonalizedRanking = tp.Union[CPUBayesianPersonalizedRanking, GPUBayesianPersonalizedRanking]
+BayesianPersonalizedRankingType = tp.Union[
+    tp.Type[AnyBayesianPersonalizedRanking], tp.Literal["BayesianPersonalizedRanking"]
+]
+
+
+def _get_bpr_class(spec: tp.Any) -> tp.Any:
+    if spec in (BPR_STRING, get_class_or_function_full_path(BayesianPersonalizedRanking)):
+        return "BayesianPersonalizedRanking"
+    if isinstance(spec, str):
+        return import_object(spec)
+    return spec
+
+
+def _serialize_bpr_class(
+    cls: BayesianPersonalizedRankingType, handler: tp.Callable, info: SerializationInfo
+) -> tp.Union[None, str, AnyBayesianPersonalizedRanking]:
+    if cls in (CPUBayesianPersonalizedRanking, GPUBayesianPersonalizedRanking) or cls == "BayesianPersonalizedRanking":
+        return BPR_STRING
+    if info.mode == "json":
+        return get_class_or_function_full_path(cls)
+    return cls
+
+
+BayesianPersonalizedRankingClass = tpe.Annotated[
+    BayesianPersonalizedRankingType,
+    BeforeValidator(_get_bpr_class),
+    WrapSerializer(
+        func=_serialize_bpr_class,
+        when_used="always",
+    ),
+]
+
+
+class BayesianPersonalizedRankingConfig(tpe.TypedDict):
+    """Config for implicit `BayesianPersonalizedRanking` model."""
+
+    cls: tpe.NotRequired[BayesianPersonalizedRankingClass]
+    factors: tpe.NotRequired[int]
+    learning_rate: tpe.NotRequired[float]
+    regularization: tpe.NotRequired[float]
+    dtype: tpe.NotRequired[DType]
+    num_threads: tpe.NotRequired[int]
+    iterations: tpe.NotRequired[int]
+    verify_negative_samples: tpe.NotRequired[bool]
+    random_state: tpe.NotRequired[RandomState]
+    use_gpu: tpe.NotRequired[bool]
+
+
+class ImplicitBPRWrapperModelConfig(ModelConfig):
+    """Config for `ImplicitBPRWrapperModel`"""
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    model: BayesianPersonalizedRankingConfig
+
+
+class ImplicitBPRWrapperModel(VectorModel[ImplicitBPRWrapperModelConfig]):
+    """
+    Wrapper for `implicit.bpr.BayesianPersonalizedRanking` model.
+
+    See https://benfred.github.io/implicit/api/models/cpu/bpr.html for details of the base model.
+
+    Parameters
+    ----------
+    model : BayesianPersonalizedRanking
+        Base model to wrap.
+    verbose : int, default ``0``
+        Degree of verbose output. If ``0``, no output will be provided.
+    """
+
+    recommends_for_warm = False
+    recommends_for_cold = False
+
+    u2i_dist = Distance.DOT
+    i2i_dist = Distance.COSINE
+
+    config_class = ImplicitBPRWrapperModelConfig
+
+    def __init__(self, model: AnyBayesianPersonalizedRanking, verbose: int = 0):
+        self._config = self._make_config(model, verbose)
+        super().__init__(verbose=verbose)
+        self.model: AnyBayesianPersonalizedRanking
+        self._model = model  # for refit
+
+        self.use_gpu = isinstance(model, GPUBayesianPersonalizedRanking)
+        if not self.use_gpu:
+            self.n_threads = model.num_threads
+
+    @classmethod
+    def _make_config(cls, model: AnyBayesianPersonalizedRanking, verbose: int) -> ImplicitBPRWrapperModelConfig:
+        model_cls = (
+            model.__class__
+            if model.__class__ not in (CPUBayesianPersonalizedRanking, GPUBayesianPersonalizedRanking)
+            else "BayesianPersonalizedRanking"
+        )
+
+        inner_model_config = {
+            "cls": model_cls,
+            "factors": model.factors,
+            "learning_rate": model.learning_rate,
+            "dtype": None,
+            "regularization": model.regularization,
+            "iterations": model.iterations,
+            "verify_negative_samples": model.verify_negative_samples,
+            "random_state": model.random_state,
+        }
+        if isinstance(model, GPUBayesianPersonalizedRanking):  # pragma: no cover
+            inner_model_config["use_gpu"] = True
+        else:
+            inner_model_config.update(
+                {
+                    "use_gpu": False,
+                    "dtype": model.dtype,
+                    "num_threads": model.num_threads,
+                }
+            )
+
+        return ImplicitBPRWrapperModelConfig(
+            cls=cls,
+            model=tp.cast(BayesianPersonalizedRankingConfig, inner_model_config),
+            verbose=verbose,
+        )
+
+    def _get_config(self) -> ImplicitBPRWrapperModelConfig:
+        return self._config
+
+    @classmethod
+    def _from_config(cls, config: ImplicitBPRWrapperModelConfig) -> tpe.Self:
+        inner_model_params = deepcopy(config.model)
+        inner_model_cls = inner_model_params.pop("cls", BayesianPersonalizedRanking)
+        inner_model_cls = tp.cast(tp.Callable, inner_model_cls)
+        if inner_model_cls == BPR_STRING:
+            inner_model_cls = BayesianPersonalizedRanking
+        model = inner_model_cls(**inner_model_params)
+        return cls(model=model, verbose=config.verbose)
+
+    def _fit(self, dataset: Dataset) -> None:
+        self.model = deepcopy(self._model)
+
+        ui_csr = dataset.get_user_item_matrix(include_weights=True).astype(np.float32)
+        self.model.fit(ui_csr, show_progress=self.verbose > 0)
+
+    def _get_users_factors(self, dataset: Dataset) -> Factors:
+        return Factors(get_users_vectors(self.model))
+
+    def _get_items_factors(self, dataset: Dataset) -> Factors:
+        return Factors(get_items_vectors(self.model))
+
+    def get_vectors(self) -> tp.Tuple[np.ndarray, np.ndarray]:
+        """
+        Return user and item vector representation from fitted model.
+
+        Returns
+        -------
+        (np.ndarray, np.ndarray)
+            User and item vectors.
+            Shapes are (n_users, n_factors) and (n_items, n_factors).
+        """
+        if not self.is_fitted:
+            raise NotFittedError(self.__class__.__name__)
+        return get_users_vectors(self.model), get_items_vectors(self.model)
+
+
+def get_users_vectors(model: AnyBayesianPersonalizedRanking) -> np.ndarray:
+    """
+    Get user vectors from BPR model as a numpy array.
+
+    Parameters
+    ----------
+    model : BayesianPersonalizedRanking
+        Fitted BPR model. Can be CPU or GPU model
+
+    Returns
+    -------
+    np.ndarray
+        User vectors.
+    """
+    if isinstance(model, GPUBayesianPersonalizedRanking):  # pragma: no cover
+        return model.user_factors.to_numpy()
+    return model.user_factors
+
+
+def get_items_vectors(model: AnyBayesianPersonalizedRanking) -> np.ndarray:
+    """
+    Get item vectors from BPR model as a numpy array.
+
+    Parameters
+    ----------
+    model : BayesianPersonalizedRanking
+        Fitted BPR model. Can be CPU or GPU model
+
+    Returns
+    -------
+    np.ndarray
+        Item vectors.
+    """
+    if isinstance(model, GPUBayesianPersonalizedRanking):  # pragma: no cover
+        return model.item_factors.to_numpy()
+    return model.item_factors
diff --git a/rectools/utils/serialization.py b/rectools/utils/serialization.py
@@ -17,7 +17,7 @@
 
 import numpy as np
 import typing_extensions as tpe
-from pydantic import PlainSerializer
+from pydantic import BeforeValidator, PlainSerializer
 
 FileLike = tp.Union[str, Path, tp.IO[bytes]]
 
@@ -37,6 +37,10 @@ def _serialize_random_state(rs: tp.Optional[tp.Union[None, int, np.random.Random
     PlainSerializer(func=_serialize_random_state, when_used="json"),
 ]
 
+DType = tpe.Annotated[
+    np.dtype, BeforeValidator(func=np.dtype), PlainSerializer(func=lambda dtp: dtp.name, when_used="json")
+]
+
 
 def read_bytes(f: FileLike) -> bytes:
     """Read bytes from a file."""

diff --git a/setup.cfg b/setup.cfg
@@ -67,6 +67,7 @@ show_column_numbers = True
 disable_error_code = type-arg
 
 [isort]
+profile = black
 line_length = 120
 wrap_length = 120
 multi_line_output = 3