From 0467655f594d2c8bdc029279388fbec64a2edad8 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 30 Dec 2023 12:20:20 +0100
Subject: [PATCH] :memo:

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 docs/_toc.yml                                |  4 --
 recommenders/evaluation/python_evaluation.py | 47 +++++++++++++++-----
 recommenders/evaluation/spark_evaluation.py  |  8 ++--
 3 files changed, 39 insertions(+), 20 deletions(-)

diff --git a/docs/_toc.yml b/docs/_toc.yml
index 1ef2b5fc3..90e4fe0e1 100644
--- a/docs/_toc.yml
+++ b/docs/_toc.yml
@@ -9,10 +9,6 @@ root: intro
 defaults:
   numbered: false
 parts:
-  - caption: Getting Started
-    chapters:
-      - file: ../SETUP.md
-      - file: TEMP
   - caption: Recommenders API Documentation
     chapters:
       - file: datasets
diff --git a/recommenders/evaluation/python_evaluation.py b/recommenders/evaluation/python_evaluation.py
index 7569c7246..e9adf621a 100644
--- a/recommenders/evaluation/python_evaluation.py
+++ b/recommenders/evaluation/python_evaluation.py
@@ -33,10 +33,28 @@
 
 
 class ColumnMismatchError(Exception):
+    """Exception raised when there is a mismatch in columns.
+
+    This exception is raised when an operation involving columns
+    encounters a mismatch or inconsistency.
+
+    Attributes:
+        message (str): Explanation of the error.
+    """
+
     pass
 
 
 class ColumnTypeMismatchError(Exception):
+    """Exception raised when there is a mismatch in column types.
+
+    This exception is raised when an operation involving column types
+    encounters a mismatch or inconsistency.
+
+    Attributes:
+        message (str): Explanation of the error.
+    """
+
     pass
 
 
@@ -63,7 +81,7 @@ def check_column_dtypes_wrapper(
         col_item=DEFAULT_ITEM_COL,
         col_prediction=DEFAULT_PREDICTION_COL,
         *args,
-        **kwargs
+        **kwargs,
     ):
         """Check columns of DataFrame inputs
 
@@ -81,12 +99,16 @@ def check_column_dtypes_wrapper(
             expected_true_columns.add(kwargs["col_rating"])
         if not has_columns(rating_true, expected_true_columns):
             raise ColumnMismatchError("Missing columns in true rating DataFrame")
-        
+
         if not has_columns(rating_pred, {col_user, col_item, col_prediction}):
             raise ColumnMismatchError("Missing columns in predicted rating DataFrame")
-        
-        if not has_same_base_dtype(rating_true, rating_pred, columns=[col_user, col_item]):
-            raise ColumnTypeMismatchError("Columns in provided DataFrames are not the same datatype")
+
+        if not has_same_base_dtype(
+            rating_true, rating_pred, columns=[col_user, col_item]
+        ):
+            raise ColumnTypeMismatchError(
+                "Columns in provided DataFrames are not the same datatype"
+            )
 
         return func(
             rating_true=rating_true,
@@ -95,7 +117,7 @@ def check_column_dtypes_wrapper(
             col_item=col_item,
             col_prediction=col_prediction,
             *args,
-            **kwargs
+            **kwargs,
         )
 
     return check_column_dtypes_wrapper
@@ -750,7 +772,9 @@ def map_at_k(
     if df_merge is None:
         return 0.0
     else:
-        return (df_merge["rr"] / df_merge["actual"].apply(lambda x: min(x, k))).sum() / n_users
+        return (
+            df_merge["rr"] / df_merge["actual"].apply(lambda x: min(x, k))
+        ).sum() / n_users
 
 
 def get_top_k_items(
@@ -837,7 +861,7 @@ def check_column_dtypes_diversity_serendipity_wrapper(
         col_sim=DEFAULT_SIMILARITY_COL,
         col_relevance=None,
         *args,
-        **kwargs
+        **kwargs,
     ):
         """Check columns of DataFrame inputs
 
@@ -904,7 +928,7 @@ def check_column_dtypes_diversity_serendipity_wrapper(
             col_sim=col_sim,
             col_relevance=col_relevance,
             *args,
-            **kwargs
+            **kwargs,
         )
 
     return check_column_dtypes_diversity_serendipity_wrapper
@@ -933,7 +957,7 @@ def check_column_dtypes_novelty_coverage_wrapper(
         col_user=DEFAULT_USER_COL,
         col_item=DEFAULT_ITEM_COL,
         *args,
-        **kwargs
+        **kwargs,
     ):
         """Check columns of DataFrame inputs
 
@@ -969,7 +993,7 @@ def check_column_dtypes_novelty_coverage_wrapper(
             col_user=col_user,
             col_item=col_item,
             *args,
-            **kwargs
+            **kwargs,
         )
 
     return check_column_dtypes_novelty_coverage_wrapper
@@ -1006,7 +1030,6 @@ def _get_cosine_similarity(
     col_item=DEFAULT_ITEM_COL,
     col_sim=DEFAULT_SIMILARITY_COL,
 ):
-
     if item_sim_measure == "item_cooccurrence_count":
         # calculate item-item similarity based on item co-occurrence count
         df_cosine_similarity = _get_cooccurrence_similarity(
diff --git a/recommenders/evaluation/spark_evaluation.py b/recommenders/evaluation/spark_evaluation.py
index 565ee9ce0..2e376edc2 100644
--- a/recommenders/evaluation/spark_evaluation.py
+++ b/recommenders/evaluation/spark_evaluation.py
@@ -306,7 +306,7 @@ def precision_at_k(self):
 
         Note:
             More details can be found
-            `on this website <http://spark.apache.org/docs/2.1.1/api/python/pyspark.mllib.html#pyspark.mllib.evaluation.RankingMetrics.precisionAt>`_.
+            `on the precisionAt PySpark documentation <http://spark.apache.org/docs/3.0.0/api/python/pyspark.mllib.html#pyspark.mllib.evaluation.RankingMetrics.precisionAt>`_.
 
         Return:
             float: precision at k (min=0, max=1)
@@ -318,7 +318,7 @@ def recall_at_k(self):
 
         Note:
             More details can be found
-            `here <http://spark.apache.org/docs/2.1.1/api/python/pyspark.mllib.html#pyspark.mllib.evaluation.RankingMetrics.meanAveragePrecision>`_.
+            `on the recallAt PySpark documentation <http://spark.apache.org/docs/3.0.0/api/python/pyspark.mllib.html#pyspark.mllib.evaluation.RankingMetrics.recallAt>`_.
 
         Return:
             float: recall at k (min=0, max=1).
@@ -330,7 +330,7 @@ def ndcg_at_k(self):
 
         Note:
             More details can be found
-            `on <http://spark.apache.org/docs/2.1.1/api/python/pyspark.mllib.html#pyspark.mllib.evaluation.RankingMetrics.ndcgAt>`_.
+            `on the ndcgAt PySpark documentation <http://spark.apache.org/docs/3.0.0/api/python/pyspark.mllib.html#pyspark.mllib.evaluation.RankingMetrics.ndcgAt>`_.
 
         Return:
             float: nDCG at k (min=0, max=1).
@@ -349,7 +349,7 @@ def map_at_k(self):
         """Get mean average precision at k.
 
         Note:
-            More details `on this link <http://spark.apache.org/docs/2.1.1/api/python/pyspark.mllib.html#pyspark.mllib.evaluation.RankingMetrics.meanAveragePrecision>`_.
+            More details `on the meanAveragePrecision PySpark documentation <http://spark.apache.org/docs/3.0.0/api/python/pyspark.mllib.html#pyspark.mllib.evaluation.RankingMetrics.meanAveragePrecision>`_.
 
         Return:
             float: MAP at k (min=0, max=1).