From 0467655f594d2c8bdc029279388fbec64a2edad8 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Sat, 30 Dec 2023 12:20:20 +0100 Subject: [PATCH] :memo: Signed-off-by: miguelgfierro --- docs/_toc.yml | 4 -- recommenders/evaluation/python_evaluation.py | 47 +++++++++++++++----- recommenders/evaluation/spark_evaluation.py | 8 ++-- 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/docs/_toc.yml b/docs/_toc.yml index 1ef2b5fc3..90e4fe0e1 100644 --- a/docs/_toc.yml +++ b/docs/_toc.yml @@ -9,10 +9,6 @@ root: intro defaults: numbered: false parts: - - caption: Getting Started - chapters: - - file: ../SETUP.md - - file: TEMP - caption: Recommenders API Documentation chapters: - file: datasets diff --git a/recommenders/evaluation/python_evaluation.py b/recommenders/evaluation/python_evaluation.py index 7569c7246..e9adf621a 100644 --- a/recommenders/evaluation/python_evaluation.py +++ b/recommenders/evaluation/python_evaluation.py @@ -33,10 +33,28 @@ class ColumnMismatchError(Exception): + """Exception raised when there is a mismatch in columns. + + This exception is raised when an operation involving columns + encounters a mismatch or inconsistency. + + Attributes: + message (str): Explanation of the error. + """ + pass class ColumnTypeMismatchError(Exception): + """Exception raised when there is a mismatch in column types. + + This exception is raised when an operation involving column types + encounters a mismatch or inconsistency. + + Attributes: + message (str): Explanation of the error. + """ + pass @@ -63,7 +81,7 @@ def check_column_dtypes_wrapper( col_item=DEFAULT_ITEM_COL, col_prediction=DEFAULT_PREDICTION_COL, *args, - **kwargs + **kwargs, ): """Check columns of DataFrame inputs @@ -81,12 +99,16 @@ def check_column_dtypes_wrapper( expected_true_columns.add(kwargs["col_rating"]) if not has_columns(rating_true, expected_true_columns): raise ColumnMismatchError("Missing columns in true rating DataFrame") - + if not has_columns(rating_pred, {col_user, col_item, col_prediction}): raise ColumnMismatchError("Missing columns in predicted rating DataFrame") - - if not has_same_base_dtype(rating_true, rating_pred, columns=[col_user, col_item]): - raise ColumnTypeMismatchError("Columns in provided DataFrames are not the same datatype") + + if not has_same_base_dtype( + rating_true, rating_pred, columns=[col_user, col_item] + ): + raise ColumnTypeMismatchError( + "Columns in provided DataFrames are not the same datatype" + ) return func( rating_true=rating_true, @@ -95,7 +117,7 @@ def check_column_dtypes_wrapper( col_item=col_item, col_prediction=col_prediction, *args, - **kwargs + **kwargs, ) return check_column_dtypes_wrapper @@ -750,7 +772,9 @@ def map_at_k( if df_merge is None: return 0.0 else: - return (df_merge["rr"] / df_merge["actual"].apply(lambda x: min(x, k))).sum() / n_users + return ( + df_merge["rr"] / df_merge["actual"].apply(lambda x: min(x, k)) + ).sum() / n_users def get_top_k_items( @@ -837,7 +861,7 @@ def check_column_dtypes_diversity_serendipity_wrapper( col_sim=DEFAULT_SIMILARITY_COL, col_relevance=None, *args, - **kwargs + **kwargs, ): """Check columns of DataFrame inputs @@ -904,7 +928,7 @@ def check_column_dtypes_diversity_serendipity_wrapper( col_sim=col_sim, col_relevance=col_relevance, *args, - **kwargs + **kwargs, ) return check_column_dtypes_diversity_serendipity_wrapper @@ -933,7 +957,7 @@ def check_column_dtypes_novelty_coverage_wrapper( col_user=DEFAULT_USER_COL, col_item=DEFAULT_ITEM_COL, *args, - **kwargs + **kwargs, ): """Check columns of DataFrame inputs @@ -969,7 +993,7 @@ def check_column_dtypes_novelty_coverage_wrapper( col_user=col_user, col_item=col_item, *args, - **kwargs + **kwargs, ) return check_column_dtypes_novelty_coverage_wrapper @@ -1006,7 +1030,6 @@ def _get_cosine_similarity( col_item=DEFAULT_ITEM_COL, col_sim=DEFAULT_SIMILARITY_COL, ): - if item_sim_measure == "item_cooccurrence_count": # calculate item-item similarity based on item co-occurrence count df_cosine_similarity = _get_cooccurrence_similarity( diff --git a/recommenders/evaluation/spark_evaluation.py b/recommenders/evaluation/spark_evaluation.py index 565ee9ce0..2e376edc2 100644 --- a/recommenders/evaluation/spark_evaluation.py +++ b/recommenders/evaluation/spark_evaluation.py @@ -306,7 +306,7 @@ def precision_at_k(self): Note: More details can be found - `on this website `_. + `on the precisionAt PySpark documentation `_. Return: float: precision at k (min=0, max=1) @@ -318,7 +318,7 @@ def recall_at_k(self): Note: More details can be found - `here `_. + `on the recallAt PySpark documentation `_. Return: float: recall at k (min=0, max=1). @@ -330,7 +330,7 @@ def ndcg_at_k(self): Note: More details can be found - `on `_. + `on the ndcgAt PySpark documentation `_. Return: float: nDCG at k (min=0, max=1). @@ -349,7 +349,7 @@ def map_at_k(self): """Get mean average precision at k. Note: - More details `on this link `_. + More details `on the meanAveragePrecision PySpark documentation `_. Return: float: MAP at k (min=0, max=1).