diff --git a/src/lighteval/metrics/metrics_sample.py b/src/lighteval/metrics/metrics_sample.py
index dbab1e10b..e25479186 100644
--- a/src/lighteval/metrics/metrics_sample.py
+++ b/src/lighteval/metrics/metrics_sample.py
@@ -418,7 +418,7 @@ def __init__(
         normalize_gold: callable = None,
         normalize_pred: callable = None,
     ):
-        """A BERT scorer class. Relies on some called extracted from `bert-score`. By default, will use the
+        r"""A BERT scorer class. Relies on some calls extracted from `bert-score`. By default, will use the
         `microsoft/deberta-large-mnli` as scorer. For each tokenized (pred, target) pair, it computes Precision,
         Recall and F1 as following:
 
@@ -427,7 +427,7 @@ def __init__(
 
             Recall = \sum_{t=1}^{len(target)} \div{max(Cos.Sim.(target_t, pred))}{IDF(target_t)}
 
             F1 = \div{Precision * Recall}{Precision + Recall}
-
+
         in which `Cos.Sim.` is the Cosine Similarity metric and `IDF(.)` represents the Inverse Document Frequency of
         its input token. It defaults to 1 for all tokens and 0 for EOS and SEP tokens.
 
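
For context, here is a minimal sketch of the greedy-matching computation the docstring describes. It follows the standard `bert-score` definition, where the IDF values enter as weights in a weighted average and F1 is the harmonic mean of Precision and Recall (i.e. with the factor of 2 that the docstring's formula omits). All names here (`bert_score_from_embeddings`, `pred_emb`, `pred_idf`, ...) are illustrative stand-ins rather than lighteval's API, and random vectors stand in for real `microsoft/deberta-large-mnli` token embeddings:

```python
# Sketch of IDF-weighted, greedy-matching BERTScore. Not lighteval's
# implementation; it only makes the formulas in the docstring concrete.
import numpy as np


def bert_score_from_embeddings(
    pred_emb: np.ndarray,      # (len(pred), dim) contextual embeddings of prediction tokens
    target_emb: np.ndarray,    # (len(target), dim) contextual embeddings of reference tokens
    pred_idf: np.ndarray,      # one IDF weight per pred token (1 by default, 0 for EOS/SEP)
    target_idf: np.ndarray,    # one IDF weight per target token
) -> tuple[float, float, float]:
    """Return (precision, recall, f1) for one (pred, target) pair."""
    # Normalize rows so that dot products are cosine similarities.
    pred_emb = pred_emb / np.linalg.norm(pred_emb, axis=1, keepdims=True)
    target_emb = target_emb / np.linalg.norm(target_emb, axis=1, keepdims=True)
    sim = pred_emb @ target_emb.T  # sim[i, j] = Cos.Sim.(pred_i, target_j)

    # Greedy matching: each pred token picks its best target token (precision),
    # each target token picks its best pred token (recall), IDF-weighted.
    precision = float((sim.max(axis=1) * pred_idf).sum() / pred_idf.sum())
    recall = float((sim.max(axis=0) * target_idf).sum() / target_idf.sum())
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return precision, recall, f1


# Toy usage with random vectors in place of model embeddings.
rng = np.random.default_rng(0)
p, r, f1 = bert_score_from_embeddings(
    rng.normal(size=(5, 8)), rng.normal(size=(6, 8)), np.ones(5), np.ones(6)
)
print(f"P={p:.3f} R={r:.3f} F1={f1:.3f}")
```

In lighteval itself the heavy lifting is delegated to code extracted from the `bert-score` package, as the docstring notes; the sketch above only spells out what the quoted formulas compute.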