[chore] Enable ruff rules UP006 and UP007 to improve type hints. (UKPLab#2830)

* enable isort improve ci/cd improve ci/cd improve ci/cd fix isort try fix
* fix
* Enable UP006 and UP007
* fix
tomaarsen · Jul 12, 2024 · 65728ed
1 parent dc3788d
commit 65728ed
Show file tree

Hide file tree

Showing 110 changed files with 782 additions and 596 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -62,7 +62,7 @@ line-length = 119
 fix = true
 
 [tool.ruff.lint]
-select = ["E", "F", "W", "I"]
+select = ["E", "F", "W", "I", "UP006", "UP007"]
 # Skip `E731` (do not assign a lambda expression, use a def)
 ignore = [
     # LineTooLong
@@ -72,11 +72,21 @@ ignore = [
 ]
 
 [tool.ruff.lint.per-file-ignores]
-# Ignore `E402` (import violations) in all examples
-"examples/**" = ["E402"]
+"examples/**" = [
+    # Ignore `E402` (import violations) in all examples
+    "E402", 
+    # Ignore missing required imports
+    "I002"
+    ]
+"docs/**" = [
+    # Ignore missing required imports
+    "I002"
+    ]
 
 [tool.ruff.lint.isort]
 known-third-party = ["datasets"]
+required-imports = ["from __future__ import annotations"]
+
 
 [tool.pytest.ini_options]
 testpaths = [

diff --git a/sentence_transformers/LoggingHandler.py b/sentence_transformers/LoggingHandler.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import logging
 
 import tqdm

diff --git a/sentence_transformers/SentenceTransformer.py b/sentence_transformers/SentenceTransformer.py
diff --git a/sentence_transformers/__init__.py b/sentence_transformers/__init__.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 __version__ = "3.1.0.dev0"
 __MODEL_HUB_ORGANIZATION__ = "sentence-transformers"
 

diff --git a/sentence_transformers/cross_encoder/CrossEncoder.py b/sentence_transformers/cross_encoder/CrossEncoder.py
@@ -1,7 +1,9 @@
+from __future__ import annotations
+
 import logging
 import os
 from functools import wraps
-from typing import Callable, Dict, List, Literal, Optional, Tuple, Type, Union, overload
+from typing import Callable, Literal, overload
 
 import numpy as np
 import torch
@@ -58,11 +60,11 @@ def __init__(
         model_name: str,
         num_labels: int = None,
         max_length: int = None,
-        device: Optional[str] = None,
-        tokenizer_args: Dict = None,
-        automodel_args: Dict = None,
+        device: str | None = None,
+        tokenizer_args: dict = None,
+        automodel_args: dict = None,
         trust_remote_code: bool = False,
-        revision: Optional[str] = None,
+        revision: str | None = None,
         local_files_only: bool = False,
         default_activation_function=None,
         classifier_dropout: float = None,
@@ -127,7 +129,7 @@ def __init__(
         else:
             self.default_activation_function = nn.Sigmoid() if self.config.num_labels == 1 else nn.Identity()
 
-    def smart_batching_collate(self, batch: List[InputExample]) -> Tuple[BatchEncoding, Tensor]:
+    def smart_batching_collate(self, batch: list[InputExample]) -> tuple[BatchEncoding, Tensor]:
         texts = [[] for _ in range(len(batch[0].texts))]
         labels = []
 
@@ -149,7 +151,7 @@ def smart_batching_collate(self, batch: List[InputExample]) -> Tuple[BatchEncodi
 
         return tokenized, labels
 
-    def smart_batching_collate_text_only(self, batch: List[InputExample]) -> BatchEncoding:
+    def smart_batching_collate_text_only(self, batch: list[InputExample]) -> BatchEncoding:
         texts = [[] for _ in range(len(batch[0]))]
 
         for example in batch:
@@ -174,8 +176,8 @@ def fit(
         activation_fct=nn.Identity(),
         scheduler: str = "WarmupLinear",
         warmup_steps: int = 10000,
-        optimizer_class: Type[Optimizer] = torch.optim.AdamW,
-        optimizer_params: Dict[str, object] = {"lr": 2e-5},
+        optimizer_class: type[Optimizer] = torch.optim.AdamW,
+        optimizer_params: dict[str, object] = {"lr": 2e-5},
         weight_decay: float = 0.01,
         evaluation_steps: int = 0,
         output_path: str = None,
@@ -305,66 +307,66 @@ def fit(
     @overload
     def predict(
         self,
-        sentences: Union[Tuple[str, str], List[str]],
+        sentences: tuple[str, str] | list[str],
         batch_size: int = ...,
-        show_progress_bar: Optional[bool] = ...,
+        show_progress_bar: bool | None = ...,
         num_workers: int = ...,
-        activation_fct: Optional[Callable] = ...,
-        apply_softmax: Optional[bool] = ...,
+        activation_fct: Callable | None = ...,
+        apply_softmax: bool | None = ...,
         convert_to_numpy: Literal[False] = ...,
         convert_to_tensor: Literal[False] = ...,
     ) -> torch.Tensor: ...
 
     @overload
     def predict(
         self,
-        sentences: Union[List[Tuple[str, str]], List[List[str]], Tuple[str, str], List[str]],
+        sentences: list[tuple[str, str]] | list[list[str]] | tuple[str, str] | list[str],
         batch_size: int = ...,
-        show_progress_bar: Optional[bool] = ...,
+        show_progress_bar: bool | None = ...,
         num_workers: int = ...,
-        activation_fct: Optional[Callable] = ...,
-        apply_softmax: Optional[bool] = ...,
+        activation_fct: Callable | None = ...,
+        apply_softmax: bool | None = ...,
         convert_to_numpy: Literal[True] = True,
         convert_to_tensor: Literal[False] = False,
     ) -> np.ndarray: ...
 
     @overload
     def predict(
         self,
-        sentences: Union[List[Tuple[str, str]], List[List[str]], Tuple[str, str], List[str]],
+        sentences: list[tuple[str, str]] | list[list[str]] | tuple[str, str] | list[str],
         batch_size: int = ...,
-        show_progress_bar: Optional[bool] = ...,
+        show_progress_bar: bool | None = ...,
         num_workers: int = ...,
-        activation_fct: Optional[Callable] = ...,
-        apply_softmax: Optional[bool] = ...,
+        activation_fct: Callable | None = ...,
+        apply_softmax: bool | None = ...,
         convert_to_numpy: bool = ...,
         convert_to_tensor: Literal[True] = ...,
     ) -> torch.Tensor: ...
 
     @overload
     def predict(
         self,
-        sentences: Union[List[Tuple[str, str]], List[List[str]]],
+        sentences: list[tuple[str, str]] | list[list[str]],
         batch_size: int = ...,
-        show_progress_bar: Optional[bool] = ...,
+        show_progress_bar: bool | None = ...,
         num_workers: int = ...,
-        activation_fct: Optional[Callable] = ...,
-        apply_softmax: Optional[bool] = ...,
+        activation_fct: Callable | None = ...,
+        apply_softmax: bool | None = ...,
         convert_to_numpy: Literal[False] = ...,
         convert_to_tensor: Literal[False] = ...,
-    ) -> List[torch.Tensor]: ...
+    ) -> list[torch.Tensor]: ...
 
     def predict(
         self,
-        sentences: Union[List[Tuple[str, str]], List[List[str]], Tuple[str, str], List[str]],
+        sentences: list[tuple[str, str]] | list[list[str]] | tuple[str, str] | list[str],
         batch_size: int = 32,
-        show_progress_bar: Optional[bool] = None,
+        show_progress_bar: bool | None = None,
         num_workers: int = 0,
-        activation_fct: Optional[Callable] = None,
-        apply_softmax: Optional[bool] = False,
+        activation_fct: Callable | None = None,
+        apply_softmax: bool | None = False,
         convert_to_numpy: bool = True,
         convert_to_tensor: bool = False,
-    ) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]:
+    ) -> list[torch.Tensor] | np.ndarray | torch.Tensor:
         """
         Performs predictions with the CrossEncoder on the given sentence pairs.
 
@@ -451,8 +453,8 @@ def predict(
     def rank(
         self,
         query: str,
-        documents: List[str],
-        top_k: Optional[int] = None,
+        documents: list[str],
+        top_k: int | None = None,
         return_documents: bool = False,
         batch_size: int = 32,
         show_progress_bar: bool = None,
@@ -461,7 +463,7 @@ def rank(
         apply_softmax=False,
         convert_to_numpy: bool = True,
         convert_to_tensor: bool = False,
-    ) -> List[Dict[Literal["corpus_id", "score", "text"], Union[int, float, str]]]:
+    ) -> list[dict[Literal["corpus_id", "score", "text"], int | float | str]]:
         """
         Performs ranking with the CrossEncoder on the given query and documents. Returns a sorted list with the document indices and scores.
 
@@ -572,10 +574,10 @@ def push_to_hub(
         self,
         repo_id: str,
         *,
-        commit_message: Optional[str] = None,
-        private: Optional[bool] = None,
+        commit_message: str | None = None,
+        private: bool | None = None,
         safe_serialization: bool = True,
-        tags: Optional[List[str]] = None,
+        tags: list[str] | None = None,
         **kwargs,
     ) -> str:
         if isinstance(tags, str):

diff --git a/sentence_transformers/cross_encoder/__init__.py b/sentence_transformers/cross_encoder/__init__.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from .CrossEncoder import CrossEncoder
 
 __all__ = ["CrossEncoder"]
diff --git a/sentence_transformers/cross_encoder/evaluation/CEBinaryAccuracyEvaluator.py b/sentence_transformers/cross_encoder/evaluation/CEBinaryAccuracyEvaluator.py
@@ -1,7 +1,8 @@
+from __future__ import annotations
+
 import csv
 import logging
 import os
-from typing import List
 
 import numpy as np
 
@@ -22,8 +23,8 @@ class CEBinaryAccuracyEvaluator:
 
     def __init__(
         self,
-        sentence_pairs: List[List[str]],
-        labels: List[int],
+        sentence_pairs: list[list[str]],
+        labels: list[int],
         name: str = "",
         threshold: float = 0.5,
         write_csv: bool = True,
@@ -38,7 +39,7 @@ def __init__(
         self.write_csv = write_csv
 
     @classmethod
-    def from_input_examples(cls, examples: List[InputExample], **kwargs):
+    def from_input_examples(cls, examples: list[InputExample], **kwargs):
         sentence_pairs = []
         labels = []
 

diff --git a/sentence_transformers/cross_encoder/evaluation/CEBinaryClassificationEvaluator.py b/sentence_transformers/cross_encoder/evaluation/CEBinaryClassificationEvaluator.py
@@ -1,7 +1,8 @@
+from __future__ import annotations
+
 import csv
 import logging
 import os
-from typing import List
 
 import numpy as np
 from sklearn.metrics import average_precision_score
@@ -20,8 +21,8 @@ class CEBinaryClassificationEvaluator:
 
     def __init__(
         self,
-        sentence_pairs: List[List[str]],
-        labels: List[int],
+        sentence_pairs: list[list[str]],
+        labels: list[int],
         name: str = "",
         show_progress_bar: bool = False,
         write_csv: bool = True,
@@ -55,7 +56,7 @@ def __init__(
         self.write_csv = write_csv
 
     @classmethod
-    def from_input_examples(cls, examples: List[InputExample], **kwargs):
+    def from_input_examples(cls, examples: list[InputExample], **kwargs):
         sentence_pairs = []
         labels = []
 

diff --git a/sentence_transformers/cross_encoder/evaluation/CECorrelationEvaluator.py b/sentence_transformers/cross_encoder/evaluation/CECorrelationEvaluator.py
@@ -1,7 +1,8 @@
+from __future__ import annotations
+
 import csv
 import logging
 import os
-from typing import List
 
 from scipy.stats import pearsonr, spearmanr
 
@@ -17,7 +18,7 @@ class CECorrelationEvaluator:
     and the gold score.
     """
 
-    def __init__(self, sentence_pairs: List[List[str]], scores: List[float], name: str = "", write_csv: bool = True):
+    def __init__(self, sentence_pairs: list[list[str]], scores: list[float], name: str = "", write_csv: bool = True):
         self.sentence_pairs = sentence_pairs
         self.scores = scores
         self.name = name
@@ -27,7 +28,7 @@ def __init__(self, sentence_pairs: List[List[str]], scores: List[float], name: s
         self.write_csv = write_csv
 
     @classmethod
-    def from_input_examples(cls, examples: List[InputExample], **kwargs):
+    def from_input_examples(cls, examples: list[InputExample], **kwargs):
         sentence_pairs = []
         scores = []
 

diff --git a/sentence_transformers/cross_encoder/evaluation/CEF1Evaluator.py b/sentence_transformers/cross_encoder/evaluation/CEF1Evaluator.py
@@ -1,7 +1,8 @@
+from __future__ import annotations
+
 import csv
 import logging
 import os
-from typing import List
 
 import numpy as np
 from sklearn.metrics import f1_score
@@ -31,8 +32,8 @@ class CEF1Evaluator:
 
     def __init__(
         self,
-        sentence_pairs: List[List[str]],
-        labels: List[int],
+        sentence_pairs: list[list[str]],
+        labels: list[int],
         *,
         batch_size: int = 32,
         show_progress_bar: bool = False,
@@ -67,7 +68,7 @@ def __init__(
         self.csv_headers = ["epoch", "steps"] + [metric_name for metric_name, _ in self.f1_callables]
 
     @classmethod
-    def from_input_examples(cls, examples: List[InputExample], **kwargs):
+    def from_input_examples(cls, examples: list[InputExample], **kwargs):
         """
         Create an instance of CEF1Evaluator from a list of InputExample objects.
 

diff --git a/sentence_transformers/cross_encoder/evaluation/CERerankingEvaluator.py b/sentence_transformers/cross_encoder/evaluation/CERerankingEvaluator.py
@@ -1,7 +1,8 @@
+from __future__ import annotations
+
 import csv
 import logging
 import os
-from typing import Optional
 
 import numpy as np
 from sklearn.metrics import ndcg_score
@@ -22,9 +23,7 @@ class CERerankingEvaluator:
             of positive (relevant) documents, negative is a list of negative (irrelevant) documents.
     """
 
-    def __init__(
-        self, samples, at_k: int = 10, name: str = "", write_csv: bool = True, mrr_at_k: Optional[int] = None
-    ):
+    def __init__(self, samples, at_k: int = 10, name: str = "", write_csv: bool = True, mrr_at_k: int | None = None):
         self.samples = samples
         self.name = name
         if mrr_at_k is not None:

diff --git a/sentence_transformers/cross_encoder/evaluation/CESoftmaxAccuracyEvaluator.py b/sentence_transformers/cross_encoder/evaluation/CESoftmaxAccuracyEvaluator.py
@@ -1,7 +1,8 @@
+from __future__ import annotations
+
 import csv
 import logging
 import os
-from typing import List
 
 import numpy as np
 
@@ -18,7 +19,7 @@ class CESoftmaxAccuracyEvaluator:
     accuracy of the predict class vs. the gold labels.
     """
 
-    def __init__(self, sentence_pairs: List[List[str]], labels: List[int], name: str = "", write_csv: bool = True):
+    def __init__(self, sentence_pairs: list[list[str]], labels: list[int], name: str = "", write_csv: bool = True):
         self.sentence_pairs = sentence_pairs
         self.labels = labels
         self.name = name
@@ -28,7 +29,7 @@ def __init__(self, sentence_pairs: List[List[str]], labels: List[int], name: str
         self.write_csv = write_csv
 
     @classmethod
-    def from_input_examples(cls, examples: List[InputExample], **kwargs):
+    def from_input_examples(cls, examples: list[InputExample], **kwargs):
         sentence_pairs = []
         labels = []