Skip to content

Commit

Permalink
[chore] Enable ruff rules UP006 and UP007 to improve type hints. (UKPLab#2830)
Browse files Browse the repository at this point in the history

* enable isort

improve ci/cd

improve ci/cd

improve ci/cd

fix isort

try

fix

* fix

* Enable UP006 and UP007

* fix
  • Loading branch information
fpgmaas authored Jul 12, 2024
1 parent dc3788d commit 65728ed
Show file tree
Hide file tree
Showing 110 changed files with 782 additions and 596 deletions.
16 changes: 13 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ line-length = 119
fix = true

[tool.ruff.lint]
select = ["E", "F", "W", "I"]
select = ["E", "F", "W", "I", "UP006", "UP007"]
# Skip `E731` (do not assign a lambda expression, use a def)
ignore = [
# LineTooLong
Expand All @@ -72,11 +72,21 @@ ignore = [
]

[tool.ruff.lint.per-file-ignores]
# Ignore `E402` (import violations) in all examples
"examples/**" = ["E402"]
"examples/**" = [
# Ignore `E402` (import violations) in all examples
"E402",
# Ignore missing required imports
"I002"
]
"docs/**" = [
# Ignore missing required imports
"I002"
]

[tool.ruff.lint.isort]
known-third-party = ["datasets"]
required-imports = ["from __future__ import annotations"]


[tool.pytest.ini_options]
testpaths = [
Expand Down
2 changes: 2 additions & 0 deletions sentence_transformers/LoggingHandler.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import logging

import tqdm
Expand Down
188 changes: 95 additions & 93 deletions sentence_transformers/SentenceTransformer.py

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions sentence_transformers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

__version__ = "3.1.0.dev0"
__MODEL_HUB_ORGANIZATION__ = "sentence-transformers"

Expand Down
76 changes: 39 additions & 37 deletions sentence_transformers/cross_encoder/CrossEncoder.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from __future__ import annotations

import logging
import os
from functools import wraps
from typing import Callable, Dict, List, Literal, Optional, Tuple, Type, Union, overload
from typing import Callable, Literal, overload

import numpy as np
import torch
Expand Down Expand Up @@ -58,11 +60,11 @@ def __init__(
model_name: str,
num_labels: int = None,
max_length: int = None,
device: Optional[str] = None,
tokenizer_args: Dict = None,
automodel_args: Dict = None,
device: str | None = None,
tokenizer_args: dict = None,
automodel_args: dict = None,
trust_remote_code: bool = False,
revision: Optional[str] = None,
revision: str | None = None,
local_files_only: bool = False,
default_activation_function=None,
classifier_dropout: float = None,
Expand Down Expand Up @@ -127,7 +129,7 @@ def __init__(
else:
self.default_activation_function = nn.Sigmoid() if self.config.num_labels == 1 else nn.Identity()

def smart_batching_collate(self, batch: List[InputExample]) -> Tuple[BatchEncoding, Tensor]:
def smart_batching_collate(self, batch: list[InputExample]) -> tuple[BatchEncoding, Tensor]:
texts = [[] for _ in range(len(batch[0].texts))]
labels = []

Expand All @@ -149,7 +151,7 @@ def smart_batching_collate(self, batch: List[InputExample]) -> Tuple[BatchEncodi

return tokenized, labels

def smart_batching_collate_text_only(self, batch: List[InputExample]) -> BatchEncoding:
def smart_batching_collate_text_only(self, batch: list[InputExample]) -> BatchEncoding:
texts = [[] for _ in range(len(batch[0]))]

for example in batch:
Expand All @@ -174,8 +176,8 @@ def fit(
activation_fct=nn.Identity(),
scheduler: str = "WarmupLinear",
warmup_steps: int = 10000,
optimizer_class: Type[Optimizer] = torch.optim.AdamW,
optimizer_params: Dict[str, object] = {"lr": 2e-5},
optimizer_class: type[Optimizer] = torch.optim.AdamW,
optimizer_params: dict[str, object] = {"lr": 2e-5},
weight_decay: float = 0.01,
evaluation_steps: int = 0,
output_path: str = None,
Expand Down Expand Up @@ -305,66 +307,66 @@ def fit(
@overload
def predict(
self,
sentences: Union[Tuple[str, str], List[str]],
sentences: tuple[str, str] | list[str],
batch_size: int = ...,
show_progress_bar: Optional[bool] = ...,
show_progress_bar: bool | None = ...,
num_workers: int = ...,
activation_fct: Optional[Callable] = ...,
apply_softmax: Optional[bool] = ...,
activation_fct: Callable | None = ...,
apply_softmax: bool | None = ...,
convert_to_numpy: Literal[False] = ...,
convert_to_tensor: Literal[False] = ...,
) -> torch.Tensor: ...

@overload
def predict(
self,
sentences: Union[List[Tuple[str, str]], List[List[str]], Tuple[str, str], List[str]],
sentences: list[tuple[str, str]] | list[list[str]] | tuple[str, str] | list[str],
batch_size: int = ...,
show_progress_bar: Optional[bool] = ...,
show_progress_bar: bool | None = ...,
num_workers: int = ...,
activation_fct: Optional[Callable] = ...,
apply_softmax: Optional[bool] = ...,
activation_fct: Callable | None = ...,
apply_softmax: bool | None = ...,
convert_to_numpy: Literal[True] = True,
convert_to_tensor: Literal[False] = False,
) -> np.ndarray: ...

@overload
def predict(
self,
sentences: Union[List[Tuple[str, str]], List[List[str]], Tuple[str, str], List[str]],
sentences: list[tuple[str, str]] | list[list[str]] | tuple[str, str] | list[str],
batch_size: int = ...,
show_progress_bar: Optional[bool] = ...,
show_progress_bar: bool | None = ...,
num_workers: int = ...,
activation_fct: Optional[Callable] = ...,
apply_softmax: Optional[bool] = ...,
activation_fct: Callable | None = ...,
apply_softmax: bool | None = ...,
convert_to_numpy: bool = ...,
convert_to_tensor: Literal[True] = ...,
) -> torch.Tensor: ...

@overload
def predict(
self,
sentences: Union[List[Tuple[str, str]], List[List[str]]],
sentences: list[tuple[str, str]] | list[list[str]],
batch_size: int = ...,
show_progress_bar: Optional[bool] = ...,
show_progress_bar: bool | None = ...,
num_workers: int = ...,
activation_fct: Optional[Callable] = ...,
apply_softmax: Optional[bool] = ...,
activation_fct: Callable | None = ...,
apply_softmax: bool | None = ...,
convert_to_numpy: Literal[False] = ...,
convert_to_tensor: Literal[False] = ...,
) -> List[torch.Tensor]: ...
) -> list[torch.Tensor]: ...

def predict(
self,
sentences: Union[List[Tuple[str, str]], List[List[str]], Tuple[str, str], List[str]],
sentences: list[tuple[str, str]] | list[list[str]] | tuple[str, str] | list[str],
batch_size: int = 32,
show_progress_bar: Optional[bool] = None,
show_progress_bar: bool | None = None,
num_workers: int = 0,
activation_fct: Optional[Callable] = None,
apply_softmax: Optional[bool] = False,
activation_fct: Callable | None = None,
apply_softmax: bool | None = False,
convert_to_numpy: bool = True,
convert_to_tensor: bool = False,
) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]:
) -> list[torch.Tensor] | np.ndarray | torch.Tensor:
"""
Performs predictions with the CrossEncoder on the given sentence pairs.
Expand Down Expand Up @@ -451,8 +453,8 @@ def predict(
def rank(
self,
query: str,
documents: List[str],
top_k: Optional[int] = None,
documents: list[str],
top_k: int | None = None,
return_documents: bool = False,
batch_size: int = 32,
show_progress_bar: bool = None,
Expand All @@ -461,7 +463,7 @@ def rank(
apply_softmax=False,
convert_to_numpy: bool = True,
convert_to_tensor: bool = False,
) -> List[Dict[Literal["corpus_id", "score", "text"], Union[int, float, str]]]:
) -> list[dict[Literal["corpus_id", "score", "text"], int | float | str]]:
"""
Performs ranking with the CrossEncoder on the given query and documents. Returns a sorted list with the document indices and scores.
Expand Down Expand Up @@ -572,10 +574,10 @@ def push_to_hub(
self,
repo_id: str,
*,
commit_message: Optional[str] = None,
private: Optional[bool] = None,
commit_message: str | None = None,
private: bool | None = None,
safe_serialization: bool = True,
tags: Optional[List[str]] = None,
tags: list[str] | None = None,
**kwargs,
) -> str:
if isinstance(tags, str):
Expand Down
2 changes: 2 additions & 0 deletions sentence_transformers/cross_encoder/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

from .CrossEncoder import CrossEncoder

__all__ = ["CrossEncoder"]
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from __future__ import annotations

import csv
import logging
import os
from typing import List

import numpy as np

Expand All @@ -22,8 +23,8 @@ class CEBinaryAccuracyEvaluator:

def __init__(
self,
sentence_pairs: List[List[str]],
labels: List[int],
sentence_pairs: list[list[str]],
labels: list[int],
name: str = "",
threshold: float = 0.5,
write_csv: bool = True,
Expand All @@ -38,7 +39,7 @@ def __init__(
self.write_csv = write_csv

@classmethod
def from_input_examples(cls, examples: List[InputExample], **kwargs):
def from_input_examples(cls, examples: list[InputExample], **kwargs):
sentence_pairs = []
labels = []

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from __future__ import annotations

import csv
import logging
import os
from typing import List

import numpy as np
from sklearn.metrics import average_precision_score
Expand All @@ -20,8 +21,8 @@ class CEBinaryClassificationEvaluator:

def __init__(
self,
sentence_pairs: List[List[str]],
labels: List[int],
sentence_pairs: list[list[str]],
labels: list[int],
name: str = "",
show_progress_bar: bool = False,
write_csv: bool = True,
Expand Down Expand Up @@ -55,7 +56,7 @@ def __init__(
self.write_csv = write_csv

@classmethod
def from_input_examples(cls, examples: List[InputExample], **kwargs):
def from_input_examples(cls, examples: list[InputExample], **kwargs):
sentence_pairs = []
labels = []

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from __future__ import annotations

import csv
import logging
import os
from typing import List

from scipy.stats import pearsonr, spearmanr

Expand All @@ -17,7 +18,7 @@ class CECorrelationEvaluator:
and the gold score.
"""

def __init__(self, sentence_pairs: List[List[str]], scores: List[float], name: str = "", write_csv: bool = True):
def __init__(self, sentence_pairs: list[list[str]], scores: list[float], name: str = "", write_csv: bool = True):
self.sentence_pairs = sentence_pairs
self.scores = scores
self.name = name
Expand All @@ -27,7 +28,7 @@ def __init__(self, sentence_pairs: List[List[str]], scores: List[float], name: s
self.write_csv = write_csv

@classmethod
def from_input_examples(cls, examples: List[InputExample], **kwargs):
def from_input_examples(cls, examples: list[InputExample], **kwargs):
sentence_pairs = []
scores = []

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from __future__ import annotations

import csv
import logging
import os
from typing import List

import numpy as np
from sklearn.metrics import f1_score
Expand Down Expand Up @@ -31,8 +32,8 @@ class CEF1Evaluator:

def __init__(
self,
sentence_pairs: List[List[str]],
labels: List[int],
sentence_pairs: list[list[str]],
labels: list[int],
*,
batch_size: int = 32,
show_progress_bar: bool = False,
Expand Down Expand Up @@ -67,7 +68,7 @@ def __init__(
self.csv_headers = ["epoch", "steps"] + [metric_name for metric_name, _ in self.f1_callables]

@classmethod
def from_input_examples(cls, examples: List[InputExample], **kwargs):
def from_input_examples(cls, examples: list[InputExample], **kwargs):
"""
Create an instance of CEF1Evaluator from a list of InputExample objects.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from __future__ import annotations

import csv
import logging
import os
from typing import Optional

import numpy as np
from sklearn.metrics import ndcg_score
Expand All @@ -22,9 +23,7 @@ class CERerankingEvaluator:
of positive (relevant) documents, negative is a list of negative (irrelevant) documents.
"""

def __init__(
self, samples, at_k: int = 10, name: str = "", write_csv: bool = True, mrr_at_k: Optional[int] = None
):
def __init__(self, samples, at_k: int = 10, name: str = "", write_csv: bool = True, mrr_at_k: int | None = None):
self.samples = samples
self.name = name
if mrr_at_k is not None:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from __future__ import annotations

import csv
import logging
import os
from typing import List

import numpy as np

Expand All @@ -18,7 +19,7 @@ class CESoftmaxAccuracyEvaluator:
accuracy of the predict class vs. the gold labels.
"""

def __init__(self, sentence_pairs: List[List[str]], labels: List[int], name: str = "", write_csv: bool = True):
def __init__(self, sentence_pairs: list[list[str]], labels: list[int], name: str = "", write_csv: bool = True):
self.sentence_pairs = sentence_pairs
self.labels = labels
self.name = name
Expand All @@ -28,7 +29,7 @@ def __init__(self, sentence_pairs: List[List[str]], labels: List[int], name: str
self.write_csv = write_csv

@classmethod
def from_input_examples(cls, examples: List[InputExample], **kwargs):
def from_input_examples(cls, examples: list[InputExample], **kwargs):
sentence_pairs = []
labels = []

Expand Down
Loading

0 comments on commit 65728ed

Please sign in to comment.