From a335e1009399d0e9a42087607d3b602456a33ac8 Mon Sep 17 00:00:00 2001 From: Guillaume Bernard Date: Tue, 9 Jan 2024 15:07:50 +0100 Subject: [PATCH 1/7] chore: add .idea directory in .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 8516c00..c79ce99 100644 --- a/.gitignore +++ b/.gitignore @@ -107,3 +107,6 @@ dmypy.json # Pytest-env pytest.ini + +# IDEA/Jetbrains +.idea From 5dc30feddd6bbdee10670871c631a7dea9907992 Mon Sep 17 00:00:00 2001 From: Guillaume Bernard Date: Tue, 9 Jan 2024 15:10:30 +0100 Subject: [PATCH 2/7] chore: remove execute permission from non-code files --- AUTHORS.rst | 0 LICENSE | 0 MANIFEST.in | 0 Makefile | 0 README.md | 0 5 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 AUTHORS.rst mode change 100755 => 100644 LICENSE mode change 100755 => 100644 MANIFEST.in mode change 100755 => 100644 Makefile mode change 100755 => 100644 README.md diff --git a/AUTHORS.rst b/AUTHORS.rst old mode 100755 new mode 100644 diff --git a/LICENSE b/LICENSE old mode 100755 new mode 100644 diff --git a/MANIFEST.in b/MANIFEST.in old mode 100755 new mode 100644 diff --git a/Makefile b/Makefile old mode 100755 new mode 100644 diff --git a/README.md b/README.md old mode 100755 new mode 100644 From b5d39c6029133e2da84dc138caeb7c15feee72a9 Mon Sep 17 00:00:00 2001 From: Guillaume Bernard Date: Tue, 9 Jan 2024 15:23:42 +0100 Subject: [PATCH 3/7] chore: update pre-commit used tools - Move configuration to global pyproject.toml - The source code files are also updated because new errors and lints were raised by the tools used in the pre-commit config. --- .flake8 | 28 ++++ .pre-commit-config.yaml | 34 ++-- melusine/__init__.py | 4 +- melusine/backend/active_backend.py | 4 +- melusine/backend/base_backend.py | 4 +- melusine/backend/dict_backend.py | 6 +- melusine/backend/pandas_backend.py | 8 +- melusine/base.py | 52 +++--- melusine/connectors/exchange.py | 16 +- melusine/data/_data_loader.py | 3 +- melusine/detectors.py | 8 +- melusine/io/_classes.py | 16 +- melusine/message.py | 4 +- melusine/pipeline.py | 34 ++-- melusine/processors.py | 158 +++++++++--------- pyproject.toml | 9 + tests/base/test_base_logging.py | 1 + tests/base/test_melusine_detectors.py | 1 - tests/base/test_message.py | 7 - tests/conf/test_config.py | 7 - tests/data/test_data.py | 1 - tests/docs/test_configurations.py | 7 +- tests/docs/test_detectors.py | 1 - tests/functional/test_emails_generic.py | 1 - .../huggingface/test_basic_classification.py | 4 +- tests/regex/test_builtin_regex.py | 8 +- 26 files changed, 237 insertions(+), 189 deletions(-) create mode 100644 .flake8 diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..4173d11 --- /dev/null +++ b/.flake8 @@ -0,0 +1,28 @@ +[flake8] +extend-ignore = + # Line too long + E501 + # Whitespace issues + D2 + # Quote issues + D3 + # Docstring Content Issues + D4 + # Missing docstring in public module + D100 + # Missing docstring in public package + D104 + # Missing docstring in magic method + D105 + # Missing docstring in public nested class + D106 + # Missing docstring in __init__ + D107 + # Line break occurred before a binary operator + W503 + # Whitespace before ':' + E203 +extend-exclude = + tests/**, + docs/** +max-line-length = 119 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e3c6410..b71d547 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ default_language_version: python: python3 repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.2.0 + rev: v4.5.0 hooks: - id: check-ast - id: check-byte-order-marker @@ -11,7 +11,9 @@ repos: - id: check-executables-have-shebangs - id: check-json - id: check-yaml - exclude: ^chart/ + # Ignore mkdocs because the linter fails on the pymdownx specific + # syntax to inject Python code from configuration. + exclude: mkdocs.yml - id: debug-statements - id: end-of-file-fixer exclude: ^(docs/|gdocs/) @@ -24,42 +26,44 @@ repos: args: ['--maxkb=500'] - id: no-commit-to-branch args: ['--branch', 'master', '--branch', 'develop'] + - repo: https://github.com/psf/black - rev: 21.12b0 + rev: 23.12.1 hooks: - id: black - args: [--line-length=120] additional_dependencies: ['click==8.0.4'] + - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v0.931' + rev: 'v1.8.0' hooks: - id: mypy - args: [--ignore-missing-imports, --disallow-untyped-defs, --show-error-codes, --no-site-packages] files: ^melusine + - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 + rev: 7.0.0 hooks: - id: flake8 - exclude: '^tests/|^docs' - args: ['--ignore=E501,D2,D3,D4,D104,D100,D106,D107,W503,D105,E203'] additional_dependencies: [ flake8-docstrings, "flake8-bugbear==22.8.23" ] + - repo: https://github.com/pre-commit/mirrors-isort - rev: v5.4.2 + rev: v5.10.1 hooks: - id: isort - args: ["--profile", "black", "-l", "120"] + - repo: https://github.com/asottile/pyupgrade - rev: v2.7.2 + rev: v3.15.0 hooks: - id: pyupgrade args: [--py37-plus] + - repo: https://github.com/asottile/blacken-docs - rev: v1.8.0 + rev: 1.16.0 hooks: - id: blacken-docs - additional_dependencies: [black==21.12b0] + additional_dependencies: [black>=22.1] + - repo: https://github.com/compilerla/conventional-pre-commit - rev: v2.1.1 + rev: v3.0.0 hooks: - id: conventional-pre-commit stages: [commit-msg] diff --git a/melusine/__init__.py b/melusine/__init__.py index ebf0c26..5c4fd1b 100644 --- a/melusine/__init__.py +++ b/melusine/__init__.py @@ -2,11 +2,11 @@ Top-level package. """ import logging -import pandas as pd - from ctypes import CDLL, cdll from typing import Any, Optional +import pandas as pd + from melusine._config import config __all__ = ["config"] diff --git a/melusine/backend/active_backend.py b/melusine/backend/active_backend.py index 1844b00..be9deff 100644 --- a/melusine/backend/active_backend.py +++ b/melusine/backend/active_backend.py @@ -102,7 +102,7 @@ def apply_transform( **kwargs, ) - def copy(self, data: Any, fields: List[str] = None) -> Any: + def copy(self, data: Any, fields: Optional[List[str]] = None) -> Any: """ Method to make a copy of the input dataset. @@ -136,7 +136,7 @@ def get_fields(self, data: Any) -> List[str]: """ return self.backend.get_fields(data=data) - def add_fields(self, left: Any, right: Any, fields: List[str] = None) -> Any: + def add_fields(self, left: Any, right: Any, fields: Optional[List[str]] = None) -> Any: """ Method to add fields from the right object to the left object diff --git a/melusine/backend/base_backend.py b/melusine/backend/base_backend.py index 42ac450..16e88f7 100644 --- a/melusine/backend/base_backend.py +++ b/melusine/backend/base_backend.py @@ -51,7 +51,7 @@ def apply_transform( """ @abstractmethod - def add_fields(self, left: Any, right: Any, fields: List[str] = None) -> Any: + def add_fields(self, left: Any, right: Any, fields: Optional[List[str]] = None) -> Any: """ Method to add fields form the right object to the left object. @@ -71,7 +71,7 @@ def add_fields(self, left: Any, right: Any, fields: List[str] = None) -> Any: """ @abstractmethod - def copy(self, data: Any, fields: List[str] = None) -> Any: + def copy(self, data: Any, fields: Optional[List[str]] = None) -> Any: """ Method to make a copy of the dataset. diff --git a/melusine/backend/dict_backend.py b/melusine/backend/dict_backend.py index 1c34069..5a2fcb2 100644 --- a/melusine/backend/dict_backend.py +++ b/melusine/backend/dict_backend.py @@ -80,7 +80,9 @@ def apply_transform( return data - def add_fields(self, left: Dict[str, Any], right: Dict[str, Any], fields: List[str] = None) -> Dict[str, Any]: + def add_fields( + self, left: Dict[str, Any], right: Dict[str, Any], fields: Optional[List[str]] = None + ) -> Dict[str, Any]: """ Method to add fields form the right object to the left object. @@ -106,7 +108,7 @@ def add_fields(self, left: Dict[str, Any], right: Dict[str, Any], fields: List[s return left - def copy(self, data: Dict[str, Any], fields: List[str] = None) -> Dict[str, Any]: + def copy(self, data: Dict[str, Any], fields: Optional[List[str]] = None) -> Dict[str, Any]: """ Method to make a copy of the dataset. diff --git a/melusine/backend/pandas_backend.py b/melusine/backend/pandas_backend.py index 267d1b3..9228d5e 100644 --- a/melusine/backend/pandas_backend.py +++ b/melusine/backend/pandas_backend.py @@ -238,7 +238,7 @@ def apply_transform_multiprocessing( def apply_joblib_dataframe( df: pd.DataFrame, func: Callable, - expand: str = None, + expand: Optional[str] = None, progress_bar: bool = False, **kwargs: Any, ) -> pd.DataFrame: @@ -260,7 +260,7 @@ def apply_joblib_dataframe( def apply_joblib_series( s: pd.Series, func: Callable, - expand: str = None, + expand: Optional[str] = None, progress_bar: bool = False, **kwargs: Any, ) -> pd.DataFrame: @@ -280,7 +280,7 @@ def apply_joblib_series( return result - def add_fields(self, left: pd.DataFrame, right: pd.DataFrame, fields: List[str] = None) -> pd.DataFrame: + def add_fields(self, left: pd.DataFrame, right: pd.DataFrame, fields: Optional[List[str]] = None) -> pd.DataFrame: """ Method to add fields form the right object to the left object. @@ -302,7 +302,7 @@ def add_fields(self, left: pd.DataFrame, right: pd.DataFrame, fields: List[str] return left - def copy(self, data: pd.DataFrame, fields: List[str] = None) -> pd.DataFrame: + def copy(self, data: pd.DataFrame, fields: Optional[List[str]] = None) -> pd.DataFrame: """ Method to make a copy of the dataset. diff --git a/melusine/base.py b/melusine/base.py index 4890ddf..f586db5 100644 --- a/melusine/base.py +++ b/melusine/base.py @@ -18,7 +18,7 @@ import logging import re from abc import ABC, abstractmethod -from typing import Any, Callable, Dict, Iterable, List, Optional, TypeVar, Union +from typing import Any, Callable, Dict, Iterable, TypeVar, Union import pandas as pd from sklearn.base import BaseEstimator, TransformerMixin @@ -55,9 +55,9 @@ class MelusineTransformer(BaseEstimator, TransformerMixin, IoMixin): def __init__( self, - input_columns: Union[str, Iterable[str]], - output_columns: Union[str, Iterable[str]], - func: Optional[Callable] = None, + input_columns: str | Iterable[str], + output_columns: str | Iterable[str], + func: Callable | None = None, ) -> None: """ Attribute initialization. @@ -73,12 +73,12 @@ def __init__( """ IoMixin.__init__(self) - self.input_columns: List[str] = self.parse_column_list(input_columns) - self.output_columns: List[str] = self.parse_column_list(output_columns) + self.input_columns: list[str] = self.parse_column_list(input_columns) + self.output_columns: list[str] = self.parse_column_list(output_columns) self.func = func @staticmethod - def parse_column_list(columns: Union[str, Iterable[str]]) -> List[str]: + def parse_column_list(columns: str | Iterable[str]) -> list[str]: """ Transform a string into a list with a single element. @@ -89,7 +89,7 @@ def parse_column_list(columns: Union[str, Iterable[str]]) -> List[str]: Returns ------- - _: List[str] + _: list[str] A list of column names. """ # Change string into list of strings if necessary @@ -142,8 +142,8 @@ class BaseMelusineDetector(MelusineTransformer, ABC): def __init__( self, name: str, - input_columns: List[str], - output_columns: List[str], + input_columns: list[str], + output_columns: list[str], ): """ Attributes initialization. @@ -185,13 +185,13 @@ def debug_dict_col(self) -> str: @property @abstractmethod - def transform_methods(self) -> List[Callable]: + def transform_methods(self) -> list[Callable]: """ Specify the sequence of methods to be called by the transform method. Returns ------- - _: List[Callable] + _: list[Callable] List of methods to be called by the transform method. """ @@ -260,8 +260,8 @@ def validate_input_fields(self, data: MelusineDataset) -> None: data: MelusineDataset Input data. """ - input_fields: List[str] = backend.get_fields(data) - missing_fields: List[str] = [x for x in self.input_columns if x not in input_fields] + input_fields: list[str] = backend.get_fields(data) + missing_fields: list[str] = [x for x in self.input_columns if x not in input_fields] if missing_fields: raise MissingFieldError(f"Fields {missing_fields} are missing from the input data") @@ -277,13 +277,13 @@ class MelusineDetector(BaseMelusineDetector, ABC): """ @property - def transform_methods(self) -> List[Callable]: + def transform_methods(self) -> list[Callable]: """ Specify the sequence of methods to be called by the transform method. Returns ------- - _: List[Callable] + _: list[Callable] List of methods to be called by the transform method. """ return [self.pre_detect, self.detect, self.post_detect] @@ -343,7 +343,7 @@ def regex_name(self) -> str: @property @abstractmethod - def positive(self) -> Union[Dict[str, str], str]: + def positive(self) -> dict[str, str] | str: """ Define regex patterns required to activate the MelusineRegex. @@ -352,7 +352,7 @@ def positive(self) -> Union[Dict[str, str], str]: """ @property - def neutral(self) -> Optional[Union[Dict[str, str], str]]: + def neutral(self) -> dict[str, str] | str | None: """ Define regex patterns to be ignored when running detection. @@ -362,7 +362,7 @@ def neutral(self) -> Optional[Union[Dict[str, str], str]]: return None @property - def negative(self) -> Optional[Union[Dict[str, str], str]]: + def negative(self) -> dict[str, str] | str | None: """ Define regex patterns prohibited to activate the MelusineRegex. @@ -373,7 +373,7 @@ def negative(self) -> Optional[Union[Dict[str, str], str]]: @property @abstractmethod - def match_list(self) -> List[str]: + def match_list(self) -> list[str]: """ List of texts that should activate the MelusineRegex. @@ -383,7 +383,7 @@ def match_list(self) -> List[str]: @property @abstractmethod - def no_match_list(self) -> List[str]: + def no_match_list(self) -> list[str]: """ List of texts that should NOT activate the MelusineRegex. @@ -392,8 +392,8 @@ def no_match_list(self) -> List[str]: """ def _get_match( - self, text: str, base_regex: Union[str, Dict[str, str]], regex_group: Optional[str] = None - ) -> Dict[str, List[Dict[str, Any]]]: + self, text: str, base_regex: str | dict[str, str], regex_group: str | None = None + ) -> dict[str, list[dict[str, Any]]]: """ Run specified regex on the input text and return a dict with matching group as key. @@ -435,7 +435,7 @@ def _get_match( def ignore_text( self, text: str, - match_data_dict: Dict[str, List[Dict[str, Any]]], + match_data_dict: dict[str, list[dict[str, Any]]], ) -> str: """ Replace neutral regex match text with substitution text to ignore it. @@ -471,7 +471,7 @@ def get_match_result(self, text: str) -> bool: result = self(text) return result[self.MATCH_RESULT] - def __call__(self, text: str) -> Dict[str, Any]: + def __call__(self, text: str) -> dict[str, Any]: """ Apply MelusineRegex patterns (neutral, negative and positive) on the input text. Return a detailed output of the match results as a dict. @@ -519,7 +519,7 @@ def describe(self, text: str, position: bool = False) -> None: position: If True, print regex match start and stop positions. """ - def _describe_match_field(match_field_data: Dict[str, List[Dict[str, Any]]]) -> None: + def _describe_match_field(match_field_data: dict[str, list[dict[str, Any]]]) -> None: """ Format and print result description text. diff --git a/melusine/connectors/exchange.py b/melusine/connectors/exchange.py index 3018cc3..df97a4d 100644 --- a/melusine/connectors/exchange.py +++ b/melusine/connectors/exchange.py @@ -29,12 +29,12 @@ def __init__( mailbox_address: str, credentials: Credentials, config: Configuration, - routing_folder_path: str = None, - correction_folder_path: str = None, - done_folder_path: str = None, + routing_folder_path: Optional[str] = None, + correction_folder_path: Optional[str] = None, + done_folder_path: Optional[str] = None, target_column: str = "target", - account_args: Dict[str, Any] = None, - sender_address: str = None, + account_args: Optional[Dict[str, Any]] = None, + sender_address: Optional[str] = None, ): """ Parameters @@ -238,7 +238,7 @@ def correction_folder_path(self, correction_folder_path: str) -> None: folder_path = self._get_folder_path(self.correction_folder) logger.info(f"Correction folder path set to '{folder_path}'") - def create_folders(self, folder_list: List[str], base_folder_path: str = None) -> None: + def create_folders(self, folder_list: List[str], base_folder_path: Optional[str] = None) -> None: """Create folders in the mailbox. Parameters @@ -267,7 +267,7 @@ def create_folders(self, folder_list: List[str], base_folder_path: str = None) - def get_emails( self, max_emails: int = 100, - base_folder_path: str = None, + base_folder_path: Optional[str] = None, ascending: bool = True, ) -> pd.DataFrame: """ @@ -495,7 +495,7 @@ def move_to_done(self, emails_id: List[str]) -> None: self.mailbox_account.bulk_move(ids=items, to_folder=self.done_folder, chunk_size=5) logger.info(f"Moved {n_items} corrected emails to the folder {self.done_folder_path}") - def list_subfolders(self, base_folder_path: str = None) -> List[str]: + def list_subfolders(self, base_folder_path: Optional[str] = None) -> List[str]: """ List the sub-folders of the specified folder. diff --git a/melusine/data/_data_loader.py b/melusine/data/_data_loader.py index 333e456..998708a 100644 --- a/melusine/data/_data_loader.py +++ b/melusine/data/_data_loader.py @@ -1,6 +1,7 @@ -import pandas as pd from pathlib import Path +import pandas as pd + def load_email_data() -> pd.DataFrame: """ diff --git a/melusine/detectors.py b/melusine/detectors.py index fec6b31..e6a10c7 100644 --- a/melusine/detectors.py +++ b/melusine/detectors.py @@ -9,7 +9,13 @@ from melusine.base import MelusineDetector, MelusineItem, MelusineRegex from melusine.message import Message -from melusine.regex import EmergencyRegex, ReplyRegex, ThanksRegex, TransferRegex, VacationReplyRegex +from melusine.regex import ( + EmergencyRegex, + ReplyRegex, + ThanksRegex, + TransferRegex, + VacationReplyRegex, +) class ThanksDetector(MelusineDetector): diff --git a/melusine/io/_classes.py b/melusine/io/_classes.py index 2daec83..02c6c6e 100644 --- a/melusine/io/_classes.py +++ b/melusine/io/_classes.py @@ -6,7 +6,7 @@ from __future__ import annotations import logging -from typing import Any, Dict, List, Optional, Type, TypeVar +from typing import Any, TypeVar from melusine import config @@ -28,13 +28,13 @@ class IoMixin: def __init__(self, **kwargs: Any): """Initialize attribute.""" - self.json_exclude_list: List[str] = ["_func", "json_exclude_list"] + self.json_exclude_list: list[str] = ["_func", "json_exclude_list"] @classmethod def from_config( - cls: Type[T], - config_key: Optional[str] = None, - config_dict: Optional[Dict[str, Any]] = None, + cls: type[T], + config_key: str | None = None, + config_dict: dict[str, Any] | None = None, **kwargs: Any, ) -> T: """ @@ -44,7 +44,7 @@ def from_config( ---------- config_key: str Configuration key. - config_dict: Dict[str, Any] + config_dict: dict[str, Any] Dictionary of config. kwargs: Any @@ -69,13 +69,13 @@ def from_config( return cls.from_dict(**config_dict) @classmethod - def from_dict(cls: Type[T], **params_dict: Dict[str, Any]) -> T: + def from_dict(cls: type[T], **params_dict: dict[str, Any]) -> T: """ Method to instantiate a class based a dict object. Parameters ---------- - params_dict: Dict[str, Any] + params_dict: dict[str, Any] Parameters dict. Returns diff --git a/melusine/message.py b/melusine/message.py index 08af8dc..97d5c01 100644 --- a/melusine/message.py +++ b/melusine/message.py @@ -82,7 +82,9 @@ def str_line_length(self) -> int: else: return config["message"].get("str_line_length", self.DEFAULT_STR_LINE_LENGTH) - def extract_parts(self, target_tags: Iterable[str] = None, stop_at: Iterable[str] = None) -> List[Tuple[str, str]]: + def extract_parts( + self, target_tags: Optional[Iterable[str]] = None, stop_at: Optional[Iterable[str]] = None + ) -> List[Tuple[str, str]]: """ Function to extract target tags from the message. diff --git a/melusine/pipeline.py b/melusine/pipeline.py index f714c0b..6888ae2 100644 --- a/melusine/pipeline.py +++ b/melusine/pipeline.py @@ -7,7 +7,7 @@ import copy import importlib -from typing import Dict, Iterable, List, Optional, Set, Tuple, TypeVar +from typing import Iterable, TypeVar from sklearn.pipeline import Pipeline @@ -42,8 +42,8 @@ class MelusinePipeline(Pipeline): def __init__( self, - steps: List[Tuple[str, MelusineTransformer]], - memory: Optional[bool] = None, + steps: list[tuple[str, MelusineTransformer]], + memory: bool | None = None, verbose: bool = False, ) -> None: """ @@ -64,7 +64,7 @@ def __init__( self.verbose = verbose @property - def input_columns(self) -> List[str]: + def input_columns(self) -> list[str]: """ Input fields of the Pipeline. @@ -73,7 +73,7 @@ def input_columns(self) -> List[str]: _: List[str] List of input fields. """ - column_set: Set[str] = set() + column_set: set[str] = set() for _, step in self.steps: # UNION between sets column_set |= set(step.input_columns) @@ -81,7 +81,7 @@ def input_columns(self) -> List[str]: return list(column_set) @property - def output_columns(self) -> List[str]: + def output_columns(self) -> list[str]: """ Output fields of the Pipeline. @@ -90,14 +90,14 @@ def output_columns(self) -> List[str]: _: List[str] List of output fields. """ - column_set: Set[str] = set() + column_set: set[str] = set() for _, step in self.steps: column_set |= set(step.output_columns) return list(column_set) @classmethod - def get_obj_class(cls, obj_params: Dict[str, Any]) -> Any: + def get_obj_class(cls, obj_params: dict[str, Any]) -> Any: """ Get the class object of an instance. @@ -142,7 +142,7 @@ def import_class(obj_class_name: str, obj_module: str) -> Any: return obj_class @classmethod - def flatten_pipeline_config(cls, conf: Dict[str, Any]) -> Dict[str, Any]: + def flatten_pipeline_config(cls, conf: dict[str, Any]) -> dict[str, Any]: """ Flatten nested Melusine Pipelines. @@ -158,7 +158,7 @@ def flatten_pipeline_config(cls, conf: Dict[str, Any]) -> Dict[str, Any]: _: Dict[str, Any] Flattened conf. """ - new_conf: List[Any] = list() + new_conf: list[Any] = list() for step in conf[cls.STEPS_KEY]: if step.get(cls.OBJ_CLASS, "") == cls.__name__: subpipeline_conf = cls.flatten_pipeline_config(step["parameters"]) @@ -171,7 +171,7 @@ def flatten_pipeline_config(cls, conf: Dict[str, Any]) -> Dict[str, Any]: @classmethod def from_config( - cls, config_key: Optional[str] = None, config_dict: Optional[Dict[str, Any]] = None, **kwargs: Any + cls, config_key: str | None = None, config_dict: dict[str, Any] | None = None, **kwargs: Any ) -> MelusinePipeline: """ Instantiate a MelusinePipeline from a config key. @@ -233,7 +233,7 @@ def from_config( return cls(steps=steps, **init_params) @classmethod - def validate_step_config(cls, step: Dict[str, Any]) -> Dict[str, Any]: + def validate_step_config(cls, step: dict[str, Any]) -> dict[str, Any]: """ Validate a pipeline step configuration. @@ -276,7 +276,7 @@ def validate_step_config(cls, step: Dict[str, Any]) -> Dict[str, Any]: } @classmethod - def validate_pipeline_config(cls, pipeline_conf: Dict[str, Any]) -> Dict[str, Any]: + def validate_pipeline_config(cls, pipeline_conf: dict[str, Any]) -> dict[str, Any]: """ Validate a pipeline configuration. @@ -288,7 +288,7 @@ def validate_pipeline_config(cls, pipeline_conf: Dict[str, Any]) -> Dict[str, An ------- _: Validated pipeline configuration. """ - validated_pipeline_conf: Dict[str, Any] = {cls.STEPS_KEY: []} + validated_pipeline_conf: dict[str, Any] = {cls.STEPS_KEY: []} steps = pipeline_conf.get(cls.STEPS_KEY) if not steps or not isinstance(steps, list): @@ -302,7 +302,7 @@ def validate_pipeline_config(cls, pipeline_conf: Dict[str, Any]) -> Dict[str, An return validated_pipeline_conf @classmethod - def parse_pipeline_config(cls, config_dict: Dict[str, Any]) -> Dict[str, Any]: + def parse_pipeline_config(cls, config_dict: dict[str, Any]) -> dict[str, Any]: """ Parse config dict to replace config key by the associated configurations. @@ -345,7 +345,7 @@ def parse_pipeline_config(cls, config_dict: Dict[str, Any]) -> Dict[str, Any]: return MelusinePipeline.flatten_pipeline_config(config_dict) @classmethod - def get_config_from_key(cls, config_key: str) -> Dict[str, Any]: + def get_config_from_key(cls, config_key: str) -> dict[str, Any]: """ Parse config dict to replace config key by the associated configurations. @@ -370,7 +370,7 @@ def validate_input_fields(self, data: Any) -> None: data: Any Input data. """ - active_fields: Set[str] = set(backend.get_fields(data)) + active_fields: set[str] = set(backend.get_fields(data)) for step_name, step in self.steps: difference = set(step.input_columns).difference(active_fields) diff --git a/melusine/processors.py b/melusine/processors.py index 82c0d17..eda1ff0 100644 --- a/melusine/processors.py +++ b/melusine/processors.py @@ -25,7 +25,7 @@ import unicodedata from abc import abstractmethod from re import Pattern -from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple, Union +from typing import Any, Iterable, Sequence, Union import arrow @@ -82,18 +82,18 @@ def __init__( # Fix newlines self.fix_newlines = fix_newlines - def normalize_message(self, message_list: List[Message]) -> List[Message]: + def normalize_message(self, message_list: list[Message]) -> list[Message]: """ Normalize the text of a message. Parameters ---------- - message_list: List[Message] + message_list: list[Message] Input message list Returns ------- - _: List[Message] + _: list[Message] Normalized message list """ @@ -162,9 +162,9 @@ class RegexTokenizer(MelusineTransformer): def __init__( self, tokenizer_regex: str = r"\w+(?:[\?\-\"_]\w+)*", - stopwords: List[str] = None, + stopwords: list[str] | None = None, lowercase: bool = True, - normalization_form: Optional[str] = None, + normalization_form: str | None = None, input_columns: str = "text", output_columns: str = "tokens", ): @@ -309,7 +309,7 @@ def create_segmentation_regex_list() -> Iterable[str]: """ @staticmethod - def compile_regex_from_list(regex_list: Iterable[str], flags: Union[int, re.RegexFlag] = re.M) -> Pattern: + def compile_regex_from_list(regex_list: Iterable[str], flags: int | re.RegexFlag = re.M) -> Pattern: """ Method to create a meta-regex from a list of regexs. @@ -333,18 +333,18 @@ def compile_regex_from_list(regex_list: Iterable[str], flags: Union[int, re.Rege return re.compile(regex, flags=flags) - def create_messages(self, match_list: List[str]) -> List[Message]: + def create_messages(self, match_list: list[str]) -> list[Message]: """ Method to create Message instances based on the segmented email data. Parameters ---------- - match_list: List[str] + match_list: list[str] List of text elements matched by the segmentation regex Returns ------- - _: List[Message] + _: list[Message] """ # Create first message meta based on email meta first_message_meta = "" @@ -379,7 +379,7 @@ def create_messages(self, match_list: List[str]) -> List[Message]: return [Message(text=text, meta=meta) for text, meta in zip(text_list, meta_list)] - def segment_text(self, text: str) -> List[Message]: + def segment_text(self, text: str) -> list[Message]: """ Method to segment a conversation by splitting the text on transition patterns. Ex: @@ -395,7 +395,7 @@ def segment_text(self, text: str) -> List[Message]: Returns ------- - _: List[Message] + _: list[Message] List of messages """ # Strip start / end characters @@ -532,7 +532,7 @@ class BaseExtractor(MelusineTransformer): def __init__( self, - input_columns: Union[str, Iterable[str]], + input_columns: str | Iterable[str], output_columns: str, ): """ @@ -550,13 +550,13 @@ def __init__( ) @abstractmethod - def extract(self, message_list: List[Message]) -> Any: + def extract(self, message_list: list[Message]) -> Any: """ Method to extract data from a list of messages. Parameters ---------- - message_list: List[Message] + message_list: list[Message] List of Messages Returns @@ -575,10 +575,10 @@ def __init__( self, input_columns: str = "messages", output_columns: str = "last_message", - include_tags: List[str] = None, - exclude_tags: List[str] = None, + include_tags: list[str] | None = None, + exclude_tags: list[str] | None = None, sep: str = "\n", - n_messages: Union[int, None] = 1, + n_messages: int | None = 1, stop_at: Iterable[str] = ("GREETINGS",), ): """ @@ -588,15 +588,15 @@ def __init__( Input columns for the transform operation output_columns: str Outputs columns for the transform operation - include_tags: List[str] + include_tags: list[str] Message tags to be included in the text extraction - exclude_tags: List[str] + exclude_tags: list[str] Message tags to be excluded from the text extraction sep: str Separation symbol to join text parts n_messages: Union[int, None] Number of messages to take into account (starting with the latest) - stop_at: List[str] + stop_at: list[str] When stop_at tags are encountered, stop extracting text of the message """ super().__init__( @@ -613,13 +613,13 @@ def __init__( self.n_messages = n_messages self.stop_at = stop_at - def extract(self, message_list: List[Message]) -> str: + def extract(self, message_list: list[Message]) -> str: """ Method to extract text parts from a list of messages. Parameters ---------- - message_list: List[Message] + message_list: list[Message] Input message list Returns @@ -670,7 +670,7 @@ class TokensExtractor(BaseExtractor): def __init__( self, - input_columns: Union[str, Iterable[str]] = ("header_tokens", "body_tokens"), + input_columns: str | Iterable[str] = ("header_tokens", "body_tokens"), output_columns: str = "tokens", sep_token: str = "[PAD]", pad_size: int = 5, @@ -691,7 +691,7 @@ def __init__( self.sep_token = sep_token self.pad_size = pad_size - def extract(self, row: MelusineDataset) -> List[str]: + def extract(self, row: MelusineDataset) -> list[str]: """ Method to extract tokens from different columns of a DataFrame. @@ -702,7 +702,7 @@ def extract(self, row: MelusineDataset) -> List[str]: Returns ------- - _: List[str] + _: list[str] List of extracted tokens """ @@ -741,7 +741,7 @@ def __init__( self, input_columns: str = "messages", output_columns: str = "messages", - tag_list: List[str] = None, + tag_list: list[str] | None = None, default_tag: str = "BODY", valid_part_regex: str = r"[a-z0-9?]", default_regex_flag: int = re.IGNORECASE, @@ -753,7 +753,7 @@ def __init__( ---------- input_columns: str output_columns: str - tag_list: List[str] + tag_list: list[str] (Ordered) List of tags to look for default_tag: str Tag given to arbitrary text parts @@ -867,29 +867,29 @@ def compile_split_pattern() -> re.Pattern: return re.compile(sentence_split_pattern) @classmethod - def get_tag_list(cls) -> List[str]: + def get_tag_list(cls) -> list[str]: """ Method to get the list of available tags. Returns ------- - _: List[str] + _: list[str] List of tags """ return [p for p in dir(cls) if isinstance(getattr(cls, p), Tag)] - def tag_email(self, messages: List[Message]) -> Union[List[Message], None]: + def tag_email(self, messages: list[Message]) -> list[Message] | None: """ Method to apply content tagging on an email (= List of Messages) Parameters ---------- - messages : List[Message] + messages : list[Message] List of messages Returns ------- - messages : List[Message] + messages : list[Message] List of messages after content tagging """ if not messages: @@ -938,14 +938,14 @@ def compile_tag_regex(self, tag: str) -> re.Pattern: raise ValueError( f"Tag {tag} does not return any of the supported types : " "str " - "List[str] " + "list[str] " "re.Pattern " f"Got {type(regex)} instead." ) return regex - def tag_text(self, text: str) -> List[Tuple[str, str]]: + def tag_text(self, text: str) -> list[tuple[str, str]]: """ Method to apply content tagging on a text. @@ -956,7 +956,7 @@ def tag_text(self, text: str) -> List[Tuple[str, str]]: Returns ------- - _: List[Tuple[str, str]] + _: list[tuple[str, str]] List of tag/text couples (ex: [("HELLO", "bonjour")]) """ parts = self.split_text(text) @@ -970,7 +970,7 @@ def tag_text(self, text: str) -> List[Tuple[str, str]]: return tags - def split_text(self, text: str) -> List[str]: + def split_text(self, text: str) -> list[str]: """ Method to split input text into sentences/parts using a regex. @@ -981,7 +981,7 @@ def split_text(self, text: str) -> List[str]: Returns ------- - _: List[str] + _: list[str] List of parts/sentences """ # Replace multiple spaces by single spaces @@ -1015,20 +1015,20 @@ def validate_part(self, text: str) -> bool: return bool(re.search(self.valid_part_regex, text, flags=re.I)) @staticmethod - def clean_up_after_split(parts: List[Union[str, None]]) -> List[str]: + def clean_up_after_split(parts: list[str | None]) -> list[str]: """ Clean up sentences after splitting. Typically, put punctuation back at the end of sentences. Parameters ---------- - parts: List[Union[str, None]] + parts: list[Union[str, None]] Returns ------- - clean_parts: List[str] + clean_parts: list[str] """ - clean_parts: List[str] = [] + clean_parts: list[str] = [] for part in parts: if not part: continue @@ -1044,7 +1044,7 @@ def clean_up_after_split(parts: List[Union[str, None]]) -> List[str]: return clean_parts - def tag_part(self, part: str) -> Tuple[str, str]: + def tag_part(self, part: str) -> tuple[str, str]: """ Method to apply tagging on a text chunk (sentence/part). @@ -1095,7 +1095,7 @@ def word_block(n_words: int, word_character_only: bool = False) -> str: return rf"(?:[ \-–]*(?:{positive}+(?:[ \-–]+{positive}+){{,{n_words - 1}}})? *)" - def __call__(self, text: str) -> List[Tuple[str, str, str]]: + def __call__(self, text: str) -> list[tuple[str, str, str]]: """ Method to find all regex patterns matching the input text. @@ -1106,7 +1106,7 @@ def __call__(self, text: str) -> List[Tuple[str, str, str]]: Returns ------- - match_list: List[Tuple[str, str]] + match_list: list[tuple[str, str]] List of matching regexes and associated tags """ full_match_list = list() @@ -1126,7 +1126,7 @@ def __call__(self, text: str) -> List[Tuple[str, str, str]]: return full_match_list - def find_matching_regex_patterns(self, part: str, regex: TagPattern) -> List[str]: + def find_matching_regex_patterns(self, part: str, regex: TagPattern) -> list[str]: """ Given a regex string, a regex pattern or a list of regexes. Find all matching patterns @@ -1153,18 +1153,18 @@ def find_matching_regex_patterns(self, part: str, regex: TagPattern) -> List[str return matching_regex_list @abstractmethod - def post_process_tags(self, tags: List[Tuple[str, str]]) -> List[Tuple[str, str]]: + def post_process_tags(self, tags: list[tuple[str, str]]) -> list[tuple[str, str]]: """ Method to apply tagging rules posterior to the standard regex tagging. Parameters ---------- - tags: List[Tuple[str, str]] + tags: list[tuple[str, str]] Original tags Returns ------- - _: List[Tuple[str, str]] + _: list[tuple[str, str]] Post-processed tags """ @@ -1190,7 +1190,7 @@ def __init__( self, input_columns: str = "messages", output_columns: str = "messages", - tag_list: List[str] = None, + tag_list: list[str] | None = None, default_tag: str = "BODY", valid_part_regex: str = r"[a-z0-9?]", default_regex_flag: int = re.IGNORECASE | re.MULTILINE, @@ -1202,7 +1202,7 @@ def __init__( ---------- input_columns: str output_columns: str - tag_list: List[str] + tag_list: list[str] (Ordered) List of tags to look for default_tag: str Tag given to arbitrary text parts @@ -1223,7 +1223,7 @@ def __init__( ) @Tag - def GREETINGS(self) -> Union[str, List[str], re.Pattern]: + def GREETINGS(self) -> str | list[str] | re.Pattern: """ Tag associated with email closure sentences. Watchout, this tag typically marks the end of a message. @@ -1265,7 +1265,7 @@ def GREETINGS(self) -> Union[str, List[str], re.Pattern]: ] @Tag - def HELLO(self) -> Union[str, List[str], re.Pattern]: + def HELLO(self) -> str | list[str] | re.Pattern: """ Tag associated with email opening sentences. Sentences that can be either opening or closing should be placed here. @@ -1328,7 +1328,7 @@ def HELLO(self) -> Union[str, List[str], re.Pattern]: ] @Tag - def PJ(self) -> Union[str, List[str], re.Pattern]: + def PJ(self) -> str | list[str] | re.Pattern: """ Tag associated with email attachment mentions. Ex: "See attached files" @@ -1340,7 +1340,7 @@ def PJ(self) -> Union[str, List[str], re.Pattern]: ] @Tag - def FOOTER(self) -> Union[str, List[str], re.Pattern]: + def FOOTER(self) -> str | list[str] | re.Pattern: """ Tag associated with email footer sentences. Ex: "Envoyé de mon iPhone" @@ -1417,7 +1417,7 @@ def FOOTER(self) -> Union[str, List[str], re.Pattern]: return diclaimer_regex_list + miscellaneous_footer_regex @Tag - def THANKS(self) -> Union[str, List[str], re.Pattern]: + def THANKS(self) -> str | list[str] | re.Pattern: """ Tag associated with email thanks sentences. Ex: "Merci beaucoup" @@ -1447,7 +1447,7 @@ def THANKS(self) -> Union[str, List[str], re.Pattern]: ] @Tag - def SIGNATURE(self) -> Union[str, List[str], re.Pattern]: + def SIGNATURE(self) -> str | list[str] | re.Pattern: """ Tag associated with email signature sentences. Ex: "Tel : 0600000000" @@ -1544,18 +1544,18 @@ def SIGNATURE(self) -> Union[str, List[str], re.Pattern]: r"^[A-Za-z]+(?: [A-Za-z]+)*, le \d{1,2} [A-Za-z]+ \d{4}.{,3}$", ] - def post_process_tags(self, tags: List[Tuple[str, str]]) -> List[Tuple[str, str]]: + def post_process_tags(self, tags: list[tuple[str, str]]) -> list[tuple[str, str]]: """ Method to apply tagging rules posterior to the standard regex tagging. Parameters ---------- - tags: List[Tuple[str, str]] + tags: list[tuple[str, str]] Original tags Returns ------- - _: List[Tuple[str, str]] + _: list[tuple[str, str]] Post-processed tags """ # Signature lines containing first/last name @@ -1563,19 +1563,19 @@ def post_process_tags(self, tags: List[Tuple[str, str]]) -> List[Tuple[str, str] return tags - def detect_name_signature(self, tags: List[Tuple[str, str]]) -> List[Tuple[str, str]]: + def detect_name_signature(self, tags: list[tuple[str, str]]) -> list[tuple[str, str]]: """ Method to detect lines containing First name / Surname Ex: Mr Joe Dupond Parameters ---------- - tags: List[Tuple[str, str]] + tags: list[tuple[str, str]] Original tags Returns ------- - _: List[Tuple[str, str]] + _: list[tuple[str, str]] Post processed tags """ # First name / Last name Signatures @@ -1587,9 +1587,9 @@ def detect_name_signature(self, tags: List[Tuple[str, str]]) -> List[Tuple[str, ) # Forbidden words (lowercase) - forbidden_words: Set[str] = {"urgent", "attention"} + forbidden_words: set[str] = {"urgent", "attention"} - new_tags: List[Tuple[str, str]] = list() + new_tags: list[tuple[str, str]] = list() for tag, text in tags: if tag == self.default_tag: match = re.match(line_with_name, text) @@ -1671,7 +1671,7 @@ def meta_email_address_regex(self) -> str: return meta_pattern - def process_transfered_mail(self, message_list: List[Message]) -> Tuple[List[Message], Optional[str]]: + def process_transfered_mail(self, message_list: list[Message]) -> tuple[list[Message], str | None]: """ Run all transformations related to transfer emails. @@ -1682,14 +1682,14 @@ def process_transfered_mail(self, message_list: List[Message]) -> Tuple[List[Mes message_list: List of messages in the conversation clean_address_from: Processed sender email address """ - clean_address_from: Optional[str] = None + clean_address_from: str | None = None # Filter out transfer message (contains only irrelevant tags) message_list = self.filter_message_list(message_list) # Extract email address data from transition pattern top_message = message_list[0] - extracted_address_from: Optional[str] = self.extract_email_address(top_message) + extracted_address_from: str | None = self.extract_email_address(top_message) # If no address if extracted_address_from: @@ -1697,7 +1697,7 @@ def process_transfered_mail(self, message_list: List[Message]) -> Tuple[List[Mes return message_list, clean_address_from - def extract_email_address(self, message: Message) -> Optional[str]: + def extract_email_address(self, message: Message) -> str | None: """ Extract sender email address from message meta (transition pattern). @@ -1722,7 +1722,7 @@ def extract_email_address(self, message: Message) -> Optional[str]: return extracted_address_from - def filter_message_list(self, message_list: List[Message]) -> List[Message]: + def filter_message_list(self, message_list: list[Message]) -> list[Message]: """ """ top_message = message_list[0] @@ -1743,7 +1743,7 @@ class DeterministicTextFlagger(MelusineTransformer): def __init__( self, - text_flags: Dict[str, Any], + text_flags: dict[str, Any], input_columns: str = "text", output_columns: str = "text", remove_multiple_spaces: bool = True, @@ -1752,7 +1752,7 @@ def __init__( """ Parameters ---------- - text_flags: Dict[str, Any] + text_flags: dict[str, Any] Dict containing flag name as key and regex pattern as value add_spaces: bool If true, add spaces around flags @@ -1771,7 +1771,7 @@ def __init__( @staticmethod def default_flag_text( text: str, - flag_dict: Dict[str, str], + flag_dict: dict[str, str], add_spaces: bool = True, remove_multiple_spaces: bool = True, ) -> str: @@ -1782,7 +1782,7 @@ def default_flag_text( Parameters ---------- - flag_dict: Dict[str, str] + flag_dict: dict[str, str] Flagging dict with regex as key and replace_text as value text: str Text to be flagged @@ -1849,14 +1849,14 @@ class Cleaner(MelusineTransformer): def __init__( self, - substitutions: Dict[str, Any], + substitutions: dict[str, Any], input_columns: str = "text", output_columns: str = "text", ): """ Parameters ---------- - substitutions: Dict[str, Any] + substitutions: dict[str, Any] Dict containing replace pattern and replacement value """ super().__init__( @@ -1971,7 +1971,7 @@ def __init__( ) @classmethod - def parse_date_to_iso(cls, date_: str) -> Optional[str]: + def parse_date_to_iso(cls, date_: str) -> str | None: """ This function use the package arrow to convert a date from string format with any type of format (i.e. vendredi 8 juillet 2020 -> 2020-07-08) @@ -1990,7 +1990,7 @@ def parse_date_to_iso(cls, date_: str) -> Optional[str]: date_ as string with iso format (YYYY-MM-DD) """ # Initialization - matched_group: Optional[str] = None + matched_group: str | None = None date_ = date_ or "" date_ = date_.lower() @@ -2034,7 +2034,7 @@ def process_single_digit(matched_group: str, pattern: str) -> str: return matched_group @classmethod - def convert_to_iso_format(cls, matched_group: str, format_: str) -> Optional[str]: + def convert_to_iso_format(cls, matched_group: str, format_: str) -> str | None: """ Try to convert the date found as any string form to ISO format """ diff --git a/pyproject.toml b/pyproject.toml index 9a5a19f..67f789e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,15 @@ readme = {file = ["README.md"]} [tool.black] line-length = 120 +[tool.isort] +profile = "black" + +[tool.mypy] +ignore_missing_imports = true +disallow_untyped_defs = false +hide_error_codes = false +no_site_packages = true + [tool.coverage.run] omit = [ # omit init files in docs_src diff --git a/tests/base/test_base_logging.py b/tests/base/test_base_logging.py index 772eea7..4471866 100644 --- a/tests/base/test_base_logging.py +++ b/tests/base/test_base_logging.py @@ -1,4 +1,5 @@ import logging + from melusine.data import load_email_data from melusine.pipeline import MelusinePipeline diff --git a/tests/base/test_melusine_detectors.py b/tests/base/test_melusine_detectors.py index 1b379f6..44070e5 100644 --- a/tests/base/test_melusine_detectors.py +++ b/tests/base/test_melusine_detectors.py @@ -21,7 +21,6 @@ def df_method(self, df, debug_mode=False): def test_detector_transform_dataframe_wise(): - df = pd.DataFrame([{"input_col": "test0"}, {"input_col": "test1"}]) detector = MyDetector(name="test_detector", input_columns=["input_col"], output_columns=["row_output", "df_output"]) df = detector.transform(df) diff --git a/tests/base/test_message.py b/tests/base/test_message.py index 4cea0c2..3db9d60 100644 --- a/tests/base/test_message.py +++ b/tests/base/test_message.py @@ -5,7 +5,6 @@ def test_message_repr(): - message = Message(text="Hello") assert re.search(r"meta='NA'", repr(message)) @@ -18,7 +17,6 @@ def test_message_repr(): def test_message_has_tags(): - message = Message(text="Hello") message.tags = [ ("HELLO", "Bonjour"), @@ -32,7 +30,6 @@ def test_message_has_tags(): def test_message_has_tags_stop_at(): - message = Message(text="Hello") message.tags = [ ("HELLO", "Bonjour"), @@ -44,14 +41,12 @@ def test_message_has_tags_stop_at(): def test_message_has_tags_no_tags(): - message = Message(text="Hello") assert not message.has_tags(target_tags=["BODY"]) def test_message_extract_parts(): - message = Message(text="Hello") message.tags = [ ("HELLO", "Bonjour"), @@ -67,7 +62,6 @@ def test_message_extract_parts(): def test_message_extract_parts_stop(): - message = Message(text="Hello") message.tags = [ ("HELLO", "Bonjour"), @@ -88,7 +82,6 @@ def test_message_extract_parts_no_tags(): def test_message_extract_last_body(): - message = Message(text="Hello") message.tags = [ ("HELLO", "Bonjour"), diff --git a/tests/conf/test_config.py b/tests/conf/test_config.py index 69fe595..b9f435e 100644 --- a/tests/conf/test_config.py +++ b/tests/conf/test_config.py @@ -10,7 +10,6 @@ def test_load_default_conf(caplog): with caplog.at_level(logging.WARNING): - test_conf = MelusineConfig() test_conf.reset() @@ -19,7 +18,6 @@ def test_load_default_conf(caplog): def test_load_conf_from_env_variable(caplog): - try: with caplog.at_level(logging.INFO): test_conf = MelusineConfig() @@ -40,7 +38,6 @@ def test_load_conf_from_env_variable(caplog): def test_load_conf_from_config_path(caplog): - with caplog.at_level(logging.INFO): test_conf = MelusineConfig() test_conf.reset(config_path=test_conf.DEFAULT_CONFIG_PATH) @@ -56,7 +53,6 @@ def test_load_conf_from_config_path(caplog): def test_load_conf_from_config_dict(caplog): - with caplog.at_level(logging.INFO): test_conf = MelusineConfig() test_conf.reset(config_dict={"my_key": "hello"}) @@ -65,7 +61,6 @@ def test_load_conf_from_config_dict(caplog): def test_config_modif_error(): - test_conf = MelusineConfig() test_conf.reset(config_dict={"my_key": "hello"}) @@ -80,14 +75,12 @@ def test_config_modif_error(): def test_shared_variable(): - # Shared variable TEST_VAR specified in conf/shared.yaml # Conf test_shared_variable specified in global.yaml assert config["global"]["test_shared_variable"] == "test" def test_export_config(tmp_path): - file_list = config.export_default_config(path=str(tmp_path)) assert file_list for file in file_list: diff --git a/tests/data/test_data.py b/tests/data/test_data.py index 70e9e99..8f5bdc0 100644 --- a/tests/data/test_data.py +++ b/tests/data/test_data.py @@ -2,6 +2,5 @@ def test_load_data(): - df = load_email_data() assert "body" in df diff --git a/tests/docs/test_configurations.py b/tests/docs/test_configurations.py index 379ac08..9ab82c3 100644 --- a/tests/docs/test_configurations.py +++ b/tests/docs/test_configurations.py @@ -1,5 +1,10 @@ def test_tutorial001(add_docs_to_pythonpath): - from docs_src.Configurations.tutorial001 import from_config, from_config_dict, modify_conf_with_dict, print_config + from docs_src.Configurations.tutorial001 import ( + from_config, + from_config_dict, + modify_conf_with_dict, + print_config, + ) _ = from_config() _ = from_config_dict() diff --git a/tests/docs/test_detectors.py b/tests/docs/test_detectors.py index ec1124c..baf5e03 100644 --- a/tests/docs/test_detectors.py +++ b/tests/docs/test_detectors.py @@ -11,7 +11,6 @@ def test_tutorial002(add_docs_to_pythonpath): def test_tutorial003(add_docs_to_pythonpath): - from docs_src.MelusineDetectors.tutorial003 import run as run003 _ = run003() diff --git a/tests/functional/test_emails_generic.py b/tests/functional/test_emails_generic.py index 5414516..dc2d453 100644 --- a/tests/functional/test_emails_generic.py +++ b/tests/functional/test_emails_generic.py @@ -9,7 +9,6 @@ # test_cases defined in melusine_code/tests/fixtures/test_emails_fixtures.py @pytest.mark.usefixtures("use_dict_backend") def test_pipeline_steps(testcase): - # Run pipeline tests pipeline_name = testcase.pop("pipeline") assert_pipeline_results(testcase, pipeline_name) diff --git a/tests/huggingface/test_basic_classification.py b/tests/huggingface/test_basic_classification.py index c5500b4..a63d3a2 100644 --- a/tests/huggingface/test_basic_classification.py +++ b/tests/huggingface/test_basic_classification.py @@ -6,7 +6,9 @@ transformers = pytest.importorskip("transformers") from typing import List -from transformers.pipelines.zero_shot_classification import ZeroShotClassificationPipeline +from transformers.pipelines.zero_shot_classification import ( + ZeroShotClassificationPipeline, +) class MockZeroShotClassificationPipeline(ZeroShotClassificationPipeline): diff --git a/tests/regex/test_builtin_regex.py b/tests/regex/test_builtin_regex.py index f664c54..1c5a6b0 100644 --- a/tests/regex/test_builtin_regex.py +++ b/tests/regex/test_builtin_regex.py @@ -1,4 +1,10 @@ -from melusine.regex import EmergencyRegex, ReplyRegex, ThanksRegex, TransferRegex, VacationReplyRegex +from melusine.regex import ( + EmergencyRegex, + ReplyRegex, + ThanksRegex, + TransferRegex, + VacationReplyRegex, +) def test_reply_regex(): From a9d3e15e01162501816508d89f3af1125c30237a Mon Sep 17 00:00:00 2001 From: Guillaume Bernard Date: Tue, 9 Jan 2024 17:33:32 +0100 Subject: [PATCH 4/7] feat: add ruff in pre-commit & config --- .pre-commit-config.yaml | 5 +++++ pyproject.toml | 9 ++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b71d547..a2902c5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,6 +45,11 @@ repos: - id: flake8 additional_dependencies: [ flake8-docstrings, "flake8-bugbear==22.8.23" ] +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.11 + hooks: + - id: ruff + - repo: https://github.com/pre-commit/mirrors-isort rev: v5.10.1 hooks: diff --git a/pyproject.toml b/pyproject.toml index 67f789e..06ff4c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,7 @@ dependencies = [ dynamic = ["version", "readme"] [project.optional-dependencies] # Optional -dev = ["tox", "pre-commit", "black", "flake8", "isort", "mypy", "pytest", "coverage", "build"] +dev = ["tox", "pre-commit", "black", "flake8", "isort", "mypy", "pytest", "coverage", "build", "ruff"] test = ["pytest", "coverage"] transformers = ["transformers>4"] docs = ["mkdocs", "markdown", "mkdocs-material", "mdx-include"] @@ -68,6 +68,13 @@ disallow_untyped_defs = false hide_error_codes = false no_site_packages = true +[tool.ruff] +line-length = 120 +exclude = [ + "tests", + "docs" +] + [tool.coverage.run] omit = [ # omit init files in docs_src From 4c654bed114eb81b7bf73172cd75c64cc71ec5dc Mon Sep 17 00:00:00 2001 From: BERNARD Guillaume Date: Wed, 10 Jan 2024 15:47:18 +0100 Subject: [PATCH 5/7] ci/cd: add more python versions to run tests --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6c40562..53727fd 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -18,7 +18,7 @@ jobs: fail-fast: false max-parallel: 2 matrix: - python-version: ["3.8", "3.10", "3.12"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - name: Checkout the repository uses: actions/checkout@v3 From 7d7e6d61ed114fa2a8d5a6478575be2016f6f62c Mon Sep 17 00:00:00 2001 From: BERNARD Guillaume Date: Wed, 10 Jan 2024 16:07:35 +0100 Subject: [PATCH 6/7] ci: add pre-commit in GitHub Actions --- .github/workflows/main.yml | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 53727fd..da715fe 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -5,11 +5,23 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: psf/black@stable + - name: Checkout the repository + uses: actions/checkout@v3 + - name: Run black + uses: psf/black@stable with: src: "./melusine" + pre-commit: + runs-on: ubuntu-latest + steps: + - name: Checkout the repository + uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + - name: Run pre-commit + uses: pre-commit/action@v3.0.0 + test: name: Test needs: lint From 4fb5bf9eab74fcaa4f3af86cc6f41bc48f336245 Mon Sep 17 00:00:00 2001 From: BERNARD Guillaume Date: Wed, 10 Jan 2024 16:09:48 +0100 Subject: [PATCH 7/7] chore: fix end of file raised by pre-commit --- .github/ISSUE_TEMPLATE/bug_report.md | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index cb1b898..b5eb4c5 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -47,4 +47,3 @@ Please write the debug information inside ``` quotes in order to pr ``` ``` -