diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 8695e196c4f38..b5c22a949d9f5 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -83,6 +83,7 @@ Other enhancements
 - Improved deprecation message for offset aliases (:issue:`60820`)
 - Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
 - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
+- Support :class:`DataFrame` plugin accessor via entry points (:issue:`29076`)
 - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`)
 - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
 - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
diff --git a/pandas/__init__.py b/pandas/__init__.py
index 7d6dd7b7c1a88..ecb69548445d5 100644
--- a/pandas/__init__.py
+++ b/pandas/__init__.py
@@ -346,3 +346,8 @@
     "unique",
     "wide_to_long",
 ]
+
+from pandas.core.accessor import accessor_entry_point_loader
+
+accessor_entry_point_loader()
+del accessor_entry_point_loader
diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py
index 0331c26c805b6..39bc9b0af3bd5 100644
--- a/pandas/core/accessor.py
+++ b/pandas/core/accessor.py
@@ -10,6 +10,11 @@
 import functools
+from importlib.metadata import (
+    EntryPoints,
+    entry_points,
+)
 from typing import (
     TYPE_CHECKING,
+    Any,
     final,
 )
 import warnings
@@ -393,3 +398,82 @@ def register_index_accessor(name: str) -> Callable[[TypeT], TypeT]:
     from pandas import Index
 
     return _register_accessor(name, Index)
+
+
+def accessor_entry_point_loader() -> None:
+    """
+    Load and register pandas accessors declared via entry points.
+
+    This function scans the 'pandas.accessor' entry point group for accessors
+    registered by third-party packages. Each entry point is expected to follow
+    the format:
+
+        <accessor name> = <module>:<accessor class>
+
+    For example, in the ``pyproject.toml`` of the third-party package:
+
+        [project.entry-points."pandas.accessor"]
+        myplugin = "mypackage.accessor:MyPluginAccessor"
+
+    For each entry point:
+    - The accessor is registered on :class:`DataFrame` under the name of the
+      entry point, using ``register_dataframe_accessor``.
+    - The accessor class is only loaded when the accessor is used, so
+      ``import pandas`` does not import the third-party packages.
+    - If two packages declare the same accessor name, a warning is issued,
+      and the accessor that is registered last replaces the earlier one.
+
+    Warns
+    -----
+    UserWarning
+        If two entry points share the same accessor name.
+
+    Notes
+    -----
+    This function is only intended to be called at pandas startup.
+
+    Examples
+    --------
+    Assuming an installed package provides an accessor named ``myplugin``:
+
+    >>> df.myplugin.do_something()  # doctest: +SKIP
+    """
+    ENTRY_POINT_GROUP: str = "pandas.accessor"
+
+    def make_accessor(ep):
+        # Bind ``ep`` now, but defer ``ep.load()`` until the accessor is used.
+        def accessor(self) -> Any:
+            cls_ = ep.load()
+            return cls_(self)
+
+        return accessor
+
+    accessors: EntryPoints = entry_points(group=ENTRY_POINT_GROUP)
+    # Maps each accessor name to the package that most recently provided it
+    accessor_package_dict: dict[str, str] = {}
+
+    for new_accessor in accessors:
+        try:
+            new_pkg_name = new_accessor.dist.name
+        except AttributeError:
+            # The entry point carries no distribution information
+            new_pkg_name = "Unknown"
+
+        # Verifies duplicated accessor names
+        if new_accessor.name in accessor_package_dict:
+            loaded_pkg_name = accessor_package_dict[new_accessor.name]
+
+            warnings.warn(
+                "Warning: you have two accessors with the same name:"
+                f" '{new_accessor.name}' has already been registered"
+                f" by the package '{new_pkg_name}'. So the "
+                f"'{new_accessor.name}' provided by the package "
+                f"'{loaded_pkg_name}' is not being used. "
+                "Uninstall the package you don't want "
+                "to use if you want to get rid of this warning.\n",
+                UserWarning,
+                stacklevel=2,
+            )
+
+        accessor_package_dict[new_accessor.name] = new_pkg_name
+        register_dataframe_accessor(new_accessor.name)(make_accessor(new_accessor))
diff --git a/pandas/tests/test_plugins_entrypoint_loader.py b/pandas/tests/test_plugins_entrypoint_loader.py
new file mode 100644
index 0000000000000..155957879d9b4
--- /dev/null
+++ b/pandas/tests/test_plugins_entrypoint_loader.py
@@ -0,0 +1,248 @@
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+from pandas.core.accessor import accessor_entry_point_loader
+
+# TODO: test for pkg names
+
+PANDAS_ENTRY_POINT_GROUP: str = "pandas.accessor"
+
+
+@pytest.fixture(autouse=True)
+def remove_registered_accessors():
+    """
+    Remove the accessors a test registered on DataFrame once the test is done.
+
+    Accessors are stored on the DataFrame class itself, so without this
+    clean-up they would leak into every test that runs afterwards.
+    """
+    preexisting = set(pd.DataFrame._accessors)
+    yield
+    for name in set(pd.DataFrame._accessors) - preexisting:
+        delattr(pd.DataFrame, name)
+        pd.DataFrame._accessors.discard(name)
+
+
+def test_no_accessors(monkeypatch):
+    # GH29076
+
+    # Mock entry_points
+    def mock_entry_points(*, group):
+        return []
+
+    # Patch entry_points in the correct module
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    accessor_entry_point_loader()
+
+
+def test_load_dataframe_accessors(monkeypatch):
+    # GH29076
+    # Mocked EntryPoint to simulate a plugin
+    class MockEntryPoint:
+        name = "test_accessor"
+
+        def load(self):
+            class TestAccessor:
+                def __init__(self, df):
+                    self._df = df
+
+                def test_method(self):
+                    return "success"
+
+            return TestAccessor
+
+    # Mock entry_points
+    def mock_entry_points(*, group):
+        if group == PANDAS_ENTRY_POINT_GROUP:
+            return [MockEntryPoint()]
+        return []
+
+    # Patch entry_points in the correct module
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    accessor_entry_point_loader()
+
+    # Create DataFrame and verify that the accessor was registered
+    df = pd.DataFrame({"a": [1, 2, 3]})
+    assert hasattr(df, "test_accessor")
+    assert df.test_accessor.test_method() == "success"
+
+
+def test_duplicate_accessor_names(monkeypatch):
+    # GH29076
+    # Create plugin
+    class MockEntryPoint1:
+        name = "duplicate_accessor"
+
+        def load(self):
+            class Accessor1:
+                def __init__(self, df):
+                    self._df = df
+
+                def which(self):
+                    return "Accessor1"
+
+            return Accessor1
+
+    # Create plugin
+    class MockEntryPoint2:
+        name = "duplicate_accessor"
+
+        def load(self):
+            class Accessor2:
+                def __init__(self, df):
+                    self._df = df
+
+                def which(self):
+                    return "Accessor2"
+
+            return Accessor2
+
+    def mock_entry_points(*, group):
+        if group == PANDAS_ENTRY_POINT_GROUP:
+            return [MockEntryPoint1(), MockEntryPoint2()]
+        return []
+
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    # Check that the UserWarning is raised
+    with tm.assert_produces_warning(UserWarning, match="duplicate_accessor") as record:
+        accessor_entry_point_loader()
+
+    messages = [str(w.message) for w in record]
+    assert any("you have two accessors with the same name:" in msg for msg in messages)
+
+    df = pd.DataFrame({"x": [1, 2, 3]})
+    assert hasattr(df, "duplicate_accessor")
+    assert df.duplicate_accessor.which() in {"Accessor1", "Accessor2"}
+
+
+def test_unique_accessor_names(monkeypatch):
+    # GH29076
+    # Create plugin
+    class MockEntryPoint1:
+        name = "accessor1"
+
+        def load(self):
+            class Accessor1:
+                def __init__(self, df):
+                    self._df = df
+
+                def which(self):
+                    return "Accessor1"
+
+            return Accessor1
+
+    # Create plugin
+    class MockEntryPoint2:
+        name = "accessor2"
+
+        def load(self):
+            class Accessor2:
+                def __init__(self, df):
+                    self._df = df
+
+                def which(self):
+                    return "Accessor2"
+
+            return Accessor2
+
+    def mock_entry_points(*, group):
+        if group == PANDAS_ENTRY_POINT_GROUP:
+            return [MockEntryPoint1(), MockEntryPoint2()]
+        return []
+
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    # Check that no UserWarning is raised
+    with tm.assert_produces_warning(None, check_stacklevel=False):
+        accessor_entry_point_loader()
+
+    df = pd.DataFrame({"x": [1, 2, 3]})
+    assert hasattr(df, "accessor1"), "Accessor1 not registered"
+    assert hasattr(df, "accessor2"), "Accessor2 not registered"
+    assert df.accessor1.which() == "Accessor1", "Accessor1 method incorrect"
+    assert df.accessor2.which() == "Accessor2", "Accessor2 method incorrect"
+
+
+def test_duplicate_and_unique_accessor_names(monkeypatch):
+    # GH29076
+    # Create plugin
+    class MockEntryPoint1:
+        name = "duplicate_accessor"
+
+        def load(self):
+            class Accessor1:
+                def __init__(self, df):
+                    self._df = df
+
+                def which(self):
+                    return "Accessor1"
+
+            return Accessor1
+
+    # Create plugin
+    class MockEntryPoint2:
+        name = "duplicate_accessor"
+
+        def load(self):
+            class Accessor2:
+                def __init__(self, df):
+                    self._df = df
+
+                def which(self):
+                    return "Accessor2"
+
+            return Accessor2
+
+    # Create plugin
+    class MockEntryPoint3:
+        name = "unique_accessor"
+
+        def load(self):
+            class Accessor3:
+                def __init__(self, df):
+                    self._df = df
+
+                def which(self):
+                    return "Accessor3"
+
+            return Accessor3
+
+    def mock_entry_points(*, group):
+        if group == PANDAS_ENTRY_POINT_GROUP:
+            return [MockEntryPoint1(), MockEntryPoint2(), MockEntryPoint3()]
+        return []
+
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    # Capture warnings
+    with tm.assert_produces_warning(UserWarning, match="duplicate_accessor") as record:
+        accessor_entry_point_loader()
+
+    messages = [str(w.message) for w in record]
+
+    # Filter warnings for the specific message about duplicate accessors
+    duplicate_package_warnings = [
+        msg
+        for msg in messages
+        if "you have two accessors with the same name: 'duplicate_accessor'" in msg
+    ]
+
+    # Assert one warning about duplicate accessors
+    assert len(duplicate_package_warnings) == 1, (
+        f"Expected exactly one warning about duplicate accessors, "
+        f"got {len(duplicate_package_warnings)}: {duplicate_package_warnings}"
+    )
+
+    df = pd.DataFrame({"x": [1, 2, 3]})
+    assert hasattr(df, "duplicate_accessor"), "duplicate_accessor not registered"
+
+    assert hasattr(df, "unique_accessor"), "unique_accessor not registered"
+
+    assert df.duplicate_accessor.which() in {"Accessor1", "Accessor2"}, (
+        "duplicate_accessor method incorrect"
+    )
+    assert df.unique_accessor.which() == "Accessor3", "unique_accessor method incorrect"