Skip to content

Commit

Permalink
Fix bad docstring formatting that was breaking docs build
Browse files Browse the repository at this point in the history
  • Loading branch information
zaneselvans committed Dec 1, 2023
1 parent 152e776 commit ea643bd
Showing 1 changed file with 14 additions and 13 deletions.
27 changes: 14 additions & 13 deletions src/pudl/analysis/record_linkage/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""This module defines an interface record linkage models can conform to and implements common functionality."""
"""Define a record linkage model interface and implement common functionality."""
from abc import ABC, abstractmethod
from collections.abc import Callable
from typing import Any
Expand All @@ -24,7 +24,7 @@


class ModelComponent(BaseModel, ABC):
""":class:`ModelComponent`s are the basic building blocks of a record linkage model.
"""A :class:`ModelComponent` is the basic building block of a record linkage model.
:class:`ModelComponent` defines a simple interface that should be implemented to
create basic model steps that can be combined and reused at will. This interface
Expand Down Expand Up @@ -108,15 +108,16 @@ def as_pipeline(self) -> Pipeline:
class DataFrameEmbedder(ModelComponent):
"""This ModelComponent performs a series of column transformations on a DataFrame.
Under the hood this uses :class:`sklearn.compose.ColumnTransformer`. As configuration
it takes as configuration a mapping of column names to a list of transformations to apply.
Transformations can be specified either by passing an instance of a
:class:`sklearn.base.BaseEstimator`, or a string to select from several common/generic
transformers defined by this class. If a string is used, it should be one of the following:
Under the hood this uses :class:`sklearn.compose.ColumnTransformer`. As
configuration it takes a mapping of column names to a list of
transformations to apply. Transformations can be specified either by passing an
instance of a :class:`sklearn.base.BaseEstimator`, or a string to select from
several common/generic transformers defined by this class. If a string is used, it
should be one of the following:
'string' - Applies a TfidfVectorizer to the column.
'category' - Applies a OneHotEncoder to the column.
'number' - Applies a MinMaxScaler to the column.
* ``string`` - Applies a TfidfVectorizer to the column.
* ``category`` - Applies a OneHotEncoder to the column.
* ``number`` - Applies a MinMaxScaler to the column.
"""

#: Maps step name to list of transformations.
Expand Down Expand Up @@ -151,7 +152,7 @@ def __call__(self, df: pd.DataFrame):


class ReducedDimDataFrameEmbedder(DataFrameEmbedder):
"""Subclass of :class:`DataFrameEmbedder`, which applies PCA to reduce dimensions of the output."""
""":class:`DataFrameEmbedder` subclass that reduces output dimensions using PCA."""

#: Passed to :class:`sklearn.decomposition.PCA` param n_components
output_dims: int | float | None = 500
Expand All @@ -166,7 +167,7 @@ def __call__(self, df: pd.DataFrame):


class ReducedDimDataFrameEmbedderSparse(DataFrameEmbedder):
"""Subclass of :class:`DataFrameEmbedder`, which applies IncrementalPCA to reduce dimensions of the output.
""":class:`DataFrameEmbedder` subclass, using IncrementalPCA to reduce dimensions.
This class differs from :class:`ReducedDimDataFrameEmbedder` in that it applies
IncrementalPCA instead of a normal PCA implementation. This implementation is
Expand All @@ -191,7 +192,7 @@ def __call__(self, df: pd.DataFrame):


class HierarchicalClusteringClassifier(ModelComponent):
"""Apply agglomerative clustering algorithm to distance matrix to classif records."""
"""Apply agglomerative clustering to distance matrix to classify records."""

n_clusters: int | None = None
distance_threshold: float = 1.5
Expand Down

0 comments on commit ea643bd

Please sign in to comment.