Commit

fixing docstring formatting for mkdocstrings
brifordwylie committed Dec 28, 2023
1 parent e1abf97 commit 517cf21
Showing 8 changed files with 46 additions and 36 deletions.
10 changes: 8 additions & 2 deletions docs/api_classes/feature_set.md
@@ -19,9 +19,9 @@ my_features = test_data.to_features()
print(my_features.details())
```

-**Use/Show some of the EDA Statistics**
+**FeatureSet EDA Statistics**

```py title="featureset_stats.py"
```py title="featureset_eda.py"
from sageworks.api.feature_set import FeatureSet
import pandas as pd

@@ -36,6 +36,10 @@ print(corr_df)
# Get some outliers
outliers = my_features.outliers()
pprint(outliers.head())

+# Full set of EDA Stats
+eda_stats = my_features.column_stats()
+pprint(eda_stats)
```
**Output**

@@ -55,6 +59,8 @@ iq_score -0.295513 0.395378 0.076477 -0.435033 0.033364 -0.655210
1 Person 68 73.918663 189.527313 219994.000000 80 100.000000 0 0 0 1 0 iq_score_low
2 Person 49 70.381790 261.237000 175633.703125 49 107.933998 0 0 0 1 0 iq_score_low
3 Person 90 73.488739 193.840698 227760.000000 72 110.821541 1 0 0 0 0 salary_high
+<lots of EDA data and statistics>
```


4 changes: 4 additions & 0 deletions examples/featureset_stats.py → examples/featureset_eda.py
@@ -22,3 +22,7 @@
# Get some outliers
outliers = my_features.outliers()
pprint(outliers.head())

+# Full set of EDA Stats
+eda_stats = my_features.column_stats()
+pprint(eda_stats)
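
For readers who want to run the renamed example end to end, below is a minimal, self-contained sketch of what `examples/featureset_eda.py` does after this commit. The FeatureSet name `"test_features"` and the pandas display settings are assumptions for illustration; the `FeatureSet` import, `details()`, `outliers()`, and `column_stats()` calls come from the diffs in this commit.

```python
# Hypothetical end-to-end version of examples/featureset_eda.py (sketch only).
from pprint import pprint

import pandas as pd

from sageworks.api.feature_set import FeatureSet

# Grab an existing FeatureSet (the name here is an assumption; substitute your own)
my_features = FeatureSet("test_features")

# Widen pandas output so the EDA DataFrames aren't truncated (display-only tweak)
pd.set_option("display.max_columns", 15)
pd.set_option("display.width", 1000)

# High-level details about the FeatureSet
pprint(my_features.details())

# A few outlier rows (IQR-based by default, scale=1.5)
outliers = my_features.outliers()
pprint(outliers.head())

# Full set of EDA stats: dtype, uniques, nulls, plus num_zeros,
# descriptive_stats, and correlations for the numeric columns
eda_stats = my_features.column_stats()
pprint(eda_stats)
```

Per the `column_stats()` docstrings below, string columns only carry dtype/unique/null counts, while numeric columns additionally carry num_zeros, descriptive stats, and correlations.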
2 changes: 1 addition & 1 deletion src/sageworks/algorithms/sql/column_stats.py
@@ -53,7 +53,7 @@ def column_stats(data_source: DataSourceAbstract, recompute: bool = False) -> di
"""SQL based Column Statistics: Compute Column Statistics for a DataSource using SQL
Args:
data_source(DataSource): The DataSource that we're computing column stats on
-recompute(bool): Whether or not to recompute the column stats (default: False)
+recompute (bool): Whether or not to recompute the column stats (default: False)
Returns:
dict(dict): A dictionary of stats for each column this format
NB: String columns will have value_counts but NOT have num_zeros and descriptive stats
8 changes: 4 additions & 4 deletions src/sageworks/algorithms/sql/outliers.py
@@ -24,8 +24,8 @@ def compute_outliers(
"""Compute outliers for all the numeric columns in a DataSource
Args:
data_source(DataSource): The DataSource that we're computing outliers on
-scale(float): The scale to use for either the IQR or stddev outlier calculation (default: 1.5)
-use_stddev(bool): Option to use the standard deviation for the outlier calculation (default: False)
+scale (float): The scale to use for either the IQR or stddev outlier calculation (default: 1.5)
+use_stddev (bool): Option to use the standard deviation for the outlier calculation (default: False)
Returns:
pd.DataFrame: A DataFrame of outliers for this DataSource
Notes:
@@ -67,8 +67,8 @@ def _numeric_outliers(self, data_source: DataSourceAbstract, scale: float, use_s
"""Internal method to compute outliers for all numeric columns
Args:
data_source(DataSource): The DataSource that we're computing outliers on
-scale(float): The scale to use for the IQR outlier calculation
-use_stddev(bool): Option to use the standard deviation for the outlier calculation (default: False)
+scale (float): The scale to use for the IQR outlier calculation
+use_stddev (bool): Option to use the standard deviation for the outlier calculation (default: False)
Returns:
pd.DataFrame: A DataFrame of all the outliers combined
"""
24 changes: 12 additions & 12 deletions src/sageworks/core/artifacts/athena_source.py
@@ -244,7 +244,7 @@ def descriptive_stats(self, recompute: bool = False) -> dict[dict]:
"""Compute Descriptive Stats for all the numeric columns in a DataSource
Args:
-recompute(bool): Recompute the descriptive stats (default: False)
+recompute (bool): Recompute the descriptive stats (default: False)
Returns:
dict(dict): A dictionary of descriptive stats for each column in the form
@@ -270,9 +270,9 @@ def outliers_impl(self, scale: float = 1.5, use_stddev=False, recompute: bool =
"""Compute outliers for all the numeric columns in a DataSource
Args:
-scale(float): The scale to use for the IQR (default: 1.5)
-use_stddev(bool): Use Standard Deviation instead of IQR (default: False)
-recompute(bool): Recompute the outliers (default: False)
+scale (float): The scale to use for the IQR (default: 1.5)
+use_stddev (bool): Use Standard Deviation instead of IQR (default: False)
+recompute (bool): Recompute the outliers (default: False)
Returns:
pd.DataFrame: A DataFrame of outliers from this DataSource
@@ -311,7 +311,7 @@ def correlations(self, recompute: bool = False) -> dict[dict]:
"""Compute Correlations for all the numeric columns in a DataSource
Args:
-recompute(bool): Recompute the column stats (default: False)
+recompute (bool): Recompute the column stats (default: False)
Returns:
dict(dict): A dictionary of correlations for each column in this format
@@ -337,15 +337,15 @@ def column_stats(self, recompute: bool = False) -> dict[dict]:
"""Compute Column Stats for all the columns in a DataSource
Args:
-recompute(bool): Recompute the column stats (default: False)
+recompute (bool): Recompute the column stats (default: False)
Returns:
dict(dict): A dictionary of stats for each column this format
NB: String columns will NOT have num_zeros, descriptive_stats or correlation data
-{'col1': {'dtype': 'string', 'unique': 4321, 'nulls': 12},
-'col2': {'dtype': 'int', 'unique': 4321, 'nulls': 12, 'num_zeros': 100,
-'descriptive_stats': {...}, 'correlations': {...}},
-...}
+{'col1': {'dtype': 'string', 'unique': 4321, 'nulls': 12},
+'col2': {'dtype': 'int', 'unique': 4321, 'nulls': 12, 'num_zeros': 100,
+'descriptive_stats': {...}, 'correlations': {...}},
+...}
"""

# First check if we have already computed the column stats
@@ -366,7 +366,7 @@ def value_counts(self, recompute: bool = False) -> dict[dict]:
"""Compute 'value_counts' for all the string columns in a DataSource
Args:
-recompute(bool): Recompute the value counts (default: False)
+recompute (bool): Recompute the value counts (default: False)
Returns:
dict(dict): A dictionary of value counts for each column in the form
@@ -392,7 +392,7 @@ def details(self, recompute: bool = False) -> dict[dict]:
"""Additional Details about this AthenaSource Artifact
Args:
-recompute(bool): Recompute the details (default: False)
+recompute (bool): Recompute the details (default: False)
Returns:
dict(dict): A dictionary of details about this AthenaSource
20 changes: 10 additions & 10 deletions src/sageworks/core/artifacts/data_source_abstract.py
@@ -58,7 +58,7 @@ def column_types(self) -> list[str]:
def column_details(self, view: str = "all") -> dict:
"""Return the column details for this Data Source
Args:
-view(str): The view to get column details for (default: "all")
+view (str): The view to get column details for (default: "all")
Returns:
dict: The column details for this Data Source
"""
@@ -106,7 +106,7 @@ def get_display_columns(self) -> list[str]:
def set_display_columns(self, display_columns: list[str]):
"""Set the display columns for this Data Source
Args:
-display_columns(list[str]): The display columns for this Data Source
+display_columns (list[str]): The display columns for this Data Source
"""
self._display_columns = display_columns
self.upsert_sageworks_meta({"sageworks_display_columns": self._display_columns})
@@ -144,7 +144,7 @@ def execute_statement(self, query: str):
def sample(self, recompute: bool = False) -> pd.DataFrame:
"""Return a sample DataFrame from this DataSource
Args:
-recompute(bool): Recompute the sample (default: False)
+recompute (bool): Recompute the sample (default: False)
Returns:
pd.DataFrame: A sample DataFrame from this DataSource
"""
@@ -172,7 +172,7 @@ def sample_impl(self) -> pd.DataFrame:
def descriptive_stats(self, recompute: bool = False) -> dict[dict]:
"""Compute Descriptive Stats for all the numeric columns in a DataSource
Args:
-recompute(bool): Recompute the descriptive stats (default: False)
+recompute (bool): Recompute the descriptive stats (default: False)
Returns:
dict(dict): A dictionary of descriptive stats for each column in the form
{'col1': {'min': 0, 'q1': 1, 'median': 2, 'q3': 3, 'max': 4},
@@ -183,8 +183,8 @@ def descriptive_stats(self, recompute: bool = False) -> dict[dict]:
def outliers(self, scale: float = 1.5, recompute: bool = False) -> pd.DataFrame:
"""Return a DataFrame of outliers from this DataSource
Args:
-scale(float): The scale to use for the IQR (default: 1.5)
-recompute(bool): Recompute the outliers (default: False)
+scale (float): The scale to use for the IQR (default: 1.5)
+recompute (bool): Recompute the outliers (default: False)
Returns:
pd.DataFrame: A DataFrame of outliers from this DataSource
Notes:
@@ -207,8 +207,8 @@ def outliers_impl(self, scale: float = 1.5, recompute: bool = False) -> pd.DataFrame:
def outliers_impl(self, scale: float = 1.5, recompute: bool = False) -> pd.DataFrame:
"""Return a DataFrame of outliers from this DataSource
Args:
-scale(float): The scale to use for the IQR (default: 1.5)
-recompute(bool): Recompute the outliers (default: False)
+scale (float): The scale to use for the IQR (default: 1.5)
+recompute (bool): Recompute the outliers (default: False)
Returns:
pd.DataFrame: A DataFrame of outliers from this DataSource
Notes:
@@ -229,7 +229,7 @@ def smart_sample(self) -> pd.DataFrame:
def value_counts(self, recompute: bool = False) -> dict[dict]:
"""Compute 'value_counts' for all the string columns in a DataSource
Args:
-recompute(bool): Recompute the value counts (default: False)
+recompute (bool): Recompute the value counts (default: False)
Returns:
dict(dict): A dictionary of value counts for each column in the form
{'col1': {'value_1': X, 'value_2': Y, 'value_3': Z,...},
@@ -241,7 +241,7 @@ def column_stats(self, recompute: bool = False) -> dict[dict]:
def column_stats(self, recompute: bool = False) -> dict[dict]:
"""Compute Column Stats for all the columns in a DataSource
Args:
-recompute(bool): Recompute the column stats (default: False)
+recompute (bool): Recompute the column stats (default: False)
Returns:
dict(dict): A dictionary of stats for each column this format
NB: String columns will NOT have num_zeros and descriptive stats
2 changes: 1 addition & 1 deletion src/sageworks/core/artifacts/endpoint_core.py
@@ -280,7 +280,7 @@ def endpoint_metrics(self) -> pd.DataFrame:
def details(self, recompute: bool = False) -> dict:
"""Additional Details about this Endpoint
Args:
-recompute(bool): Recompute the details (default: False)
+recompute (bool): Recompute the details (default: False)
Returns:
dict(dict): A dictionary of details about this Endpoint
"""
12 changes: 6 additions & 6 deletions src/sageworks/core/artifacts/feature_set_core.py
@@ -124,7 +124,7 @@ def column_details(self, view: str = "all") -> dict:
"""Return the column details of the Feature Set
Args:
-view(str): The view to get column details for (default: "all")
+view (str): The view to get column details for (default: "all")
Returns:
dict: The column details of the Feature Set
@@ -167,7 +167,7 @@ def set_display_columns(self, display_columns: list[str]):
"""Set the display columns for this FeatureSet
Args:
-display_columns(list[str]): The display columns for this FeatureSet
+display_columns (list[str]): The display columns for this FeatureSet
Notes:
This just sets the display columns for the underlying DataSource
@@ -278,7 +278,7 @@ def details(self, recompute: bool = False) -> dict[dict]:
"""Additional Details about this FeatureSet Artifact
Args:
-recompute(bool): Recompute the details (default: False)
+recompute (bool): Recompute the details (default: False)
Returns:
dict(dict): A dictionary of details about this FeatureSet
@@ -465,8 +465,8 @@ def sample(self, recompute: bool = False) -> pd.DataFrame:
def outliers(self, scale: float = 1.5, recompute: bool = False) -> pd.DataFrame:
"""Compute outliers for all the numeric columns in a DataSource
Args:
-scale(float): The scale to use for the IQR (default: 1.5)
-recompute(bool): Recompute the outliers (default: False)
+scale (float): The scale to use for the IQR (default: 1.5)
+recompute (bool): Recompute the outliers (default: False)
Returns:
pd.DataFrame: A DataFrame of outliers from this DataSource
Notes:
@@ -517,7 +517,7 @@ def column_stats(self, recompute: bool = False) -> dict[dict]:
def column_stats(self, recompute: bool = False) -> dict[dict]:
"""Compute Column Stats for all the columns in the FeatureSets underlying DataSource
Args:
-recompute(bool): Recompute the column stats (default: False)
+recompute (bool): Recompute the column stats (default: False)
Returns:
dict(dict): A dictionary of stats for each column this format
NB: String columns will NOT have num_zeros and descriptive_stats
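
As background on why a single space matters here: mkdocstrings reads these Google-style docstrings, and its `Args:` parsing keys off the `name (type): description` pattern, which is presumably why this commit inserts a space between each argument name and its parenthesized type. A hedged, self-contained sketch of the corrected layout (abbreviated from the `column_stats()` methods above, not a verbatim copy of the repo):

```python
# Sketch of the Google-style docstring layout this commit standardizes on,
# abbreviated for illustration (the real method lives in athena_source.py).
def column_stats(recompute: bool = False) -> dict:
    """Compute Column Stats for all the columns in a DataSource

    Args:
        recompute (bool): Recompute the column stats (default: False)

    Returns:
        dict: A dictionary of stats for each column
    """
    # Placeholder body for the sketch
    return {}
```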
