Commit

fixing docstring formatting for mkdocstrings
brifordwylie committed Dec 28, 2023
1 parent e1abf97 commit 517cf21
Showing 8 changed files with 46 additions and 36 deletions.
10 changes: 8 additions & 2 deletions docs/api_classes/feature_set.md
@@ -19,9 +19,9 @@ my_features = test_data.to_features()
print(my_features.details())
```

-**Use/Show some of the EDA Statistics**
+**FeatureSet EDA Statistics**

```py title="featureset_stats.py"
```py title="featureset_eda.py"
from sageworks.api.feature_set import FeatureSet
import pandas as pd

@@ -36,6 +36,10 @@ print(corr_df)
# Get some outliers
outliers = my_features.outliers()
pprint(outliers.head())

+# Full set of EDA Stats
+eda_stats = my_features.column_stats()
+pprint(eda_stats)
```
**Output**

@@ -55,6 +59,8 @@ iq_score -0.295513 0.395378 0.076477 -0.435033 0.033364 -0.655210
1 Person 68 73.918663 189.527313 219994.000000 80 100.000000 0 0 0 1 0 iq_score_low
2 Person 49 70.381790 261.237000 175633.703125 49 107.933998 0 0 0 1 0 iq_score_low
3 Person 90 73.488739 193.840698 227760.000000 72 110.821541 1 0 0 0 0 salary_high
+<lots of EDA data and statistics>
```


4 changes: 4 additions & 0 deletions examples/featureset_stats.py → examples/featureset_eda.py
@@ -22,3 +22,7 @@
# Get some outliers
outliers = my_features.outliers()
pprint(outliers.head())

+# Full set of EDA Stats
+eda_stats = my_features.column_stats()
+pprint(eda_stats)
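
For readers who want to run the renamed example end to end, below is a minimal, self-contained sketch of what `examples/featureset_eda.py` does after this commit. The FeatureSet name `"test_features"` and the pandas display settings are assumptions for illustration; the `FeatureSet` import, `details()`, `outliers()`, and `column_stats()` calls come from the diffs in this commit.

```python
# Hypothetical end-to-end version of examples/featureset_eda.py (sketch only).
from pprint import pprint

import pandas as pd

from sageworks.api.feature_set import FeatureSet

# Grab an existing FeatureSet (the name here is an assumption; substitute your own)
my_features = FeatureSet("test_features")

# Widen pandas output so the EDA DataFrames aren't truncated (display-only tweak)
pd.set_option("display.max_columns", 15)
pd.set_option("display.width", 1000)

# High-level details about the FeatureSet
pprint(my_features.details())

# A few outlier rows (IQR-based by default, scale=1.5)
outliers = my_features.outliers()
pprint(outliers.head())

# Full set of EDA stats: dtype, uniques, nulls, plus num_zeros,
# descriptive_stats, and correlations for the numeric columns
eda_stats = my_features.column_stats()
pprint(eda_stats)
```

Per the `column_stats()` docstrings below, string columns only carry dtype/unique/null counts, while numeric columns additionally carry num_zeros, descriptive stats, and correlations.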
2 changes: 1 addition & 1 deletion src/sageworks/algorithms/sql/column_stats.py
@@ -53,7 +53,7 @@ def column_stats(data_source: DataSourceAbstract, recompute: bool = False) -> di
"""SQL based Column Statistics: Compute Column Statistics for a DataSource using SQL
Args:
data_source(DataSource): The DataSource that we're computing column stats on
-recompute(bool): Whether or not to recompute the column stats (default: False)
+recompute (bool): Whether or not to recompute the column stats (default: False)
Returns:
dict(dict): A dictionary of stats for each column this format
NB: String columns will have value_counts but NOT have num_zeros and descriptive stats
8 changes: 4 additions & 4 deletions src/sageworks/algorithms/sql/outliers.py
@@ -24,8 +24,8 @@ def compute_outliers(
"""Compute outliers for all the numeric columns in a DataSource
Args:
data_source(DataSource): The DataSource that we're computing outliers on
-scale(float): The scale to use for either the IQR or stddev outlier calculation (default: 1.5)
-use_stddev(bool): Option to use the standard deviation for the outlier calculation (default: False)
+scale (float): The scale to use for either the IQR or stddev outlier calculation (default: 1.5)
+use_stddev (bool): Option to use the standard deviation for the outlier calculation (default: False)
Returns:
pd.DataFrame: A DataFrame of outliers for this DataSource
Notes:
@@ -67,8 +67,8 @@ def _numeric_outliers(self, data_source: DataSourceAbstract, scale: float, use_s
"""Internal method to compute outliers for all numeric columns
Args:
data_source(DataSource): The DataSource that we're computing outliers on
-scale(float): The scale to use for the IQR outlier calculation
-use_stddev(bool): Option to use the standard deviation for the outlier calculation (default: False)
+scale (float): The scale to use for the IQR outlier calculation
+use_stddev (bool): Option to use the standard deviation for the outlier calculation (default: False)
Returns:
pd.DataFrame: A DataFrame of all the outliers combined
"""
24 changes: 12 additions & 12 deletions src/sageworks/core/artifacts/athena_source.py
@@ -244,7 +244,7 @@ def descriptive_stats(self, recompute: bool = False) -> dict[dict]:
"""Compute Descriptive Stats for all the numeric columns in a DataSource
Args:
-recompute(bool): Recompute the descriptive stats (default: False)
+recompute (bool): Recompute the descriptive stats (default: False)
Returns:
dict(dict): A dictionary of descriptive stats for each column in the form
@@ -270,9 +270,9 @@ def outliers_impl(self, scale: float = 1.5, use_stddev=False, recompute: bool =
"""Compute outliers for all the numeric columns in a DataSource
Args:
-scale(float): The scale to use for the IQR (default: 1.5)
-use_stddev(bool): Use Standard Deviation instead of IQR (default: False)
-recompute(bool): Recompute the outliers (default: False)
+scale (float): The scale to use for the IQR (default: 1.5)
+use_stddev (bool): Use Standard Deviation instead of IQR (default: False)
+recompute (bool): Recompute the outliers (default: False)
Returns:
pd.DataFrame: A DataFrame of outliers from this DataSource
@@ -311,7 +311,7 @@ def correlations(self, recompute: bool = False) -> dict[dict]:
"""Compute Correlations for all the numeric columns in a DataSource
Args:
-recompute(bool): Recompute the column stats (default: False)
+recompute (bool): Recompute the column stats (default: False)
Returns:
dict(dict): A dictionary of correlations for each column in this format
@@ -337,15 +337,15 @@ def column_stats(self, recompute: bool = False) -> dict[dict]:
"""Compute Column Stats for all the columns in a DataSource
Args:
-recompute(bool): Recompute the column stats (default: False)
+recompute (bool): Recompute the column stats (default: False)
Returns:
dict(dict): A dictionary of stats for each column this format
NB: String columns will NOT have num_zeros, descriptive_stats or correlation data
-{'col1': {'dtype': 'string', 'unique': 4321, 'nulls': 12},
-'col2': {'dtype': 'int', 'unique': 4321, 'nulls': 12, 'num_zeros': 100,
-'descriptive_stats': {...}, 'correlations': {...}},
-...}
+{'col1': {'dtype': 'string', 'unique': 4321, 'nulls': 12},
+'col2': {'dtype': 'int', 'unique': 4321, 'nulls': 12, 'num_zeros': 100,
+'descriptive_stats': {...}, 'correlations': {...}},
+...}
"""

# First check if we have already computed the column stats
@@ -366,7 +366,7 @@ def value_counts(self, recompute: bool = False) -> dict[dict]:
"""Compute 'value_counts' for all the string columns in a DataSource
Args:
-recompute(bool): Recompute the value counts (default: False)
+recompute (bool): Recompute the value counts (default: False)
Returns:
dict(dict): A dictionary of value counts for each column in the form
@@ -392,7 +392,7 @@ def details(self, recompute: bool = False) -> dict[dict]:
"""Additional Details about this AthenaSource Artifact
Args:
-recompute(bool): Recompute the details (default: False)
+recompute (bool): Recompute the details (default: False)
Returns:
dict(dict): A dictionary of details about this AthenaSource
20 changes: 10 additions & 10 deletions src/sageworks/core/artifacts/data_source_abstract.py
@@ -58,7 +58,7 @@ def column_types(self) -> list[str]:
def column_details(self, view: str = "all") -> dict:
"""Return the column details for this Data Source
Args:
-view(str): The view to get column details for (default: "all")
+view (str): The view to get column details for (default: "all")
Returns:
dict: The column details for this Data Source
"""
@@ -106,7 +106,7 @@ def get_display_columns(self) -> list[str]:
def set_display_columns(self, display_columns: list[str]):
"""Set the display columns for this Data Source
Args:
-display_columns(list[str]): The display columns for this Data Source
+display_columns (list[str]): The display columns for this Data Source
"""
self._display_columns = display_columns
self.upsert_sageworks_meta({"sageworks_display_columns": self._display_columns})
@@ -144,7 +144,7 @@ def execute_statement(self, query: str):
def sample(self, recompute: bool = False) -> pd.DataFrame:
"""Return a sample DataFrame from this DataSource
Args:
-recompute(bool): Recompute the sample (default: False)
+recompute (bool): Recompute the sample (default: False)
Returns:
pd.DataFrame: A sample DataFrame from this DataSource
"""
@@ -172,7 +172,7 @@ def sample_impl(self) -> pd.DataFrame:
def descriptive_stats(self, recompute: bool = False) -> dict[dict]:
"""Compute Descriptive Stats for all the numeric columns in a DataSource
Args:
-recompute(bool): Recompute the descriptive stats (default: False)
+recompute (bool): Recompute the descriptive stats (default: False)
Returns:
dict(dict): A dictionary of descriptive stats for each column in the form
{'col1': {'min': 0, 'q1': 1, 'median': 2, 'q3': 3, 'max': 4},
@@ -183,8 +183,8 @@ def descriptive_stats(self, recompute: bool = False) -> dict[dict]:
def outliers(self, scale: float = 1.5, recompute: bool = False) -> pd.DataFrame:
"""Return a DataFrame of outliers from this DataSource
Args:
-scale(float): The scale to use for the IQR (default: 1.5)
-recompute(bool): Recompute the outliers (default: False)
+scale (float): The scale to use for the IQR (default: 1.5)
+recompute (bool): Recompute the outliers (default: False)
Returns:
pd.DataFrame: A DataFrame of outliers from this DataSource
Notes:
@@ -207,8 +207,8 @@ def outliers_impl(self, scale: float = 1.5, recompute: bool = False) -> pd.DataFrame:
def outliers_impl(self, scale: float = 1.5, recompute: bool = False) -> pd.DataFrame:
"""Return a DataFrame of outliers from this DataSource
Args:
-scale(float): The scale to use for the IQR (default: 1.5)
-recompute(bool): Recompute the outliers (default: False)
+scale (float): The scale to use for the IQR (default: 1.5)
+recompute (bool): Recompute the outliers (default: False)
Returns:
pd.DataFrame: A DataFrame of outliers from this DataSource
Notes:
@@ -229,7 +229,7 @@ def smart_sample(self) -> pd.DataFrame:
def value_counts(self, recompute: bool = False) -> dict[dict]:
"""Compute 'value_counts' for all the string columns in a DataSource
Args:
-recompute(bool): Recompute the value counts (default: False)
+recompute (bool): Recompute the value counts (default: False)
Returns:
dict(dict): A dictionary of value counts for each column in the form
{'col1': {'value_1': X, 'value_2': Y, 'value_3': Z,...},
@@ -241,7 +241,7 @@ def column_stats(self, recompute: bool = False) -> dict[dict]:
def column_stats(self, recompute: bool = False) -> dict[dict]:
"""Compute Column Stats for all the columns in a DataSource
Args:
-recompute(bool): Recompute the column stats (default: False)
+recompute (bool): Recompute the column stats (default: False)
Returns:
dict(dict): A dictionary of stats for each column this format
NB: String columns will NOT have num_zeros and descriptive stats
2 changes: 1 addition & 1 deletion src/sageworks/core/artifacts/endpoint_core.py
@@ -280,7 +280,7 @@ def endpoint_metrics(self) -> pd.DataFrame:
def details(self, recompute: bool = False) -> dict:
"""Additional Details about this Endpoint
Args:
-recompute(bool): Recompute the details (default: False)
+recompute (bool): Recompute the details (default: False)
Returns:
dict(dict): A dictionary of details about this Endpoint
"""
12 changes: 6 additions & 6 deletions src/sageworks/core/artifacts/feature_set_core.py
@@ -124,7 +124,7 @@ def column_details(self, view: str = "all") -> dict:
"""Return the column details of the Feature Set
Args:
-view(str): The view to get column details for (default: "all")
+view (str): The view to get column details for (default: "all")
Returns:
dict: The column details of the Feature Set
@@ -167,7 +167,7 @@ def set_display_columns(self, display_columns: list[str]):
"""Set the display columns for this FeatureSet
Args:
-display_columns(list[str]): The display columns for this FeatureSet
+display_columns (list[str]): The display columns for this FeatureSet
Notes:
This just sets the display columns for the underlying DataSource
@@ -278,7 +278,7 @@ def details(self, recompute: bool = False) -> dict[dict]:
"""Additional Details about this FeatureSet Artifact
Args:
-recompute(bool): Recompute the details (default: False)
+recompute (bool): Recompute the details (default: False)
Returns:
dict(dict): A dictionary of details about this FeatureSet
@@ -465,8 +465,8 @@ def sample(self, recompute: bool = False) -> pd.DataFrame:
def outliers(self, scale: float = 1.5, recompute: bool = False) -> pd.DataFrame:
"""Compute outliers for all the numeric columns in a DataSource
Args:
-scale(float): The scale to use for the IQR (default: 1.5)
-recompute(bool): Recompute the outliers (default: False)
+scale (float): The scale to use for the IQR (default: 1.5)
+recompute (bool): Recompute the outliers (default: False)
Returns:
pd.DataFrame: A DataFrame of outliers from this DataSource
Notes:
@@ -517,7 +517,7 @@ def column_stats(self, recompute: bool = False) -> dict[dict]:
def column_stats(self, recompute: bool = False) -> dict[dict]:
"""Compute Column Stats for all the columns in the FeatureSets underlying DataSource
Args:
-recompute(bool): Recompute the column stats (default: False)
+recompute (bool): Recompute the column stats (default: False)
Returns:
dict(dict): A dictionary of stats for each column this format
NB: String columns will NOT have num_zeros and descriptive_stats
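
As background on why a single space matters here: mkdocstrings reads these Google-style docstrings, and its `Args:` parsing keys off the `name (type): description` pattern, which is presumably why this commit inserts a space between each argument name and its parenthesized type. A hedged, self-contained sketch of the corrected layout (abbreviated from the `column_stats()` methods above, not a verbatim copy of the repo):

```python
# Sketch of the Google-style docstring layout this commit standardizes on,
# abbreviated for illustration (the real method lives in athena_source.py).
def column_stats(recompute: bool = False) -> dict:
    """Compute Column Stats for all the columns in a DataSource

    Args:
        recompute (bool): Recompute the column stats (default: False)

    Returns:
        dict: A dictionary of stats for each column
    """
    # Placeholder body for the sketch
    return {}
```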
