
Commit

add periods
axl1313 committed Jul 17, 2023
1 parent ad01269 commit d077d5a
Showing 1 changed file with 36 additions and 41 deletions.
77 changes: 36 additions & 41 deletions cleanlab_studio/studio/studio.py
@@ -23,11 +23,7 @@


 class Studio:
-    """Used to interact with Cleanlab Studio
-    Attributes:
-        _api_key: API key for user interacting with Cleanlab Studio
-    """
+    """Used to interact with Cleanlab Studio."""

     _api_key: str

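For orientation (not part of this commit), a minimal sketch of constructing the client whose docstrings this diff edits. The import path and the API-key constructor argument are assumptions suggested by the `_api_key` attribute; the `__init__` itself is not shown in this diff.

    from cleanlab_studio import Studio

    # Assumed constructor: pass your Cleanlab Studio API key (placeholder below).
    studio = Studio("<YOUR_API_KEY>")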
@@ -59,17 +55,17 @@ def upload_dataset(
         id_column: Optional[str] = None,
     ) -> str:
         """
-        Uploads a dataset to Cleanlab Studio
+        Uploads a dataset to Cleanlab Studio.
         Args:
-            dataset: Object representing the dataset to upload. Currently supported formats include a `str` path to your dataset, a pandas DataFrame, a pyspark DataFrame
-            dataset_name: Name for your dataset in Cleanlab Studio (optional if uploading from filepath)
-            schema_overrides: Optional dictionary of overrides you would like to make to the schema of your dataset. If not provided, schema will be inferred
-            modality: Optional parameter to override the modality of your dataset. If not provided, modality will be inferred
-            id_column: Optional parameter to override the ID column of your dataset. If not provided, a monotonically increasing ID column will be generated
+            dataset: Object representing the dataset to upload. Currently supported formats include a `str` path to your dataset, a pandas DataFrame, a pyspark DataFrame.
+            dataset_name: Name for your dataset in Cleanlab Studio (optional if uploading from filepath).
+            schema_overrides: Optional dictionary of overrides you would like to make to the schema of your dataset. If not provided, schema will be inferred.
+            modality: Optional parameter to override the modality of your dataset. If not provided, modality will be inferred.
+            id_column: Optional parameter to override the ID column of your dataset. If not provided, a monotonically increasing ID column will be generated.
         Returns:
-            ID of uploaded dataset
+            ID of uploaded dataset.
         """
         ds = init_dataset_source(dataset, dataset_name)
         return upload.upload_dataset(
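For illustration (not part of this commit), a hedged sketch of calling `upload_dataset` as the revised docstring describes; the DataFrame contents and dataset name are made-up placeholders.

    import pandas as pd

    # Hypothetical toy dataset; per the docstring, a str filepath or a pyspark DataFrame would also work.
    df = pd.DataFrame({"text": ["great product", "terrible service"], "label": ["pos", "neg"]})
    dataset_id = studio.upload_dataset(df, dataset_name="reviews-demo")  # returns the ID of the uploaded dataset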
@@ -87,15 +83,14 @@ def download_cleanlab_columns(
         to_spark: bool = False,
     ) -> Any:
         """
-        Downloads Cleanlab columns for a cleanset
+        Downloads Cleanlab columns for a cleanset.
         Args:
-            cleanset_id: ID of cleanset to download columns from
-            include_action: Whether to include a column with any actions taken on the cleanset in the downloaded columns
+            cleanset_id: ID of cleanset to download columns from.
+            include_action: Whether to include a column with any actions taken on the cleanset in the downloaded columns.
         Returns:
-            A pandas or pyspark DataFrame
-            Type Any because don't want to rely on pyspark being installed
+            A pandas or pyspark DataFrame. Type is `Any` to avoid requiring pyspark installation.
         """
         rows_df = api.download_cleanlab_columns(
             self._api_key, cleanset_id, all=True, to_spark=to_spark
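A possible call to `download_cleanlab_columns`, sketched from the docstring above (not part of this commit); the cleanset ID is a placeholder, and the second variant assumes pyspark is installed.

    # Download Cleanlab columns for an existing cleanset as a pandas DataFrame.
    cleanlab_cols = studio.download_cleanlab_columns("<CLEANSET_ID>")

    # Or, using the to_spark flag, as a pyspark DataFrame:
    # cleanlab_cols = studio.download_cleanlab_columns("<CLEANSET_ID>", to_spark=True)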
@@ -109,15 +104,15 @@ def download_cleanlab_columns(

     def apply_corrections(self, cleanset_id: str, dataset: Any, keep_excluded: bool = False) -> Any:
         """
-        Applies corrections from a Cleanlab Studio cleanset to your dataset. Corrections can be made by viewing your project in the Cleanlab Studio webapp
+        Applies corrections from a Cleanlab Studio cleanset to your dataset. Corrections can be made by viewing your project in the Cleanlab Studio webapp.
         Args:
-            cleanset_id: ID of cleanset to apply corrections from
-            dataset: Dataset to apply corrections to. Supported formats include pandas DataFrame and pyspark DataFrame. Dataset should have the same number of rows as the dataset used to create the project. It should also contain a label column with the same name as the label column for the project
-            keep_excluded: Whether to retain rows with an "exclude" action. By default these rows will be removed from the dataset
+            cleanset_id: ID of cleanset to apply corrections from.
+            dataset: Dataset to apply corrections to. Supported formats include pandas DataFrame and pyspark DataFrame. Dataset should have the same number of rows as the dataset used to create the project. It should also contain a label column with the same name as the label column for the project.
+            keep_excluded: Whether to retain rows with an "exclude" action. By default these rows will be removed from the dataset.
         Returns:
-            A copy of the dataset with corrections applied
+            A copy of the dataset with corrections applied.
         """
         project_id = api.get_project_of_cleanset(self._api_key, cleanset_id)
         label_column = api.get_label_column_of_project(self._api_key, project_id)
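A hedged usage sketch for `apply_corrections` (not from this commit): it assumes `df` is the same dataset the project was created from, containing the project's label column, as the docstring requires.

    # Apply corrections made in the Cleanlab Studio webapp back onto the original dataset.
    fixed_df = studio.apply_corrections("<CLEANSET_ID>", df)

    # keep_excluded=True would retain rows marked "exclude"; by default those rows are dropped.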
@@ -201,20 +196,20 @@ def create_project(
         text_column: Optional[str] = None,
     ) -> str:
         """
-        Creates a Cleanlab Studio project
+        Creates a Cleanlab Studio project.
         Args:
-            dataset_id: ID of dataset to create project for
-            project_name: name for resulting project
-            modality: modality of project (i.e. text, tabular, image)
-            task_type: type of classification to perform (i.e. multi-class, multi-label)
-            model_type: type of model to train (i.e. fast, regular)
-            label_column: name of column in dataset containing labels (if not supplied, we'll make our best guess)
-            feature_columns: list of columns to use as features when training tabular modality project (if not supplied and modality is "tabular" we'll use all valid feature columns)
-            text_column: name of column containing the text to train text modality project on (if not supplied and modality is "text" we'll make our best guess)
+            dataset_id: ID of dataset to create project for.
+            project_name: name for resulting project.
+            modality: modality of project (i.e. text, tabular, image).
+            task_type: type of classification to perform (i.e. multi-class, multi-label).
+            model_type: type of model to train (i.e. fast, regular).
+            label_column: name of column in dataset containing labels (if not supplied, we'll make our best guess).
+            feature_columns: list of columns to use as features when training tabular modality project (if not supplied and modality is "tabular" we'll use all valid feature columns).
+            text_column: name of column containing the text to train text modality project on (if not supplied and modality is "text" we'll make our best guess).
         Returns:
-            ID of created project
+            ID of created project.
         """
         dataset_details = api.get_dataset_details(self._api_key, dataset_id)

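A sketch of `create_project` built only from the argument list documented above (not part of this commit); the project name and column names are hypothetical, and keyword arguments are used to avoid assuming any positional order beyond what the docstring lists.

    project_id = studio.create_project(
        dataset_id=dataset_id,
        project_name="reviews-demo-project",
        modality="text",          # i.e. text, tabular, image
        label_column="label",     # placeholder; omit to let Cleanlab Studio guess
        text_column="text",       # placeholder; omit to let Cleanlab Studio guess
    )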
@@ -262,35 +257,35 @@ def create_project(

     def poll_cleanset_status(self, cleanset_id: str, timeout: Optional[int] = None) -> bool:
         """
-        Polls for cleanset status. Blocks until cleanset is ready, there is a cleanset error, or `timeout` is exceeded
+        Polls for cleanset status. Blocks until cleanset is ready, there is a cleanset error, or `timeout` is exceeded.
         Args:
-            cleanset_id: ID of cleanset to check status of
-            timeout: Optional timeout after which to stop polling for progress. If not provided, will block until cleanset is ready
+            cleanset_id: ID of cleanset to check status of.
+            timeout: Optional timeout after which to stop polling for progress. If not provided, will block until cleanset is ready.
         Returns:
-            `True` if cleanset is ready, `False` otherwise
+            `True` if cleanset is ready, `False` otherwise.
         """
         return clean.poll_cleanset_status(self._api_key, cleanset_id, timeout)

     def get_latest_cleanset_id(self, project_id: str) -> str:
         """
-        Gets latest cleanset ID for a project
+        Gets latest cleanset ID for a project.
         Args:
-            project_id: ID of project
+            project_id: ID of project.
         Returns:
-            ID of latest associated cleanset
+            ID of latest associated cleanset.
         """
         return api.get_latest_cleanset_id(self._api_key, project_id)

     def delete_project(self, project_id: str) -> None:
         """
-        Deletes a project from Cleanlab Studio
+        Deletes a project from Cleanlab Studio.
         Args:
-            project_id: ID of project to delete
+            project_id: ID of project to delete.
         """
         api.delete_project(self._api_key, project_id)
         print(f"Successfully deleted project: {project_id}")
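Finally, a hedged end-to-end sketch (not part of this commit) tying together the three methods above: wait for a project's latest cleanset, fetch its Cleanlab columns, then optionally delete the project. The IDs are placeholders and the one-hour timeout is arbitrary.

    cleanset_id = studio.get_latest_cleanset_id(project_id)

    # Block until the cleanset is ready, errors out, or the (assumed) 3600-second timeout elapses.
    if studio.poll_cleanset_status(cleanset_id, timeout=3600):
        cleanlab_cols = studio.download_cleanlab_columns(cleanset_id)

    # Irreversible; shown only to illustrate the call signature.
    # studio.delete_project(project_id)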