Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/inference api #117

Merged
merged 43 commits into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from 31 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
cafec61
adds inference API (v0)
ryansingman Jul 12, 2023
55f93a2
modify client to fit backend api endpoints
Jul 19, 2023
9257172
modify cli to make model prediction work
Jul 21, 2023
79b5cde
black
Jul 21, 2023
92d973c
modify cli after testing api endpoints
Jul 24, 2023
967722b
black
Jul 24, 2023
3d63153
integrate into Studio class
ryansingman Jul 24, 2023
d81b209
remove base url override
ryansingman Jul 24, 2023
865cf03
fix api endpoint for client to work
Jul 24, 2023
c88f90b
modify code to support text files without headers
Jul 24, 2023
7db17d8
remove test file for local testing
Jul 24, 2023
2d27b3c
change response for upload api and remove logic for comparing headers…
Jul 24, 2023
25d33ac
modify invoke lambda api to send only query_id as param
Jul 24, 2023
ba0a5f9
remove test file
Jul 25, 2023
9ab9d28
fix mypy errors
Jul 25, 2023
7db2438
remove TypeAlias
Jul 25, 2023
7b1a48a
fix mypy for Batch type
Jul 25, 2023
b0abe97
Use Union instead of | for multi generic typing
Jul 25, 2023
d5fb4d4
more typing fixes and timeout in prediction
Jul 25, 2023
0b2a73c
change timeout to adn
Jul 25, 2023
4846bf3
remove print statement
Jul 25, 2023
af6ce97
remove test files again
Jul 25, 2023
a2a466c
fix code review comments
Jul 25, 2023
a3ce52f
remove test files
Jul 25, 2023
9bcb15f
for updating pr
Jul 25, 2023
8de5c4a
change timeout to new var name
Jul 25, 2023
fd758ea
remove header replace logic
Jul 26, 2023
658b5fa
modify predict function to take care of text inputs
Jul 26, 2023
612a522
remove download api endpoint and supply url directly to pandas
Jul 27, 2023
5435cd7
Merge remote-tracking branch 'origin/main' into feature/inference-api
Jul 28, 2023
40bffbf
update docstring to match documentation format
Jul 28, 2023
d83e5e1
fix predict timeout
ryansingman Jul 31, 2023
e266518
mypy fix
ryansingman Jul 31, 2023
8679aad
mypy fix
ryansingman Jul 31, 2023
c871c8c
add typing extensions req
ryansingman Jul 31, 2023
13b4c8c
fix incorrect return types, return predictions separate from class probs
ryansingman Jul 31, 2023
8b7b8de
mypy fix
ryansingman Jul 31, 2023
4261e5a
clean up polling interface, angelas comments
ryansingman Aug 1, 2023
24b4733
fix sleep placement in poll loop
ryansingman Aug 1, 2023
2603868
mypy fix
ryansingman Aug 1, 2023
f7cdefa
mypy fix
ryansingman Aug 1, 2023
f1d5102
fix results name
ryansingman Aug 2, 2023
af4d02f
fix nits
ryansingman Aug 2, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 53 additions & 1 deletion cleanlab_studio/internal/api/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import io
import os
import time
from typing import Callable, List, Optional, Tuple, Union, Any
from itertools import chain
from typing import Callable, List, Optional, Tuple, Dict, Union, Any
from cleanlab_studio.errors import APIError

import requests
Expand All @@ -19,12 +21,14 @@
from cleanlab_studio.internal.types import JSONDict
from cleanlab_studio.version import __version__


# Base API URL; overridable via the CLEANLAB_API_BASE_URL environment variable
# (e.g. to point the client at a staging deployment).
base_url = os.environ.get("CLEANLAB_API_BASE_URL", "https://api.cleanlab.ai/api")
cli_base_url = f"{base_url}/cli/v0"
upload_base_url = f"{base_url}/upload/v0"
dataset_base_url = f"{base_url}/datasets"
project_base_url = f"{base_url}/projects"
cleanset_base_url = f"{base_url}/cleansets"
# Endpoints for deployed-model inference (upload / predict / status).
model_base_url = f"{base_url}/v1/deployment"


def _construct_headers(
Expand Down Expand Up @@ -330,3 +334,51 @@ def poll_progress(
res = request_function(progress_id)
pbar.update(float(1) - pbar.n)
return res


def upload_predict_batch(api_key: str, model_id: str, batch: io.StringIO) -> str:
    """Uploads a prediction batch for a deployed model and returns its query ID.

    :param api_key: Cleanlab Studio API key used to authenticate the request
    :param model_id: ID of the deployed model to query
    :param batch: in-memory CSV file containing the examples to predict on
    :return: query ID identifying this uploaded prediction batch
    :raises APIError: if requesting the presigned upload URL fails
    :raises requests.HTTPError: if the upload to object storage fails
    """
    url = f"{model_base_url}/{model_id}/upload"
    res = requests.post(
        url,
        headers=_construct_headers(api_key),
    )
    handle_api_error(res)

    # Parse the response body once instead of re-reading the JSON per field.
    res_json = res.json()
    presigned_url = res_json["upload_url"]
    query_id: str = res_json["query_id"]

    # Upload the batch via the presigned POST. The original code ignored this
    # response entirely, so a failed upload would only surface later as a
    # confusing prediction error; fail fast here instead.
    upload_res = requests.post(
        presigned_url["url"], data=presigned_url["fields"], files={"file": batch}
    )
    upload_res.raise_for_status()

    return query_id


def start_prediction(api_key: str, model_id: str, query_id: str) -> None:
    """Kicks off model prediction for a previously uploaded query batch.

    :param api_key: Cleanlab Studio API key used to authenticate the request
    :param model_id: ID of the deployed model to run
    :param query_id: ID of the uploaded prediction batch
    """
    url = f"{model_base_url}/{model_id}/predict/{query_id}"
    response = requests.post(url, headers=_construct_headers(api_key))
    handle_api_error(response)


def get_prediction_status(api_key: str, query_id: str) -> Dict[str, str]:
    """Gets the status of a model prediction query.

    :param api_key: Cleanlab Studio API key used to authenticate the request
    :param query_id: ID of the prediction batch to check
    :return: dict with a normalized "status" key -- "done" (plus "result_url"),
        "error" (plus "error_msg"), or "running"
    :raises APIError: if the status request itself fails
    """
    res = requests.get(
        f"{model_base_url}/predict/{query_id}",
        headers=_construct_headers(api_key),
    )
    handle_api_error(res)

    prediction_results = res.json()
    status = prediction_results["status"]

    # Only index the fields relevant to each status: the original code read
    # "results" and "error_msg" unconditionally, which raises KeyError if the
    # backend omits them while the query is still running.
    if status == "COMPLETE":
        return {"status": "done", "result_url": prediction_results["results"]}
    elif status == "FAILED":
        return {"status": "error", "error_msg": prediction_results["error_msg"]}
    else:
        return {"status": "running"}
101 changes: 101 additions & 0 deletions cleanlab_studio/studio/inference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import abc
import csv
import functools
import io
import time
from typing import List, Union, Optional

import numpy as np
import numpy.typing as npt
import pandas as pd

from cleanlab_studio.errors import APIError
ryansingman marked this conversation as resolved.
Show resolved Hide resolved
from cleanlab_studio.internal.api import api


# Supported input batch formats for deployed model inference.
TextBatch = Union[List[str], npt.NDArray[np.str_], pd.Series]
TabularBatch = pd.DataFrame  # Union[pd.DataFrame] collapses to pd.DataFrame anyway
Batch = Union[TextBatch, TabularBatch]

# Model outputs: integer or string class predictions, and per-class probabilities.
# NOTE(review): "ClassProbablities" is misspelled, but it is part of the module's
# public surface, so the name is kept as-is.
Predictions = Union[npt.NDArray[np.int_], npt.NDArray[np.str_]]
ClassProbablities = pd.DataFrame


class Model(abc.ABC):
    """Base class for deployed model inference."""

    def __init__(self, api_key: str, model_id: str):
        """Initializes model class w/ API key and model ID."""
        self._api_key = api_key
        self._model_id = model_id

    def predict(
        self,
        batch: "Batch",
        timeout: int = 600,
    ) -> "Union[str, Predictions]":
        """
        Gets predictions for batch of examples.

        Args:
            batch: batch of examples to predict classes for
            timeout: optional parameter to set timeout for predictions in seconds

        Returns:
            predictions from batch as a numpy array

        Raises:
            APIError: if the prediction fails or does not complete before ``timeout``
        """
        csv_batch = self._convert_batch_to_csv(batch)
        return self._predict(csv_batch, timeout)

    def _predict(self, batch: io.StringIO, timeout: int) -> "Union[str, Predictions]":
        """Gets predictions for batch of examples.

        :param batch: batch of examples to predict classes for, as in-memory CSV file
        :param timeout: maximum number of seconds to wait for predictions
        :return: predictions from batch
        :raises APIError: if the prediction fails or times out
        """
        query_id: str = api.upload_predict_batch(self._api_key, self._model_id, batch)
        api.start_prediction(self._api_key, self._model_id, query_id)

        # Poll until the prediction reaches a terminal state or the timeout
        # elapses. The timeout prevents callers from blocking indefinitely when
        # the backend never finishes.
        deadline = time.time() + timeout
        while True:
            resp = api.get_prediction_status(self._api_key, query_id)
            status = resp["status"]
            if status != "running":
                break
            if time.time() >= deadline:
                # BUG FIX: the original fell out of its poll loop on timeout
                # with status still "running" and then indexed
                # resp["result_url"], raising a confusing KeyError.
                raise APIError(f"Prediction timed out after {timeout} seconds")
            # Sleep between polls so the loop doesn't flood the backend with
            # API calls; only sleep while still running (the original also
            # slept once after reaching a terminal status).
            time.sleep(3)

        if status == "error":
            raise APIError(resp["error_msg"])

        # Prediction completed: load the results directly from the result URL.
        result_url = resp["result_url"]
        results_converted: "Predictions" = pd.read_csv(result_url).to_numpy()
        return results_converted

    @staticmethod
    def _convert_batch_to_csv(batch: "Batch") -> io.StringIO:
        """Converts a batch of examples to an in-memory CSV file.

        :param batch: text batch (list / array / Series of strings) or tabular
            batch (DataFrame)
        :return: in-memory CSV file, seeked back to the start
        :raises TypeError: if the batch is not a supported type
        """
        sio = io.StringIO()

        # handle text batches: single "text" header column, one row per example
        if isinstance(batch, (list, np.ndarray, pd.Series)):
            writer = csv.writer(sio)
            writer.writerow(["text"])
            for input_data in batch:
                writer.writerow([input_data])

        # handle tabular batches
        # NOTE(review): the DataFrame index is included in the CSV (pandas
        # default) -- confirm the backend expects/ignores the index column.
        elif isinstance(batch, pd.DataFrame):
            batch.to_csv(sio)

        else:
            raise TypeError(f"Invalid type of batch: {type(batch)}")

        sio.seek(0)
        return sio
14 changes: 13 additions & 1 deletion cleanlab_studio/studio/studio.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import numpy.typing as npt
import pandas as pd

from . import clean, upload
from . import clean, upload, inference
from cleanlab_studio.internal.api import api
from cleanlab_studio.internal.util import (
init_dataset_source,
Expand Down Expand Up @@ -290,6 +290,18 @@ def delete_project(self, project_id: str) -> None:
api.delete_project(self._api_key, project_id)
print(f"Successfully deleted project: {project_id}")

def get_model(self, model_id: str) -> inference.Model:
    """
    Gets a model deployed by Cleanlab Studio.

    Args:
        model_id: ID of the model to get. This ID can be found on the deployments page of the app UI.

    Returns:
        Model object with methods to run predictions on new input data.
    """
    return inference.Model(self._api_key, model_id)

class Experimental:
def __init__(self, outer): # type: ignore
self._outer = outer
Expand Down
Loading