From cafec6184ad355855d95a2fa7472eaa700494514 Mon Sep 17 00:00:00 2001
From: ryansingman <ryanjs@vt.edu>
Date: Wed, 12 Jul 2023 16:23:11 -0600
Subject: [PATCH 01/42] adds inference API (v0)

---
 cleanlab_studio/internal/api/api.py | 53 ++++++++++++++++++-
 cleanlab_studio/studio/inference.py | 81 +++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+), 1 deletion(-)
 create mode 100644 cleanlab_studio/studio/inference.py

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index fde0f230..ae9157fe 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -1,8 +1,10 @@
+import io
 import os
 import time
-from typing import Callable, List, Optional, Tuple, Union, Any
+from typing import Callable, List, Optional, Tuple, Dict, Union, Any
 from cleanlab_studio.errors import APIError
 
+import aiohttp
 import requests
 from tqdm import tqdm
 import pandas as pd
@@ -25,6 +27,7 @@
 dataset_base_url = f"{base_url}/datasets"
 project_base_url = f"{base_url}/projects"
 cleanset_base_url = f"{base_url}/cleansets"
+model_base_url = f"{base_url}/models"
 
 
 def _construct_headers(
@@ -329,3 +332,51 @@ def poll_progress(
             res = request_function(progress_id)
         pbar.update(float(1) - pbar.n)
     return res
+
+
+async def upload_predict_batch(api_key: str, model_id: str, batch: io.StringIO) -> str:
+    """Uploads prediction batch and returns query ID."""
+    async with aiohttp.ClientSession() as session:
+        async with session.post(
+            f"{model_base_url}/{model_id}/upload",
+            headers=_construct_headers(api_key),
+        ) as resp:
+            resp_json = await resp.json()
+            handle_api_error_from_json(resp_json)
+
+            query_id: str = resp_json["query_id"]
+            upload_url: str = resp_json["upload_url"]
+
+        session.put(upload_url, data=batch)
+
+    return query_id
+
+
+async def start_prediction(api_key: str, model_id: str, query_id: str) -> None:
+    """Starts prediction for query."""
+    async with aiohttp.ClientSession() as session:
+        async with session.post(
+            f"{model_base_url}/{model_id}/predict/{query_id}",
+            headers=_construct_headers(api_key),
+        ) as resp:
+            handle_api_error_from_json(await resp.json())
+
+
+async def get_prediction_status(api_key: str, model_id: str, query_id: str) -> Dict[str, str]:
+    """Gets status of model prediction query."""
+    async with aiohttp.ClientSession() as session:
+        async with session.get(
+            f"{model_base_url}/{model_id}/predict/{query_id}",
+            headers=_construct_headers(api_key),
+        ) as resp:
+            resp_json = await resp.json()
+            handle_api_error_from_json(resp_json)
+
+            return resp_json
+
+
+async def download_prediction_results(result_url: str) -> io.StringIO:
+    """Downloads prediction results from presigned URL."""
+    async with aiohttp.ClientSession() as session:
+        async with session.get(result_url) as resp:
+            return io.StringIO(await resp.text())
diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
new file mode 100644
index 00000000..9dc64957
--- /dev/null
+++ b/cleanlab_studio/studio/inference.py
@@ -0,0 +1,81 @@
+import abc
+import io
+from typing import Any, Awaitable, Tuple
+
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
+
+from cleanlab_studio.internal.api import api
+
+
+Predictions = npt.NDArray[np.int_] | npt.NDArray[np.str_]
+ClassProbablities = pd.DataFrame
+
+
+class Model(abc.ABC):
+    """Base class for deployed model inference."""
+
+    def __init__(self, api_key: str, model_id: str):
+        """Initializes model class w/ API key and model ID."""
+        self._api_key = api_key
+        self._model_id = model_id
+
+    @abc.abstractmethod
+    def predict(
+        self, batch: Any, return_pred_proba: bool = False
+    ) -> Predictions | Tuple[Predictions, ClassProbablities]:
+        """Gets predictions for batch of examples, optionally returning class probabilities.
+
+        :param batch: batch of example to predict classes for
+        :param return_pred_proba: if should return class probabilities, defaults to False
+        :return: predictions + class probabilities, if requested
+        """
+        raise NotImplementedError
+
+    def _predict(
+        self, batch: io.StringIO, return_pred_proba: bool
+    ) -> Predictions | Tuple[Predictions, ClassProbablities]:
+        """Gets predictions for batch of examples, optionally returning class probabilities.
+
+        :param batch: batch of example to predict classes for, as in-memory CSV file
+        :param return_pred_proba: if should return class probabilities
+        :return: predictions + class probabilities, if requested
+        """
+        return asyncio.run(self._predict_async(batch, return_pred_proba))
+
+    @abc.abstractmethod
+    async def predict_async(
+        self, batch: Any, return_pred_proba: bool = False
+    ) -> Awaitable[Predictions] | Awaitable[Tuple[Predictions, ClassProbablities]]:
+        """Asynchronously gets predictions for batch of examples, optionally returning class probabilities.
+
+        :param batch: batch of example to predict classes for
+        :param return_pred_proba: if should return class probabilities, defaults to False
+        :return: predictions + class probabilities, if requested
+        """
+        raise NotImplementedError
+
+    async def _predict_async(
+        self, batch: io.StringIO, return_pred_proba: bool
+    ) -> Predictions | Tuple[Predictions, ClassProbablities]:
+        """Asynchronously gets predictions for batch of examples, optionally returning class probabilities.
+
+        :param batch: batch of example to predict classes for, as in-memory CSV file
+        :param return_pred_proba: if should return class probabilities, defaults to False
+        :return: predictions + class probabilities, if requested
+        """
+        query_id: str = await api.upload_predict_batch(self._api_key, self._model_id, batch)
+        await api.start_prediction(self._api_key, self._model_id, query_id)
+
+        status: str | None = None
+        result_url: str = ""
+        while status != "done":
+            status, result_url = await api.get_prediction_status(
+                self._api_key, self._model_id, query_id
+            )
+
+        # TODO handle get pred proba case
+        return pd.read_csv(
+            await api.download_prediction_results(result_url),
+        ).values

From 55f93a28e21ff0b2db30372232f9cb2328888c2a Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Thu, 20 Jul 2023 01:27:33 +0900
Subject: [PATCH 02/42] modify client to fit backend api endpoints

---
 cleanlab_studio/internal/api/api.py | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index ae9157fe..7d38b8f0 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -27,7 +27,7 @@
 dataset_base_url = f"{base_url}/datasets"
 project_base_url = f"{base_url}/projects"
 cleanset_base_url = f"{base_url}/cleansets"
-model_base_url = f"{base_url}/models"
+model_base_url = f"{base_url}/v1/deployment"
 
 
 def _construct_headers(
@@ -344,29 +344,36 @@ async def upload_predict_batch(api_key: str, model_id: str, batch: io.StringIO)
             resp_json = await resp.json()
             handle_api_error_from_json(resp_json)
 
-            query_id: str = resp_json["query_id"]
             upload_url: str = resp_json["upload_url"]
 
-        session.put(upload_url, data=batch)
+        session.put(upload_url["url"], data=upload_url["fields"], files=batch)
 
-    return query_id
+        return upload_url["fields"]["key"]
 
 
-async def start_prediction(api_key: str, model_id: str, query_id: str) -> None:
+async def start_prediction(api_key: str, model_id: str, s3_key: str) -> None:
     """Starts prediction for query."""
     async with aiohttp.ClientSession() as session:
         async with session.post(
-            f"{model_base_url}/{model_id}/predict/{query_id}",
+            f"{model_base_url}/{model_id}/predict",
             headers=_construct_headers(api_key),
+            data={
+                "s3_key": s3_key,
+            }
         ) as resp:
-            handle_api_error_from_json(await resp.json())
+            resp_json = await resp.json()
+            handle_api_error_from_json(resp_json)
+
+            query_id: str = resp_json["id"]
+
+            return query_id
 
 
-async def get_prediction_status(api_key: str, model_id: str, query_id: str) -> Dict[str, str]:
+async def get_prediction_status(api_key: str, query_id: str) -> Dict[str, str]:
     """Gets status of model prediction query."""
     async with aiohttp.ClientSession() as session:
         async with session.get(
-            f"{model_base_url}/{model_id}/predict/{query_id}",
+            f"{model_base_url}/predict/{query_id}",
             headers=_construct_headers(api_key),
         ) as resp:
             resp_json = await resp.json()

From 9257172d12cd61669bd97a82d8d99e2b09f0abca Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Sat, 22 Jul 2023 01:01:07 +0900
Subject: [PATCH 03/42] modify cli to make model prediction work

---
 cleanlab_studio/internal/api/api.py | 79 +++++++++++++++--------------
 1 file changed, 42 insertions(+), 37 deletions(-)

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index 7d38b8f0..0b0ede71 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -21,6 +21,8 @@
 from cleanlab_studio.internal.types import JSONDict
 from cleanlab_studio.version import __version__
 
+os.environ["CLEANLAB_API_BASE_URL"] = "http://localhost:8500/api"
+
 base_url = os.environ.get("CLEANLAB_API_BASE_URL", "https://api.cleanlab.ai/api")
 cli_base_url = f"{base_url}/cli/v0"
 upload_base_url = f"{base_url}/upload/v0"
@@ -334,56 +336,59 @@ def poll_progress(
     return res
 
 
-async def upload_predict_batch(api_key: str, model_id: str, batch: io.StringIO) -> str:
+def upload_predict_batch(api_key: str, model_id: str, batch: io.StringIO) -> str:
     """Uploads prediction batch and returns query ID."""
-    async with aiohttp.ClientSession() as session:
-        async with session.post(
-            f"{model_base_url}/{model_id}/upload",
-            headers=_construct_headers(api_key),
-        ) as resp:
-            resp_json = await resp.json()
-            handle_api_error_from_json(resp_json)
+    res = requests.post(
+        f"{model_base_url}/{model_id}/upload",
+        headers=_construct_headers(api_key),
+    )
 
-            upload_url: str = resp_json["upload_url"]
+    handle_api_error(res)
+    presigned_url = res.json()["upload_url"]
 
-        session.put(upload_url["url"], data=upload_url["fields"], files=batch)
+    requests.post(presigned_url["url"], data=presigned_url["fields"], files={"file": batch})
 
-        return upload_url["fields"]["key"]
+    return presigned_url["fields"]["key"]
 
 
-async def start_prediction(api_key: str, model_id: str, s3_key: str) -> None:
+def start_prediction(api_key: str, model_id: str, s3_key: str) -> str:
     """Starts prediction for query."""
-    async with aiohttp.ClientSession() as session:
-        async with session.post(
-            f"{model_base_url}/{model_id}/predict",
-            headers=_construct_headers(api_key),
-            data={
-                "s3_key": s3_key,
-            }
-        ) as resp:
-            resp_json = await resp.json()
-            handle_api_error_from_json(resp_json)
+    res = requests.post(
+        f"{model_base_url}/{model_id}/predict",
+        headers=_construct_headers(api_key),
+        json={
+            "s3_key": s3_key,
+        }
+    )
 
-            query_id: str = resp_json["id"]
+    handle_api_error(res)
+    query_id: str = res.json()["id"]
 
-            return query_id
+    return query_id
 
 
-async def get_prediction_status(api_key: str, query_id: str) -> Dict[str, str]:
+def get_prediction_status(api_key: str, query_id: str) -> Dict[str, str]:
     """Gets status of model prediction query."""
-    async with aiohttp.ClientSession() as session:
-        async with session.get(
-            f"{model_base_url}/predict/{query_id}",
-            headers=_construct_headers(api_key),
-        ) as resp:
-            resp_json = await resp.json()
-            handle_api_error_from_json(resp_json)
+    res = requests.get(
+        f"{model_base_url}/predict/{query_id}",
+        headers=_construct_headers(api_key),
+    )
+    handle_api_error(res)
+
+    prediction_results = res.json()
+    status = prediction_results["status"]
+    result_url = prediction_results["results"]
+    error_msg = prediction_results["error_msg"]
 
-            return resp_json
+    if prediction_results["status"] == "COMPLETE":
+        return {"status": status, "result_url": result_url}
+    elif prediction_results["status"] == "FAILED":
+        return {"status": status, "error_msg": error_msg}
+    else:
+        return {"status": status}
 
 
-async def download_prediction_results(result_url: str) -> io.StringIO:
+def download_prediction_results(result_url: str) -> io.StringIO:
     """Downloads prediction results from presigned URL."""
-    async with aiohttp.ClientSession() as session:
-        async with session.get(result_url) as resp:
-            return io.StringIO(await resp.text())
+    res = requests.get(result_url)
+    return io.StringIO(res.text)

From 79b5cde1eac9297c00b98fc984a22f5470dd8043 Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Sat, 22 Jul 2023 01:16:43 +0900
Subject: [PATCH 04/42] format api.py with black

---
 cleanlab_studio/internal/api/api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index 0b0ede71..28ef7ae8 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -358,7 +358,7 @@ def start_prediction(api_key: str, model_id: str, s3_key: str) -> str:
         headers=_construct_headers(api_key),
         json={
             "s3_key": s3_key,
-        }
+        },
     )
 
     handle_api_error(res)

From 92d973cd33efc0c85e9e2f33b659cd6310ba84b3 Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Mon, 24 Jul 2023 10:14:38 -0700
Subject: [PATCH 05/42] modify cli after testing api endpoints

---
 cleanlab_studio/internal/api/api.py | 13 ++++---------
 cleanlab_studio/studio/inference.py | 16 ++++++++--------
 2 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index 28ef7ae8..573e5cfb 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -345,26 +345,21 @@ def upload_predict_batch(api_key: str, model_id: str, batch: io.StringIO) -> str
 
     handle_api_error(res)
     presigned_url = res.json()["upload_url"]
+    query_id = res.json()["query_id"]
 
     requests.post(presigned_url["url"], data=presigned_url["fields"], files={"file": batch})
 
-    return presigned_url["fields"]["key"]
+    return query_id
 
 
-def start_prediction(api_key: str, model_id: str, s3_key: str) -> str:
+def start_prediction(api_key: str, model_id: str, query_id: str) -> None:
     """Starts prediction for query."""
     res = requests.post(
-        f"{model_base_url}/{model_id}/predict",
+        f"{model_base_url}/{model_id}/predict/{query_id}",
         headers=_construct_headers(api_key),
-        json={
-            "s3_key": s3_key,
-        },
     )
 
     handle_api_error(res)
-    query_id: str = res.json()["id"]
-
-    return query_id
 
 
 def get_prediction_status(api_key: str, query_id: str) -> Dict[str, str]:
diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index 9dc64957..d1947531 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -42,10 +42,10 @@ def _predict(
         :param return_pred_proba: if should return class probabilities
         :return: predictions + class probabilities, if requested
         """
-        return asyncio.run(self._predict_async(batch, return_pred_proba))
+        return self._predict_async(batch, return_pred_proba)
 
     @abc.abstractmethod
-    async def predict_async(
+    def predict_async(
         self, batch: Any, return_pred_proba: bool = False
     ) -> Awaitable[Predictions] | Awaitable[Tuple[Predictions, ClassProbablities]]:
         """Asynchronously gets predictions for batch of examples, optionally returning class probabilities.
@@ -56,7 +56,7 @@ async def predict_async(
         """
         raise NotImplementedError
 
-    async def _predict_async(
+    def _predict_async(
         self, batch: io.StringIO, return_pred_proba: bool
     ) -> Predictions | Tuple[Predictions, ClassProbablities]:
         """Asynchronously gets predictions for batch of examples, optionally returning class probabilities.
@@ -65,17 +65,17 @@ async def _predict_async(
         :param return_pred_proba: if should return class probabilities, defaults to False
         :return: predictions + class probabilities, if requested
         """
-        query_id: str = await api.upload_predict_batch(self._api_key, self._model_id, batch)
-        await api.start_prediction(self._api_key, self._model_id, query_id)
+        query_id: str = api.upload_predict_batch(self._api_key, self._model_id, batch)
+        api.start_prediction(self._api_key, self._model_id, query_id)
 
         status: str | None = None
         result_url: str = ""
         while status != "done":
-            status, result_url = await api.get_prediction_status(
-                self._api_key, self._model_id, query_id
+            status, result_url = api.get_prediction_status(
+                self._api_key, query_id
             )
 
         # TODO handle get pred proba case
         return pd.read_csv(
-            await api.download_prediction_results(result_url),
+            api.download_prediction_results(result_url),
         ).values

From 967722b45e8161bc917ece7916a1f234c586863b Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Mon, 24 Jul 2023 10:34:04 -0700
Subject: [PATCH 06/42] format inference.py with black

---
 cleanlab_studio/studio/inference.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index d1947531..45158167 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -71,9 +71,7 @@ def _predict_async(
         status: str | None = None
         result_url: str = ""
         while status != "done":
-            status, result_url = api.get_prediction_status(
-                self._api_key, query_id
-            )
+            status, result_url = api.get_prediction_status(self._api_key, query_id)
 
         # TODO handle get pred proba case
         return pd.read_csv(

From 3d63153033a0c12587588d756565a6a860697efb Mon Sep 17 00:00:00 2001
From: ryansingman <ryanjs@vt.edu>
Date: Mon, 24 Jul 2023 14:07:03 -0600
Subject: [PATCH 07/42] integrate into Studio class

---
 cleanlab_studio/studio/inference.py | 85 ++++++++++++++++-------------
 cleanlab_studio/studio/studio.py    |  6 +-
 2 files changed, 51 insertions(+), 40 deletions(-)

diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index 45158167..f00bf940 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -1,6 +1,8 @@
 import abc
+import csv
+import functools
 import io
-from typing import Any, Awaitable, Tuple
+from typing import List
 
 import numpy as np
 import numpy.typing as npt
@@ -9,6 +11,10 @@
 from cleanlab_studio.internal.api import api
 
 
+TextBatch = List[str] | npt.NDArray[np.str_] | pd.Series
+TabularBatch = pd.DataFrame
+Batch = TextBatch | TabularBatch
+
 Predictions = npt.NDArray[np.int_] | npt.NDArray[np.str_]
 ClassProbablities = pd.DataFrame
 
@@ -21,49 +27,23 @@ def __init__(self, api_key: str, model_id: str):
         self._api_key = api_key
         self._model_id = model_id
 
-    @abc.abstractmethod
     def predict(
-        self, batch: Any, return_pred_proba: bool = False
-    ) -> Predictions | Tuple[Predictions, ClassProbablities]:
-        """Gets predictions for batch of examples, optionally returning class probabilities.
-
-        :param batch: batch of example to predict classes for
-        :param return_pred_proba: if should return class probabilities, defaults to False
-        :return: predictions + class probabilities, if requested
-        """
-        raise NotImplementedError
-
-    def _predict(
-        self, batch: io.StringIO, return_pred_proba: bool
-    ) -> Predictions | Tuple[Predictions, ClassProbablities]:
-        """Gets predictions for batch of examples, optionally returning class probabilities.
-
-        :param batch: batch of example to predict classes for, as in-memory CSV file
-        :param return_pred_proba: if should return class probabilities
-        :return: predictions + class probabilities, if requested
-        """
-        return self._predict_async(batch, return_pred_proba)
-
-    @abc.abstractmethod
-    def predict_async(
-        self, batch: Any, return_pred_proba: bool = False
-    ) -> Awaitable[Predictions] | Awaitable[Tuple[Predictions, ClassProbablities]]:
-        """Asynchronously gets predictions for batch of examples, optionally returning class probabilities.
+        self,
+        batch: Batch,
+    ) -> Predictions:
+        """Gets predictions for batch of examples.
 
         :param batch: batch of example to predict classes for
-        :param return_pred_proba: if should return class probabilities, defaults to False
-        :return: predictions + class probabilities, if requested
+        :return: predictions from batch
         """
-        raise NotImplementedError
+        csv_batch = self._convert_batch_to_csv(batch)
+        return self._predict(csv_batch)
 
-    def _predict_async(
-        self, batch: io.StringIO, return_pred_proba: bool
-    ) -> Predictions | Tuple[Predictions, ClassProbablities]:
-        """Asynchronously gets predictions for batch of examples, optionally returning class probabilities.
+    def _predict(self, batch: io.StringIO) -> Predictions:
+        """Gets predictions for batch of examples.
 
         :param batch: batch of example to predict classes for, as in-memory CSV file
-        :param return_pred_proba: if should return class probabilities, defaults to False
-        :return: predictions + class probabilities, if requested
+        :return: predictions from batch
         """
         query_id: str = api.upload_predict_batch(self._api_key, self._model_id, batch)
         api.start_prediction(self._api_key, self._model_id, query_id)
@@ -71,9 +51,36 @@ def _predict_async(
         status: str | None = None
         result_url: str = ""
         while status != "done":
-            status, result_url = api.get_prediction_status(self._api_key, query_id)
+            resp = api.get_prediction_status(self._api_key, query_id)
+            status = resp["status"]
+            result_url = resp["result_url"]
 
-        # TODO handle get pred proba case
         return pd.read_csv(
             api.download_prediction_results(result_url),
         ).values
+
+    @functools.singledispatchmethod
+    def _convert_batch_to_csv(self, batch: Batch) -> io.StringIO:
+        """Converts batch object to CSV string IO."""
+        sio = io.StringIO()
+
+        # handle text batches
+        if isinstance(batch, (list, np.ndarray, pd.Series)):
+            writer = csv.writer(sio)
+
+            # write header
+            writer.writerow(["label"])
+
+            # write labels to CSV
+            for label in batch:
+                writer.writerow([label])
+
+        # handle tabular batches
+        elif isinstance(batch, pd.DataFrame):
+            batch.to_csv(sio)
+
+        else:
+            raise TypeError(f"Invalid type of batch: {type(batch)}")
+
+        sio.seek(0)
+        return sio
diff --git a/cleanlab_studio/studio/studio.py b/cleanlab_studio/studio/studio.py
index c0eacc11..56d38a8e 100644
--- a/cleanlab_studio/studio/studio.py
+++ b/cleanlab_studio/studio/studio.py
@@ -4,7 +4,7 @@
 import numpy.typing as npt
 import pandas as pd
 
-from . import clean, upload
+from . import clean, upload, inference
 from cleanlab_studio.internal.api import api
 from cleanlab_studio.internal.util import init_dataset_source, check_none, check_not_none
 from cleanlab_studio.internal.settings import CleanlabSettings
@@ -231,6 +231,10 @@ def delete_project(self, project_id: str) -> None:
         api.delete_project(self._api_key, project_id)
         print(f"Successfully deleted project: {project_id}")
 
+    def get_model(self, model_id: str) -> inference.Model:
+        """Creates model object from model ID, to use for inference."""
+        return inference.Model(self._api_key, model_id)
+
     class Experimental:
         def __init__(self, outer):  # type: ignore
             self._outer = outer

From d81b2096afc3042f73a2047a3e76ae3f491da0bb Mon Sep 17 00:00:00 2001
From: ryansingman <ryanjs@vt.edu>
Date: Mon, 24 Jul 2023 14:23:50 -0600
Subject: [PATCH 08/42] remove base url override

---
 cleanlab_studio/internal/api/api.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index 573e5cfb..ce565c36 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -21,8 +21,6 @@
 from cleanlab_studio.internal.types import JSONDict
 from cleanlab_studio.version import __version__
 
-os.environ["CLEANLAB_API_BASE_URL"] = "http://localhost:8500/api"
-
 base_url = os.environ.get("CLEANLAB_API_BASE_URL", "https://api.cleanlab.ai/api")
 cli_base_url = f"{base_url}/cli/v0"
 upload_base_url = f"{base_url}/upload/v0"
@@ -338,8 +336,10 @@ def poll_progress(
 
 def upload_predict_batch(api_key: str, model_id: str, batch: io.StringIO) -> str:
     """Uploads prediction batch and returns query ID."""
+    url = f"{model_base_url}/{model_id}/upload"
+    print(f"upload {url=}")
     res = requests.post(
-        f"{model_base_url}/{model_id}/upload",
+        url,
         headers=_construct_headers(api_key),
     )
 

From 865cf032928373d70a5404e4fc400e98fc8eec88 Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Mon, 24 Jul 2023 14:00:43 -0700
Subject: [PATCH 09/42] fix api endpoint for client to work

---
 cleanlab_studio/internal/api/api.py | 11 ++++++-----
 cleanlab_studio/studio/inference.py |  7 ++++---
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index ce565c36..8ba68fe6 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -21,6 +21,7 @@
 from cleanlab_studio.internal.types import JSONDict
 from cleanlab_studio.version import __version__
 
+
 base_url = os.environ.get("CLEANLAB_API_BASE_URL", "https://api.cleanlab.ai/api")
 cli_base_url = f"{base_url}/cli/v0"
 upload_base_url = f"{base_url}/upload/v0"
@@ -375,12 +376,12 @@ def get_prediction_status(api_key: str, query_id: str) -> Dict[str, str]:
     result_url = prediction_results["results"]
     error_msg = prediction_results["error_msg"]
 
-    if prediction_results["status"] == "COMPLETE":
-        return {"status": status, "result_url": result_url}
-    elif prediction_results["status"] == "FAILED":
-        return {"status": status, "error_msg": error_msg}
+    if status == "COMPLETE":
+        return {"status": "done", "result_url": result_url}
+    elif status == "FAILED":
+        return {"status": "error", "error_msg": error_msg}
     else:
-        return {"status": status}
+        return {"status": "running"}
 
 
 def download_prediction_results(result_url: str) -> io.StringIO:
diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index f00bf940..ce07dc9d 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -48,12 +48,13 @@ def _predict(self, batch: io.StringIO) -> Predictions:
         query_id: str = api.upload_predict_batch(self._api_key, self._model_id, batch)
         api.start_prediction(self._api_key, self._model_id, query_id)
 
-        status: str | None = None
-        result_url: str = ""
+        resp = api.get_prediction_status(self._api_key, query_id)
+        status: str | None = resp["status"]
         while status != "done":
             resp = api.get_prediction_status(self._api_key, query_id)
             status = resp["status"]
-            result_url = resp["result_url"]
+
+        result_url = resp["result_url"]
 
         return pd.read_csv(
             api.download_prediction_results(result_url),

From c88f90b945e3d7dcf1c6e2a8c3417f1bfd93114e Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Mon, 24 Jul 2023 15:08:25 -0700
Subject: [PATCH 10/42] modify code to support text files without headers

---
 cleanlab_studio/internal/api/api.py | 16 +++++++++++++--
 cleanlab_studio/studio/inference.py | 21 ++++++++++---------
 tests/models/test_prediction.py     | 31 +++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+), 13 deletions(-)
 create mode 100644 tests/models/test_prediction.py

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index 8ba68fe6..c1504516 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -1,6 +1,8 @@
 import io
 import os
 import time
+from itertools import chain
+from shutil import copyfileobj
 from typing import Callable, List, Optional, Tuple, Dict, Union, Any
 from cleanlab_studio.errors import APIError
 
@@ -338,7 +340,6 @@ def poll_progress(
 def upload_predict_batch(api_key: str, model_id: str, batch: io.StringIO) -> str:
     """Uploads prediction batch and returns query ID."""
     url = f"{model_base_url}/{model_id}/upload"
-    print(f"upload {url=}")
     res = requests.post(
         url,
         headers=_construct_headers(api_key),
@@ -347,8 +348,19 @@ def upload_predict_batch(api_key: str, model_id: str, batch: io.StringIO) -> str
     handle_api_error(res)
     presigned_url = res.json()["upload_url"]
     query_id = res.json()["query_id"]
+    header = res.json()["header"]
+    if header:
+        batch_header = batch.readline()
+        if batch_header == header:
+            input_batch = batch
+        else:
+            header_io = io.StringIO(header)
+            batch_header_io = io.StringIO(batch_header)
+            input_batch = io.StringIO("\n".join(chain(header_io, batch_header_io, batch)))
+    else:
+        input_batch = batch
 
-    requests.post(presigned_url["url"], data=presigned_url["fields"], files={"file": batch})
+    requests.post(presigned_url["url"], data=presigned_url["fields"], files={"file": input_batch})
 
     return query_id
 
diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index ce07dc9d..6cf1832a 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -50,15 +50,17 @@ def _predict(self, batch: io.StringIO) -> Predictions:
 
         resp = api.get_prediction_status(self._api_key, query_id)
         status: str | None = resp["status"]
-        while status != "done":
+        while status == "running":
             resp = api.get_prediction_status(self._api_key, query_id)
             status = resp["status"]
 
-        result_url = resp["result_url"]
-
-        return pd.read_csv(
-            api.download_prediction_results(result_url),
-        ).values
+        if status == "error":
+            return resp["error_msg"]
+        else:
+            result_url = resp["result_url"]
+            return pd.read_csv(
+                api.download_prediction_results(result_url),
+            ).values
 
     @functools.singledispatchmethod
     def _convert_batch_to_csv(self, batch: Batch) -> io.StringIO:
@@ -69,12 +71,9 @@ def _convert_batch_to_csv(self, batch: Batch) -> io.StringIO:
         if isinstance(batch, (list, np.ndarray, pd.Series)):
             writer = csv.writer(sio)
 
-            # write header
-            writer.writerow(["label"])
-
             # write labels to CSV
-            for label in batch:
-                writer.writerow([label])
+            for input_data in batch:
+                writer.writerow([input_data])
 
         # handle tabular batches
         elif isinstance(batch, pd.DataFrame):
diff --git a/tests/models/test_prediction.py b/tests/models/test_prediction.py
new file mode 100644
index 00000000..bd60c50a
--- /dev/null
+++ b/tests/models/test_prediction.py
@@ -0,0 +1,31 @@
+import os
+
+os.environ["CLEANLAB_API_BASE_URL"] = "https://api.dev-bc26qf4m.cleanlab.ai/api"
+# os.environ["CLEANLAB_API_BASE_URL"] = "http://localhost:8500/api"
+
+from cleanlab_studio import Studio
+import pandas as pd
+
+
+API_KEY = "350b3ee6fbe64d21a6012ea281ce0ca1"
+MODEL_ID = "cea761848e5f449b85e34fe347696b53"
+# API_KEY = "75f2ab8c962c40169917136756c5d937"
+# MODEL_ID = "750dbdfb6549470192573b9646be40e9"
+BATCH = pd.read_csv("/Users/tony/test_files/text_amazon_reviews_test_small.csv")
+TEXT_BATCH = [
+    "This magazine was great for the times but as with all other technology magazines the new stuff isn't as good a lot of advertisments and reviews seem biased.",
+    "We ordered this magazine for our grandson (then 7 going on 30) who was/is deploy into technology. He really enjoyed every issue.",
+    "I didn't receive a full year.  I only receive the magazine twice.  It's a good magazine, I just didn't receive it as promised.",
+    "I was hoping for more technical than what was there. it seems to be more like 'look how cool this is' than a technical publication. It's like sport compact car, but for computers.",
+    "I only received one copy of the mag so I couldn't really find out if it was good reading or not",
+    "This magazine is just ok. I ended up subscribing to pc world instead. They are more for the technician and not just the cusumer.",
+    "There articles are alright, but they screw you on the amount you get as i only got 10 of the 12 months subcription. so be carefull unless you are on the auto renew.",
+    "Excellent product! I love reading through the magazine and learning about the cool new products out there and the cool programs!",
+    "I ordered this hoping to learn more about the latest gadgets, and I did learn some things but in over my head over all.  I do not enjoy this reading at all.",
+    "Love the magazine.  The price through Amazon is well worth it for the knowledge recieved and the subscription process is painless",
+    "I bought this subscription for my son. He is presently building a computer. He said it has lots of good and useful information in it.",
+]
+studio = Studio(API_KEY)
+model = studio.get_model(MODEL_ID)
+results = model.predict(TEXT_BATCH)
+print(results)

From 7db17d80b0a7d0c82c7cba62c9a903321552d17f Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Mon, 24 Jul 2023 15:09:32 -0700
Subject: [PATCH 11/42] remove test file for local testing

---
 tests/models/test_prediction.py | 31 -------------------------------
 1 file changed, 31 deletions(-)
 delete mode 100644 tests/models/test_prediction.py

diff --git a/tests/models/test_prediction.py b/tests/models/test_prediction.py
deleted file mode 100644
index bd60c50a..00000000
--- a/tests/models/test_prediction.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import os
-
-os.environ["CLEANLAB_API_BASE_URL"] = "https://api.dev-bc26qf4m.cleanlab.ai/api"
-# os.environ["CLEANLAB_API_BASE_URL"] = "http://localhost:8500/api"
-
-from cleanlab_studio import Studio
-import pandas as pd
-
-
-API_KEY = "350b3ee6fbe64d21a6012ea281ce0ca1"
-MODEL_ID = "cea761848e5f449b85e34fe347696b53"
-# API_KEY = "75f2ab8c962c40169917136756c5d937"
-# MODEL_ID = "750dbdfb6549470192573b9646be40e9"
-BATCH = pd.read_csv("/Users/tony/test_files/text_amazon_reviews_test_small.csv")
-TEXT_BATCH = [
-    "This magazine was great for the times but as with all other technology magazines the new stuff isn't as good a lot of advertisments and reviews seem biased.",
-    "We ordered this magazine for our grandson (then 7 going on 30) who was/is deploy into technology. He really enjoyed every issue.",
-    "I didn't receive a full year.  I only receive the magazine twice.  It's a good magazine, I just didn't receive it as promised.",
-    "I was hoping for more technical than what was there. it seems to be more like 'look how cool this is' than a technical publication. It's like sport compact car, but for computers.",
-    "I only received one copy of the mag so I couldn't really find out if it was good reading or not",
-    "This magazine is just ok. I ended up subscribing to pc world instead. They are more for the technician and not just the cusumer.",
-    "There articles are alright, but they screw you on the amount you get as i only got 10 of the 12 months subcription. so be carefull unless you are on the auto renew.",
-    "Excellent product! I love reading through the magazine and learning about the cool new products out there and the cool programs!",
-    "I ordered this hoping to learn more about the latest gadgets, and I did learn some things but in over my head over all.  I do not enjoy this reading at all.",
-    "Love the magazine.  The price through Amazon is well worth it for the knowledge recieved and the subscription process is painless",
-    "I bought this subscription for my son. He is presently building a computer. He said it has lots of good and useful information in it.",
-]
-studio = Studio(API_KEY)
-model = studio.get_model(MODEL_ID)
-results = model.predict(TEXT_BATCH)
-print(results)

From 2d27b3c24c536e09af8defba3c1ad791c2b5d490 Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Mon, 24 Jul 2023 15:40:32 -0700
Subject: [PATCH 12/42] change response for upload api and remove logic for
 comparing headers of text input files with actual text data columns used for
 prediction

---
 cleanlab_studio/internal/api/api.py | 12 ++++------
 tests/models/test_prediction.py     | 34 +++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 8 deletions(-)
 create mode 100644 tests/models/test_prediction.py

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index c1504516..df1c9ae7 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -348,15 +348,11 @@ def upload_predict_batch(api_key: str, model_id: str, batch: io.StringIO) -> str
     handle_api_error(res)
     presigned_url = res.json()["upload_url"]
     query_id = res.json()["query_id"]
+    modality = res.json()["modality"]
     header = res.json()["header"]
-    if header:
-        batch_header = batch.readline()
-        if batch_header == header:
-            input_batch = batch
-        else:
-            header_io = io.StringIO(header)
-            batch_header_io = io.StringIO(batch_header)
-            input_batch = io.StringIO("\n".join(chain(header_io, batch_header_io, batch)))
+    if modality == "text":
+        header_io = io.StringIO(header)
+        input_batch = io.StringIO("\n".join(chain(header_io, batch)))
     else:
         input_batch = batch
 
diff --git a/tests/models/test_prediction.py b/tests/models/test_prediction.py
new file mode 100644
index 00000000..3474b413
--- /dev/null
+++ b/tests/models/test_prediction.py
@@ -0,0 +1,34 @@
+import os
+
+os.environ["CLEANLAB_API_BASE_URL"] = "https://api.dev-bc26qf4m.cleanlab.ai/api"
+# os.environ["CLEANLAB_API_BASE_URL"] = "http://localhost:8500/api"
+
+from cleanlab_studio import Studio
+import pandas as pd
+
+
+API_KEY = "350b3ee6fbe64d21a6012ea281ce0ca1"
+MODEL_ID = "cea761848e5f449b85e34fe347696b53"
+# API_KEY = "75f2ab8c962c40169917136756c5d937"
+# MODEL_ID = "750dbdfb6549470192573b9646be40e9"
+BATCH = pd.read_csv(
+    "/Users/tony/test_files/text_amazon_reviews_test_small.csv", index_col=False, header=0
+).loc[0, :]
+print(BATCH)
+# TEXT_BATCH = pd.Series([
+#     "This magazine was great for the times but as with all other technology magazines the new stuff isn't as good a lot of advertisments and reviews seem biased.",
+#     "We ordered this magazine for our grandson (then 7 going on 30) who was/is deploy into technology. He really enjoyed every issue.",
+#     "I didn't receive a full year.  I only receive the magazine twice.  It's a good magazine, I just didn't receive it as promised.",
+#     "I was hoping for more technical than what was there. it seems to be more like 'look how cool this is' than a technical publication. It's like sport compact car, but for computers.",
+#     "I only received one copy of the mag so I couldn't really find out if it was good reading or not",
+#     "This magazine is just ok. I ended up subscribing to pc world instead. They are more for the technician and not just the cusumer.",
+#     "There articles are alright, but they screw you on the amount you get as i only got 10 of the 12 months subcription. so be carefull unless you are on the auto renew.",
+#     "Excellent product! I love reading through the magazine and learning about the cool new products out there and the cool programs!",
+#     "I ordered this hoping to learn more about the latest gadgets, and I did learn some things but in over my head over all.  I do not enjoy this reading at all.",
+#     "Love the magazine.  The price through Amazon is well worth it for the knowledge recieved and the subscription process is painless",
+#     "I bought this subscription for my son. He is presently building a computer. He said it has lots of good and useful information in it.",
+# ], name="review_text")
+studio = Studio(API_KEY)
+model = studio.get_model(MODEL_ID)
+results = model.predict(BATCH)
+print(results)

From 25d33acbb1910d1ad1fc64f16cb76e2e33e8a970 Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Mon, 24 Jul 2023 16:12:29 -0700
Subject: [PATCH 13/42] modify invoke lambda api to send only query_id as param

---
 cleanlab_studio/internal/api/api.py |  4 ++--
 cleanlab_studio/studio/inference.py |  2 +-
 tests/models/test_prediction.py     | 17 +++++++----------
 3 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index df1c9ae7..d36eb358 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -361,10 +361,10 @@ def upload_predict_batch(api_key: str, model_id: str, batch: io.StringIO) -> str
     return query_id
 
 
-def start_prediction(api_key: str, model_id: str, query_id: str) -> None:
+def start_prediction(api_key: str, query_id: str) -> None:
     """Starts prediction for query."""
     res = requests.post(
-        f"{model_base_url}/{model_id}/predict/{query_id}",
+        f"{model_base_url}/predict/{query_id}",
         headers=_construct_headers(api_key),
     )
 
diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index 6cf1832a..ec6109da 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -46,7 +46,7 @@ def _predict(self, batch: io.StringIO) -> Predictions:
         :return: predictions from batch
         """
         query_id: str = api.upload_predict_batch(self._api_key, self._model_id, batch)
-        api.start_prediction(self._api_key, self._model_id, query_id)
+        api.start_prediction(self._api_key, query_id)
 
         resp = api.get_prediction_status(self._api_key, query_id)
         status: str | None = resp["status"]
diff --git a/tests/models/test_prediction.py b/tests/models/test_prediction.py
index 3474b413..a54d1d77 100644
--- a/tests/models/test_prediction.py
+++ b/tests/models/test_prediction.py
@@ -1,20 +1,17 @@
 import os
 
-os.environ["CLEANLAB_API_BASE_URL"] = "https://api.dev-bc26qf4m.cleanlab.ai/api"
-# os.environ["CLEANLAB_API_BASE_URL"] = "http://localhost:8500/api"
+# os.environ["CLEANLAB_API_BASE_URL"] = "https://api.dev-bc26qf4m.cleanlab.ai/api"
+os.environ["CLEANLAB_API_BASE_URL"] = "http://localhost:8500/api"
 
 from cleanlab_studio import Studio
 import pandas as pd
 
 
-API_KEY = "350b3ee6fbe64d21a6012ea281ce0ca1"
-MODEL_ID = "cea761848e5f449b85e34fe347696b53"
-# API_KEY = "75f2ab8c962c40169917136756c5d937"
-# MODEL_ID = "750dbdfb6549470192573b9646be40e9"
-BATCH = pd.read_csv(
-    "/Users/tony/test_files/text_amazon_reviews_test_small.csv", index_col=False, header=0
-).loc[0, :]
-print(BATCH)
+# API_KEY = "350b3ee6fbe64d21a6012ea281ce0ca1"
+# MODEL_ID = "cea761848e5f449b85e34fe347696b53"
+API_KEY = "75f2ab8c962c40169917136756c5d937"
+MODEL_ID = "750dbdfb6549470192573b9646be40e9"
+BATCH = pd.read_csv("/Users/tony/test_files/tabular_grades_test_small.csv")
 # TEXT_BATCH = pd.Series([
 #     "This magazine was great for the times but as with all other technology magazines the new stuff isn't as good a lot of advertisments and reviews seem biased.",
 #     "We ordered this magazine for our grandson (then 7 going on 30) who was/is deploy into technology. He really enjoyed every issue.",

From ba0a5f92ae7a289a9571c77fc937a9337ef7014d Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Tue, 25 Jul 2023 09:49:53 -0700
Subject: [PATCH 14/42] remove test file

---
 cleanlab_studio/internal/api/api.py |  2 --
 tests/models/test_prediction.py     | 31 -----------------------------
 2 files changed, 33 deletions(-)
 delete mode 100644 tests/models/test_prediction.py

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index d36eb358..e945a906 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -2,11 +2,9 @@
 import os
 import time
 from itertools import chain
-from shutil import copyfileobj
 from typing import Callable, List, Optional, Tuple, Dict, Union, Any
 from cleanlab_studio.errors import APIError
 
-import aiohttp
 import requests
 from tqdm import tqdm
 import pandas as pd
diff --git a/tests/models/test_prediction.py b/tests/models/test_prediction.py
deleted file mode 100644
index a54d1d77..00000000
--- a/tests/models/test_prediction.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import os
-
-# os.environ["CLEANLAB_API_BASE_URL"] = "https://api.dev-bc26qf4m.cleanlab.ai/api"
-os.environ["CLEANLAB_API_BASE_URL"] = "http://localhost:8500/api"
-
-from cleanlab_studio import Studio
-import pandas as pd
-
-
-# API_KEY = "350b3ee6fbe64d21a6012ea281ce0ca1"
-# MODEL_ID = "cea761848e5f449b85e34fe347696b53"
-API_KEY = "75f2ab8c962c40169917136756c5d937"
-MODEL_ID = "750dbdfb6549470192573b9646be40e9"
-BATCH = pd.read_csv("/Users/tony/test_files/tabular_grades_test_small.csv")
-# TEXT_BATCH = pd.Series([
-#     "This magazine was great for the times but as with all other technology magazines the new stuff isn't as good a lot of advertisments and reviews seem biased.",
-#     "We ordered this magazine for our grandson (then 7 going on 30) who was/is deploy into technology. He really enjoyed every issue.",
-#     "I didn't receive a full year.  I only receive the magazine twice.  It's a good magazine, I just didn't receive it as promised.",
-#     "I was hoping for more technical than what was there. it seems to be more like 'look how cool this is' than a technical publication. It's like sport compact car, but for computers.",
-#     "I only received one copy of the mag so I couldn't really find out if it was good reading or not",
-#     "This magazine is just ok. I ended up subscribing to pc world instead. They are more for the technician and not just the cusumer.",
-#     "There articles are alright, but they screw you on the amount you get as i only got 10 of the 12 months subcription. so be carefull unless you are on the auto renew.",
-#     "Excellent product! I love reading through the magazine and learning about the cool new products out there and the cool programs!",
-#     "I ordered this hoping to learn more about the latest gadgets, and I did learn some things but in over my head over all.  I do not enjoy this reading at all.",
-#     "Love the magazine.  The price through Amazon is well worth it for the knowledge recieved and the subscription process is painless",
-#     "I bought this subscription for my son. He is presently building a computer. He said it has lots of good and useful information in it.",
-# ], name="review_text")
-studio = Studio(API_KEY)
-model = studio.get_model(MODEL_ID)
-results = model.predict(BATCH)
-print(results)

From 9ab9d283357c24007c408e19fa7f77c16fe0dddb Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Tue, 25 Jul 2023 10:34:11 -0700
Subject: [PATCH 15/42] fix mypy errors

---
 cleanlab_studio/internal/api/api.py |  3 ++-
 cleanlab_studio/studio/inference.py | 18 +++++++++---------
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index e945a906..1e3e2f3c 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -345,7 +345,7 @@ def upload_predict_batch(api_key: str, model_id: str, batch: io.StringIO) -> str
 
     handle_api_error(res)
     presigned_url = res.json()["upload_url"]
-    query_id = res.json()["query_id"]
+    query_id: str = res.json()["query_id"]
     modality = res.json()["modality"]
     header = res.json()["header"]
     if modality == "text":
@@ -393,4 +393,5 @@ def get_prediction_status(api_key: str, query_id: str) -> Dict[str, str]:
 def download_prediction_results(result_url: str) -> io.StringIO:
     """Downloads prediction results from presigned URL."""
     res = requests.get(result_url)
+    print(res.text)
     return io.StringIO(res.text)
diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index ec6109da..73fd2ce3 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -2,7 +2,7 @@
 import csv
 import functools
 import io
-from typing import List
+from typing import List, TypeAlias
 
 import numpy as np
 import numpy.typing as npt
@@ -11,9 +11,9 @@
 from cleanlab_studio.internal.api import api
 
 
-TextBatch = List[str] | npt.NDArray[np.str_] | pd.Series
-TabularBatch = pd.DataFrame
-Batch = TextBatch | TabularBatch
+TextBatch: TypeAlias = List[str] | npt.NDArray[np.str_] | pd.Series
+TabularBatch: TypeAlias = pd.DataFrame
+Batch: TypeAlias = TextBatch | TabularBatch
 
 Predictions = npt.NDArray[np.int_] | npt.NDArray[np.str_]
 ClassProbablities = pd.DataFrame
@@ -30,7 +30,7 @@ def __init__(self, api_key: str, model_id: str):
     def predict(
         self,
         batch: Batch,
-    ) -> Predictions:
+    ) -> str | Predictions:
         """Gets predictions for batch of examples.
 
         :param batch: batch of example to predict classes for
@@ -39,7 +39,7 @@ def predict(
         csv_batch = self._convert_batch_to_csv(batch)
         return self._predict(csv_batch)
 
-    def _predict(self, batch: io.StringIO) -> Predictions:
+    def _predict(self, batch: io.StringIO) -> str | Predictions:
         """Gets predictions for batch of examples.
 
         :param batch: batch of example to predict classes for, as in-memory CSV file
@@ -58,9 +58,9 @@ def _predict(self, batch: io.StringIO) -> Predictions:
             return resp["error_msg"]
         else:
             result_url = resp["result_url"]
-            return pd.read_csv(
-                api.download_prediction_results(result_url),
-            ).values
+            results: io.StringIO = api.download_prediction_results(result_url)
+            results_converted: Predictions = pd.read_csv(results).to_numpy()
+            return results_converted
 
     @functools.singledispatchmethod
     def _convert_batch_to_csv(self, batch: Batch) -> io.StringIO:

From 7db2438ee77cd1ebb04410d210c1459c96caac7c Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Tue, 25 Jul 2023 10:40:36 -0700
Subject: [PATCH 16/42] remove TypeAlias

---
 cleanlab_studio/studio/inference.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index 73fd2ce3..fd999629 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -2,7 +2,7 @@
 import csv
 import functools
 import io
-from typing import List, TypeAlias
+from typing import List
 
 import numpy as np
 import numpy.typing as npt
@@ -11,9 +11,9 @@
 from cleanlab_studio.internal.api import api
 
 
-TextBatch: TypeAlias = List[str] | npt.NDArray[np.str_] | pd.Series
-TabularBatch: TypeAlias = pd.DataFrame
-Batch: TypeAlias = TextBatch | TabularBatch
+TextBatch = List[str] | npt.NDArray[np.str_] | pd.Series
+TabularBatch = pd.DataFrame
+Batch = TextBatch | TabularBatch
 
 Predictions = npt.NDArray[np.int_] | npt.NDArray[np.str_]
 ClassProbablities = pd.DataFrame

From 7b1a48aa700ee2429db1e6ac58b7b54772f321d1 Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Tue, 25 Jul 2023 10:44:16 -0700
Subject: [PATCH 17/42] fix mypy for Batch type

---
 cleanlab_studio/studio/inference.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index fd999629..d492795c 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -2,7 +2,7 @@
 import csv
 import functools
 import io
-from typing import List
+from typing import List, Union
 
 import numpy as np
 import numpy.typing as npt
@@ -11,9 +11,9 @@
 from cleanlab_studio.internal.api import api
 
 
-TextBatch = List[str] | npt.NDArray[np.str_] | pd.Series
-TabularBatch = pd.DataFrame
-Batch = TextBatch | TabularBatch
+TextBatch = Union[List[str], npt.NDArray[np.str_], pd.Series]
+TabularBatch = Union[pd.DataFrame]
+Batch = Union[TextBatch, TabularBatch]
 
 Predictions = npt.NDArray[np.int_] | npt.NDArray[np.str_]
 ClassProbablities = pd.DataFrame

From b0abe970a677e487c7a6824c02eb1ded4858784f Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Tue, 25 Jul 2023 10:47:52 -0700
Subject: [PATCH 18/42] Use Union instead of | for multi generic typing

---
 cleanlab_studio/studio/inference.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index d492795c..dc75dec4 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -15,7 +15,7 @@
 TabularBatch = Union[pd.DataFrame]
 Batch = Union[TextBatch, TabularBatch]
 
-Predictions = npt.NDArray[np.int_] | npt.NDArray[np.str_]
+Predictions = Union[npt.NDArray[np.int_], npt.NDArray[np.str_]]
 ClassProbablities = pd.DataFrame
 
 

From d5fb4d4be948d977f85ddc4c7a3202594e494297 Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Tue, 25 Jul 2023 10:55:50 -0700
Subject: [PATCH 19/42] more typing fixes and timeout in prediction

---
 cleanlab_studio/studio/inference.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index dc75dec4..864baee3 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -2,7 +2,8 @@
 import csv
 import functools
 import io
-from typing import List, Union
+import time
+from typing import List, Union, Optional
 
 import numpy as np
 import numpy.typing as npt
@@ -30,7 +31,7 @@ def __init__(self, api_key: str, model_id: str):
     def predict(
         self,
         batch: Batch,
-    ) -> str | Predictions:
+    ) -> Union[str, Predictions]:
         """Gets predictions for batch of examples.
 
         :param batch: batch of example to predict classes for
@@ -39,7 +40,7 @@ def predict(
         csv_batch = self._convert_batch_to_csv(batch)
         return self._predict(csv_batch)
 
-    def _predict(self, batch: io.StringIO) -> str | Predictions:
+    def _predict(self, batch: io.StringIO) -> Union[str, Predictions]:
         """Gets predictions for batch of examples.
 
         :param batch: batch of example to predict classes for, as in-memory CSV file
@@ -49,8 +50,11 @@ def _predict(self, batch: io.StringIO) -> str | Predictions:
         api.start_prediction(self._api_key, query_id)
 
         resp = api.get_prediction_status(self._api_key, query_id)
-        status: str | None = resp["status"]
-        while status == "running":
+        status: Optional[str] = resp["status"]
+        # Set timeout to 10 minutes as inference won't take longer than 10 minutes typically and
+        # to prevent users from getting stuck in this loop indefinitely when there is a failure
+        timeout = time.time() + 60 * 10
+        while status == "running" or time.time() < timeout:
             resp = api.get_prediction_status(self._api_key, query_id)
             status = resp["status"]
 

From 0b2a73c81020e3a57a4c75f65506c47292c36f45 Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Tue, 25 Jul 2023 11:23:20 -0700
Subject: [PATCH 20/42] change timeout condition to and

---
 cleanlab_studio/studio/inference.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index 864baee3..a485f895 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -54,7 +54,7 @@ def _predict(self, batch: io.StringIO) -> Union[str, Predictions]:
         # Set timeout to 10 minutes as inference won't take longer than 10 minutes typically and
         # to prevent users from getting stuck in this loop indefinitely when there is a failure
         timeout = time.time() + 60 * 10
-        while status == "running" or time.time() < timeout:
+        while status == "running" and time.time() < timeout:
             resp = api.get_prediction_status(self._api_key, query_id)
             status = resp["status"]
 

From 4846bf3adb407c94ff21c56269c38023ee78f231 Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Tue, 25 Jul 2023 11:24:20 -0700
Subject: [PATCH 21/42] remove print statement

---
 cleanlab_studio/internal/api/api.py |  1 -
 tests/models/test_prediction.py     | 31 +++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)
 create mode 100644 tests/models/test_prediction.py

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index 1e3e2f3c..a3577cfa 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -393,5 +393,4 @@ def get_prediction_status(api_key: str, query_id: str) -> Dict[str, str]:
 def download_prediction_results(result_url: str) -> io.StringIO:
     """Downloads prediction results from presigned URL."""
     res = requests.get(result_url)
-    print(res.text)
     return io.StringIO(res.text)
diff --git a/tests/models/test_prediction.py b/tests/models/test_prediction.py
new file mode 100644
index 00000000..0bf74a3d
--- /dev/null
+++ b/tests/models/test_prediction.py
@@ -0,0 +1,31 @@
+import os
+
+# os.environ["CLEANLAB_API_BASE_URL"] = "https://api.dev-bc26qf4m.cleanlab.ai/api"
+os.environ["CLEANLAB_API_BASE_URL"] = "http://localhost:8500/api"
+
+from cleanlab_studio import Studio
+import pandas as pd
+
+
+# API_KEY = "350b3ee6fbe64d21a6012ea281ce0ca1"
+# MODEL_ID = "cea761848e5f449b85e34fe347696b53"
+API_KEY = "75f2ab8c962c40169917136756c5d937"
+MODEL_ID = "750dbdfb6549470192573b9646be40e9"
+BATCH = pd.read_csv("/Users/tony/test_files/tabular_grades_test_small.csv")
+TEXT_BATCH = [
+    "This magazine was great for the times but as with all other technology magazines the new stuff isn't as good a lot of advertisments and reviews seem biased.",
+    "We ordered this magazine for our grandson (then 7 going on 30) who was/is deploy into technology. He really enjoyed every issue.",
+    "I didn't receive a full year.  I only receive the magazine twice.  It's a good magazine, I just didn't receive it as promised.",
+    "I was hoping for more technical than what was there. it seems to be more like 'look how cool this is' than a technical publication. It's like sport compact car, but for computers.",
+    "I only received one copy of the mag so I couldn't really find out if it was good reading or not",
+    "This magazine is just ok. I ended up subscribing to pc world instead. They are more for the technician and not just the cusumer.",
+    "There articles are alright, but they screw you on the amount you get as i only got 10 of the 12 months subcription. so be carefull unless you are on the auto renew.",
+    "Excellent product! I love reading through the magazine and learning about the cool new products out there and the cool programs!",
+    "I ordered this hoping to learn more about the latest gadgets, and I did learn some things but in over my head over all.  I do not enjoy this reading at all.",
+    "Love the magazine.  The price through Amazon is well worth it for the knowledge recieved and the subscription process is painless",
+    "I bought this subscription for my son. He is presently building a computer. He said it has lots of good and useful information in it.",
+]
+studio = Studio(API_KEY)
+model = studio.get_model(MODEL_ID)
+results = model.predict(BATCH)
+print(results)

From af6ce97a9210955f052e0bd9d6bd598a3a21e3fd Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Tue, 25 Jul 2023 11:43:08 -0700
Subject: [PATCH 22/42] remove test files again

---
 tests/models/test_prediction.py | 31 -------------------------------
 1 file changed, 31 deletions(-)
 delete mode 100644 tests/models/test_prediction.py

diff --git a/tests/models/test_prediction.py b/tests/models/test_prediction.py
deleted file mode 100644
index 0bf74a3d..00000000
--- a/tests/models/test_prediction.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import os
-
-# os.environ["CLEANLAB_API_BASE_URL"] = "https://api.dev-bc26qf4m.cleanlab.ai/api"
-os.environ["CLEANLAB_API_BASE_URL"] = "http://localhost:8500/api"
-
-from cleanlab_studio import Studio
-import pandas as pd
-
-
-# API_KEY = "350b3ee6fbe64d21a6012ea281ce0ca1"
-# MODEL_ID = "cea761848e5f449b85e34fe347696b53"
-API_KEY = "75f2ab8c962c40169917136756c5d937"
-MODEL_ID = "750dbdfb6549470192573b9646be40e9"
-BATCH = pd.read_csv("/Users/tony/test_files/tabular_grades_test_small.csv")
-TEXT_BATCH = [
-    "This magazine was great for the times but as with all other technology magazines the new stuff isn't as good a lot of advertisments and reviews seem biased.",
-    "We ordered this magazine for our grandson (then 7 going on 30) who was/is deploy into technology. He really enjoyed every issue.",
-    "I didn't receive a full year.  I only receive the magazine twice.  It's a good magazine, I just didn't receive it as promised.",
-    "I was hoping for more technical than what was there. it seems to be more like 'look how cool this is' than a technical publication. It's like sport compact car, but for computers.",
-    "I only received one copy of the mag so I couldn't really find out if it was good reading or not",
-    "This magazine is just ok. I ended up subscribing to pc world instead. They are more for the technician and not just the cusumer.",
-    "There articles are alright, but they screw you on the amount you get as i only got 10 of the 12 months subcription. so be carefull unless you are on the auto renew.",
-    "Excellent product! I love reading through the magazine and learning about the cool new products out there and the cool programs!",
-    "I ordered this hoping to learn more about the latest gadgets, and I did learn some things but in over my head over all.  I do not enjoy this reading at all.",
-    "Love the magazine.  The price through Amazon is well worth it for the knowledge recieved and the subscription process is painless",
-    "I bought this subscription for my son. He is presently building a computer. He said it has lots of good and useful information in it.",
-]
-studio = Studio(API_KEY)
-model = studio.get_model(MODEL_ID)
-results = model.predict(BATCH)
-print(results)

From a2a466c7f8cac6174e4b8af5d5fab6c1310070ab Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Tue, 25 Jul 2023 16:08:14 -0700
Subject: [PATCH 23/42] fix code review comments

---
 cleanlab_studio/internal/api/api.py |  2 +-
 cleanlab_studio/studio/inference.py | 22 +++++++++++---------
 tests/models/test_prediction.py     | 31 +++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+), 10 deletions(-)
 create mode 100644 tests/models/test_prediction.py

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index a3577cfa..4d117efa 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -393,4 +393,4 @@ def get_prediction_status(api_key: str, query_id: str) -> Dict[str, str]:
 def download_prediction_results(result_url: str) -> io.StringIO:
     """Downloads prediction results from presigned URL."""
     res = requests.get(result_url)
-    return io.StringIO(res.text)
+    return io.StringIO(res.raw)
diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index a485f895..24312171 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -9,6 +9,7 @@
 import numpy.typing as npt
 import pandas as pd
 
+from cleanlab_studio.errors import APIError
 from cleanlab_studio.internal.api import api
 
 
@@ -31,16 +32,18 @@ def __init__(self, api_key: str, model_id: str):
     def predict(
         self,
         batch: Batch,
+        timeout: int = 600,
     ) -> Union[str, Predictions]:
         """Gets predictions for batch of examples.
 
         :param batch: batch of example to predict classes for
+        :param timeout: optional parameter to set timeout for predictions in seconds
         :return: predictions from batch
         """
         csv_batch = self._convert_batch_to_csv(batch)
-        return self._predict(csv_batch)
+        return self._predict(csv_batch, timeout)
 
-    def _predict(self, batch: io.StringIO) -> Union[str, Predictions]:
+    def _predict(self, batch: io.StringIO, timeout: int) -> Union[str, Predictions]:
         """Gets predictions for batch of examples.
 
         :param batch: batch of example to predict classes for, as in-memory CSV file
@@ -51,23 +54,24 @@ def _predict(self, batch: io.StringIO) -> Union[str, Predictions]:
 
         resp = api.get_prediction_status(self._api_key, query_id)
         status: Optional[str] = resp["status"]
-        # Set timeout to 10 minutes as inference won't take longer than 10 minutes typically and
-        # to prevent users from getting stuck in this loop indefinitely when there is a failure
-        timeout = time.time() + 60 * 10
+        # Set timeout to prevent users from getting stuck indefinitely when there is a failure
+        timeout = time.time() + timeout
         while status == "running" and time.time() < timeout:
             resp = api.get_prediction_status(self._api_key, query_id)
             status = resp["status"]
+            # Set time.sleep so that the while loop doesn't flood backend with api calls
+            time.sleep(3)
 
         if status == "error":
-            return resp["error_msg"]
+            raise APIError(resp["error_msg"])
         else:
             result_url = resp["result_url"]
-            results: io.StringIO = api.download_prediction_results(result_url)
+            results = api.download_prediction_results(result_url)
             results_converted: Predictions = pd.read_csv(results).to_numpy()
             return results_converted
 
-    @functools.singledispatchmethod
-    def _convert_batch_to_csv(self, batch: Batch) -> io.StringIO:
+    @staticmethod
+    def _convert_batch_to_csv(batch: Batch) -> io.StringIO:
         """Converts batch object to CSV string IO."""
         sio = io.StringIO()
 
diff --git a/tests/models/test_prediction.py b/tests/models/test_prediction.py
new file mode 100644
index 00000000..0bf74a3d
--- /dev/null
+++ b/tests/models/test_prediction.py
@@ -0,0 +1,31 @@
+import os
+
+# os.environ["CLEANLAB_API_BASE_URL"] = "https://api.dev-bc26qf4m.cleanlab.ai/api"
+os.environ["CLEANLAB_API_BASE_URL"] = "http://localhost:8500/api"
+
+from cleanlab_studio import Studio
+import pandas as pd
+
+
+# API_KEY = "350b3ee6fbe64d21a6012ea281ce0ca1"
+# MODEL_ID = "cea761848e5f449b85e34fe347696b53"
+API_KEY = "75f2ab8c962c40169917136756c5d937"
+MODEL_ID = "750dbdfb6549470192573b9646be40e9"
+BATCH = pd.read_csv("/Users/tony/test_files/tabular_grades_test_small.csv")
+TEXT_BATCH = [
+    "This magazine was great for the times but as with all other technology magazines the new stuff isn't as good a lot of advertisments and reviews seem biased.",
+    "We ordered this magazine for our grandson (then 7 going on 30) who was/is deploy into technology. He really enjoyed every issue.",
+    "I didn't receive a full year.  I only receive the magazine twice.  It's a good magazine, I just didn't receive it as promised.",
+    "I was hoping for more technical than what was there. it seems to be more like 'look how cool this is' than a technical publication. It's like sport compact car, but for computers.",
+    "I only received one copy of the mag so I couldn't really find out if it was good reading or not",
+    "This magazine is just ok. I ended up subscribing to pc world instead. They are more for the technician and not just the cusumer.",
+    "There articles are alright, but they screw you on the amount you get as i only got 10 of the 12 months subcription. so be carefull unless you are on the auto renew.",
+    "Excellent product! I love reading through the magazine and learning about the cool new products out there and the cool programs!",
+    "I ordered this hoping to learn more about the latest gadgets, and I did learn some things but in over my head over all.  I do not enjoy this reading at all.",
+    "Love the magazine.  The price through Amazon is well worth it for the knowledge recieved and the subscription process is painless",
+    "I bought this subscription for my son. He is presently building a computer. He said it has lots of good and useful information in it.",
+]
+studio = Studio(API_KEY)
+model = studio.get_model(MODEL_ID)
+results = model.predict(BATCH)
+print(results)

From a3ce52f061e7137b0a32f9ca1b42ed01e5d05914 Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Tue, 25 Jul 2023 16:11:53 -0700
Subject: [PATCH 24/42] remove test files

---
 tests/models/test_prediction.py | 31 -------------------------------
 1 file changed, 31 deletions(-)
 delete mode 100644 tests/models/test_prediction.py

diff --git a/tests/models/test_prediction.py b/tests/models/test_prediction.py
deleted file mode 100644
index 0bf74a3d..00000000
--- a/tests/models/test_prediction.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import os
-
-# os.environ["CLEANLAB_API_BASE_URL"] = "https://api.dev-bc26qf4m.cleanlab.ai/api"
-os.environ["CLEANLAB_API_BASE_URL"] = "http://localhost:8500/api"
-
-from cleanlab_studio import Studio
-import pandas as pd
-
-
-# API_KEY = "350b3ee6fbe64d21a6012ea281ce0ca1"
-# MODEL_ID = "cea761848e5f449b85e34fe347696b53"
-API_KEY = "75f2ab8c962c40169917136756c5d937"
-MODEL_ID = "750dbdfb6549470192573b9646be40e9"
-BATCH = pd.read_csv("/Users/tony/test_files/tabular_grades_test_small.csv")
-TEXT_BATCH = [
-    "This magazine was great for the times but as with all other technology magazines the new stuff isn't as good a lot of advertisments and reviews seem biased.",
-    "We ordered this magazine for our grandson (then 7 going on 30) who was/is deploy into technology. He really enjoyed every issue.",
-    "I didn't receive a full year.  I only receive the magazine twice.  It's a good magazine, I just didn't receive it as promised.",
-    "I was hoping for more technical than what was there. it seems to be more like 'look how cool this is' than a technical publication. It's like sport compact car, but for computers.",
-    "I only received one copy of the mag so I couldn't really find out if it was good reading or not",
-    "This magazine is just ok. I ended up subscribing to pc world instead. They are more for the technician and not just the cusumer.",
-    "There articles are alright, but they screw you on the amount you get as i only got 10 of the 12 months subcription. so be carefull unless you are on the auto renew.",
-    "Excellent product! I love reading through the magazine and learning about the cool new products out there and the cool programs!",
-    "I ordered this hoping to learn more about the latest gadgets, and I did learn some things but in over my head over all.  I do not enjoy this reading at all.",
-    "Love the magazine.  The price through Amazon is well worth it for the knowledge recieved and the subscription process is painless",
-    "I bought this subscription for my son. He is presently building a computer. He said it has lots of good and useful information in it.",
-]
-studio = Studio(API_KEY)
-model = studio.get_model(MODEL_ID)
-results = model.predict(BATCH)
-print(results)

From 9bcb15f25c34460cfe3e89e46236af762eb603e7 Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Tue, 25 Jul 2023 16:31:56 -0700
Subject: [PATCH 25/42] for updating pr

---
 cleanlab_studio/studio/inference.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index 24312171..fc20b272 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -56,6 +56,7 @@ def _predict(self, batch: io.StringIO, timeout: int) -> Union[str, Predictions]:
         status: Optional[str] = resp["status"]
         # Set timeout to prevent users from getting stuck indefinitely when there is a failure
         timeout = time.time() + timeout
+
         while status == "running" and time.time() < timeout:
             resp = api.get_prediction_status(self._api_key, query_id)
             status = resp["status"]

From 8de5c4a8c83a7681287c8c816a02fa78a4e2f6bc Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Tue, 25 Jul 2023 16:39:23 -0700
Subject: [PATCH 26/42] change timeout to new var name

---
 cleanlab_studio/studio/inference.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index fc20b272..45af2fb3 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -55,9 +55,9 @@ def _predict(self, batch: io.StringIO, timeout: int) -> Union[str, Predictions]:
         resp = api.get_prediction_status(self._api_key, query_id)
         status: Optional[str] = resp["status"]
         # Set timeout to prevent users from getting stuck indefinitely when there is a failure
-        timeout = time.time() + timeout
+        timeout_limit = time.time() + timeout
 
-        while status == "running" and time.time() < timeout:
+        while status == "running" and time.time() < timeout_limit:
             resp = api.get_prediction_status(self._api_key, query_id)
             status = resp["status"]
             # Set time.sleep so that the while loop doesn't flood backend with api calls

From fd758ea718b1529e0e9f8bbfc3b79b81fc51a005 Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Wed, 26 Jul 2023 12:33:15 -0700
Subject: [PATCH 27/42] remove header replace logic

---
 cleanlab_studio/internal/api/api.py | 9 +--------
 cleanlab_studio/studio/inference.py | 3 +++
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index 4d117efa..68506c28 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -346,15 +346,8 @@ def upload_predict_batch(api_key: str, model_id: str, batch: io.StringIO) -> str
     handle_api_error(res)
     presigned_url = res.json()["upload_url"]
     query_id: str = res.json()["query_id"]
-    modality = res.json()["modality"]
-    header = res.json()["header"]
-    if modality == "text":
-        header_io = io.StringIO(header)
-        input_batch = io.StringIO("\n".join(chain(header_io, batch)))
-    else:
-        input_batch = batch
 
-    requests.post(presigned_url["url"], data=presigned_url["fields"], files={"file": input_batch})
+    requests.post(presigned_url["url"], data=presigned_url["fields"], files={"file": batch})
 
     return query_id
 
diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index 45af2fb3..39a2aedb 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -80,6 +80,9 @@ def _convert_batch_to_csv(batch: Batch) -> io.StringIO:
         if isinstance(batch, (list, np.ndarray, pd.Series)):
             writer = csv.writer(sio)
 
+            # write header
+            writer.writerow(["text"])
+
             # write labels to CSV
             for input_data in batch:
                 writer.writerow([input_data])

From 658b5faa0da055cac47cb53cec8dcb1d88fc34a7 Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Wed, 26 Jul 2023 13:59:47 -0700
Subject: [PATCH 28/42] modify predict function to take care of text inputs

---
 cleanlab_studio/internal/api/api.py | 6 +++---
 cleanlab_studio/studio/inference.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index 68506c28..387b26e4 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -352,10 +352,10 @@ def upload_predict_batch(api_key: str, model_id: str, batch: io.StringIO) -> str
     return query_id
 
 
-def start_prediction(api_key: str, query_id: str) -> None:
+def start_prediction(api_key: str, model_id: str, query_id: str) -> None:
     """Starts prediction for query."""
     res = requests.post(
-        f"{model_base_url}/predict/{query_id}",
+        f"{model_base_url}/{model_id}/predict/{query_id}",
         headers=_construct_headers(api_key),
     )
 
@@ -386,4 +386,4 @@ def get_prediction_status(api_key: str, query_id: str) -> Dict[str, str]:
 def download_prediction_results(result_url: str) -> io.StringIO:
     """Downloads prediction results from presigned URL."""
     res = requests.get(result_url)
-    return io.StringIO(res.raw)
+    return io.StringIO(res.text)
diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index 39a2aedb..1e8f8d91 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -50,7 +50,7 @@ def _predict(self, batch: io.StringIO, timeout: int) -> Union[str, Predictions]:
         :return: predictions from batch
         """
         query_id: str = api.upload_predict_batch(self._api_key, self._model_id, batch)
-        api.start_prediction(self._api_key, query_id)
+        api.start_prediction(self._api_key, self._model_id, query_id)
 
         resp = api.get_prediction_status(self._api_key, query_id)
         status: Optional[str] = resp["status"]

From 612a522b716c4efd352451aca174922e1bf300ba Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Thu, 27 Jul 2023 10:59:59 -0700
Subject: [PATCH 29/42] remove download api endpoint and supply url directly to
 pandas

---
 cleanlab_studio/internal/api/api.py | 6 ------
 cleanlab_studio/studio/inference.py | 3 +--
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index 387b26e4..c488c288 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -381,9 +381,3 @@ def get_prediction_status(api_key: str, query_id: str) -> Dict[str, str]:
         return {"status": "error", "error_msg": error_msg}
     else:
         return {"status": "running"}
-
-
-def download_prediction_results(result_url: str) -> io.StringIO:
-    """Downloads prediction results from presigned URL."""
-    res = requests.get(result_url)
-    return io.StringIO(res.text)
diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index 1e8f8d91..04a10a22 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -67,8 +67,7 @@ def _predict(self, batch: io.StringIO, timeout: int) -> Union[str, Predictions]:
             raise APIError(resp["error_msg"])
         else:
             result_url = resp["result_url"]
-            results = api.download_prediction_results(result_url)
-            results_converted: Predictions = pd.read_csv(results).to_numpy()
+            results_converted: Predictions = pd.read_csv(result_url).to_numpy()
             return results_converted
 
     @staticmethod

From 40bffbf360e77da840f87eb4b2d3eef3f8b76b05 Mon Sep 17 00:00:00 2001
From: taekang1618 <tony@cleanlab.ai>
Date: Fri, 28 Jul 2023 12:41:24 -0700
Subject: [PATCH 30/42] update docstring to match documentation format

---
 cleanlab_studio/studio/inference.py | 12 ++++++++----
 cleanlab_studio/studio/studio.py    | 10 +++++++++-
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index 04a10a22..cc039407 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -34,11 +34,15 @@ def predict(
         batch: Batch,
         timeout: int = 600,
     ) -> Union[str, Predictions]:
-        """Gets predictions for batch of examples.
+        """
+        Gets predictions for batch of examples.
 
-        :param batch: batch of example to predict classes for
-        :param timeout: optional parameter to set timeout for predictions in seconds
-        :return: predictions from batch
+        Args:
+            batch: batch of example to predict classes for
+            timeout: optional parameter to set timeout for predictions in seconds
+
+        Returns:
+            predictions from batch as a numpy array or an error message if predictions fail
         """
         csv_batch = self._convert_batch_to_csv(batch)
         return self._predict(csv_batch, timeout)
diff --git a/cleanlab_studio/studio/studio.py b/cleanlab_studio/studio/studio.py
index 7c0e9cb3..3485b889 100644
--- a/cleanlab_studio/studio/studio.py
+++ b/cleanlab_studio/studio/studio.py
@@ -291,7 +291,15 @@ def delete_project(self, project_id: str) -> None:
         print(f"Successfully deleted project: {project_id}")
 
     def get_model(self, model_id: str) -> inference.Model:
-        """Creates model object from model ID, to use for inference."""
+        """
+        Gets a model deployed by Cleanlab Studio.
+
+        Args:
+            model_id: ID of model to get. This ID should be fetched in the deployments page of the app UI.
+
+        Returns:
+            Model object with methods run predictions on new input data
+        """
         return inference.Model(self._api_key, model_id)
 
     class Experimental:

From d83e5e1d5b360c5f655aafe6c667b844243edcb3 Mon Sep 17 00:00:00 2001
From: ryansingman <ryanjs@vt.edu>
Date: Mon, 31 Jul 2023 14:23:28 -0600
Subject: [PATCH 31/42] fix predict timeout

---
 cleanlab_studio/studio/inference.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index cc039407..bffd5988 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -14,7 +14,7 @@
 
 
 TextBatch = Union[List[str], npt.NDArray[np.str_], pd.Series]
-TabularBatch = Union[pd.DataFrame]
+TabularBatch = pd.DataFrame
 Batch = Union[TextBatch, TabularBatch]
 
 Predictions = Union[npt.NDArray[np.int_], npt.NDArray[np.str_]]
@@ -62,13 +62,15 @@ def _predict(self, batch: io.StringIO, timeout: int) -> Union[str, Predictions]:
         timeout_limit = time.time() + timeout
 
         while status == "running" and time.time() < timeout_limit:
+            time.sleep(1)
+
             resp = api.get_prediction_status(self._api_key, query_id)
             status = resp["status"]
-            # Set time.sleep so that the while loop doesn't flood backend with api calls
-            time.sleep(3)
 
         if status == "error":
             raise APIError(resp["error_msg"])
+        elif status == "running":
+            raise TimeoutError("Timeout of {timeout}s expired while waiting for prediction")
         else:
             result_url = resp["result_url"]
             results_converted: Predictions = pd.read_csv(result_url).to_numpy()

From e2665188217c321203f2c73a15083314b1622be6 Mon Sep 17 00:00:00 2001
From: ryansingman <ryanjs@vt.edu>
Date: Mon, 31 Jul 2023 14:28:27 -0600
Subject: [PATCH 32/42] mypy fix

---
 cleanlab_studio/studio/inference.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index bffd5988..f6e51fbd 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -1,9 +1,8 @@
 import abc
 import csv
-import functools
 import io
 import time
-from typing import List, Union, Optional
+from typing import List, TypeAlias, Union, Optional
 
 import numpy as np
 import numpy.typing as npt
@@ -14,7 +13,7 @@
 
 
 TextBatch = Union[List[str], npt.NDArray[np.str_], pd.Series]
-TabularBatch = pd.DataFrame
+TabularBatch: TypeAlias = pd.DataFrame
 Batch = Union[TextBatch, TabularBatch]
 
 Predictions = Union[npt.NDArray[np.int_], npt.NDArray[np.str_]]

From 8679aade3678603aa2382fbaac2d1e33eb1385e2 Mon Sep 17 00:00:00 2001
From: ryansingman <ryanjs@vt.edu>
Date: Mon, 31 Jul 2023 14:30:08 -0600
Subject: [PATCH 33/42] mypy fix

---
 cleanlab_studio/studio/inference.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index f6e51fbd..df7ebe62 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -2,7 +2,8 @@
 import csv
 import io
 import time
-from typing import List, TypeAlias, Union, Optional
+from typing import List, Union, Optional
+from typing_extensions import TypeAlias
 
 import numpy as np
 import numpy.typing as npt

From c871c8c2eba7565e924e689f983c099be098434d Mon Sep 17 00:00:00 2001
From: ryansingman <ryanjs@vt.edu>
Date: Mon, 31 Jul 2023 14:34:55 -0600
Subject: [PATCH 34/42] add typing extensions req

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index 3dadb41e..873e81b4 100644
--- a/setup.py
+++ b/setup.py
@@ -58,6 +58,7 @@
         "jsonstreams>=0.6.0",
         "semver>=2.13.0,<3.0.0",
         "Pillow>=9.2.0",
+        "typing_extensions==4.2.0",
         "openpyxl==3.0.10",
         "validators>=0.20.0",
     ],

From 13b4c8caf938037d591ac15be86157f19eb3f00b Mon Sep 17 00:00:00 2001
From: ryansingman <ryanjs@vt.edu>
Date: Mon, 31 Jul 2023 14:53:36 -0600
Subject: [PATCH 35/42] fix incorrect return types, return predictions separate
 from class probs

---
 cleanlab_studio/studio/inference.py | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index df7ebe62..99e780de 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -2,7 +2,7 @@
 import csv
 import io
 import time
-from typing import List, Union, Optional
+from typing import List, Optional, Tuple, Union
 from typing_extensions import TypeAlias
 
 import numpy as np
@@ -32,26 +32,33 @@ def __init__(self, api_key: str, model_id: str):
     def predict(
         self,
         batch: Batch,
+        return_pred_proba: bool = False,
         timeout: int = 600,
-    ) -> Union[str, Predictions]:
+    ) -> Union[Predictions, Tuple[Predictions, ClassProbablities]]:
         """
         Gets predictions for batch of examples.
 
         Args:
             batch: batch of example to predict classes for
+            return_pred_proba: if should return class probabilities for each example
             timeout: optional parameter to set timeout for predictions in seconds
 
         Returns:
-            predictions from batch as a numpy array or an error message if predictions fail
+            predictions from batch as a numpy array
         """
         csv_batch = self._convert_batch_to_csv(batch)
-        return self._predict(csv_batch, timeout)
+        predictions, class_probabilities = self._predict(csv_batch, timeout)
 
-    def _predict(self, batch: io.StringIO, timeout: int) -> Union[str, Predictions]:
+        if return_pred_proba:
+            return predictions, class_probabilities
+
+        return predictions
+
+    def _predict(self, batch: io.StringIO, timeout: int) -> Tuple[Predictions, ClassProbablities]:
         """Gets predictions for batch of examples.
 
         :param batch: batch of example to predict classes for, as in-memory CSV file
-        :return: predictions from batch
+        :return: predictions from batch, class probabilities
         """
         query_id: str = api.upload_predict_batch(self._api_key, self._model_id, batch)
         api.start_prediction(self._api_key, self._model_id, query_id)
@@ -70,11 +77,12 @@ def _predict(self, batch: io.StringIO, timeout: int) -> Union[str, Predictions]:
         if status == "error":
             raise APIError(resp["error_msg"])
         elif status == "running":
-            raise TimeoutError("Timeout of {timeout}s expired while waiting for prediction")
+            raise TimeoutError(f"Timeout of {timeout}s expired while waiting for prediction")
         else:
             result_url = resp["result_url"]
-            results_converted: Predictions = pd.read_csv(result_url).to_numpy()
-            return results_converted
+            results: pd.DataFrame = pd.read_csv(result_url)
+
+            return results.pop("Suggested Label").to_numpy(), results
 
     @staticmethod
     def _convert_batch_to_csv(batch: Batch) -> io.StringIO:

From 8b7b8deb0dee4bdecd1df8d19e918afbb402f3c4 Mon Sep 17 00:00:00 2001
From: ryansingman <ryanjs@vt.edu>
Date: Mon, 31 Jul 2023 14:57:19 -0600
Subject: [PATCH 36/42] mypy fix

---
 cleanlab_studio/studio/inference.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index 99e780de..1660c3b5 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -18,7 +18,7 @@
 Batch = Union[TextBatch, TabularBatch]
 
 Predictions = Union[npt.NDArray[np.int_], npt.NDArray[np.str_]]
-ClassProbablities = pd.DataFrame
+ClassProbablities: TypeAlias = pd.DataFrame
 
 
 class Model(abc.ABC):

From 4261e5a2880004d57dcb93bf4bb9035347dc7bd0 Mon Sep 17 00:00:00 2001
From: ryansingman <ryanjs@vt.edu>
Date: Tue, 1 Aug 2023 11:51:18 -0600
Subject: [PATCH 37/42] clean up polling interface, address Angela's comments

---
 cleanlab_studio/internal/api/api.py | 14 ++------------
 cleanlab_studio/studio/inference.py | 30 ++++++++++++-----------------
 cleanlab_studio/studio/studio.py    |  2 +-
 3 files changed, 15 insertions(+), 31 deletions(-)

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index b4329997..064a9927 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -364,21 +364,11 @@ def start_prediction(api_key: str, model_id: str, query_id: str) -> None:
 
 
 def get_prediction_status(api_key: str, query_id: str) -> Dict[str, str]:
-    """Gets status of model prediction query."""
+    """Gets status of model prediction query. Returns status, and optionally the result_url or error message."""
     res = requests.get(
         f"{model_base_url}/predict/{query_id}",
         headers=_construct_headers(api_key),
     )
     handle_api_error(res)
 
-    prediction_results = res.json()
-    status = prediction_results["status"]
-    result_url = prediction_results["results"]
-    error_msg = prediction_results["error_msg"]
-
-    if status == "COMPLETE":
-        return {"status": "done", "result_url": result_url}
-    elif status == "FAILED":
-        return {"status": "error", "error_msg": error_msg}
-    else:
-        return {"status": "running"}
+    return res.json()
diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index 1660c3b5..d895b967 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -39,22 +39,24 @@ def predict(
         Gets predictions for batch of examples.
 
         Args:
-            batch: batch of example to predict classes for
+            batch: batch of examples to predict classes for
             return_pred_proba: if should return class probabilities for each example
             timeout: optional parameter to set timeout for predictions in seconds
 
         Returns:
-            predictions from batch as a numpy array
+            predictions from batch as a numpy array, optionally also pandas dataframe of class probabilties
         """
         csv_batch = self._convert_batch_to_csv(batch)
-        predictions, class_probabilities = self._predict(csv_batch, timeout)
+        predictions, class_probabilities = self._predict_from_csv(csv_batch, timeout)
 
         if return_pred_proba:
             return predictions, class_probabilities
 
         return predictions
 
-    def _predict(self, batch: io.StringIO, timeout: int) -> Tuple[Predictions, ClassProbablities]:
+    def _predict_from_csv(
+        self, batch: io.StringIO, timeout: int
+    ) -> Tuple[Predictions, ClassProbablities]:
         """Gets predictions for batch of examples.
 
         :param batch: batch of example to predict classes for, as in-memory CSV file
@@ -63,26 +65,18 @@ def _predict(self, batch: io.StringIO, timeout: int) -> Tuple[Predictions, Class
         query_id: str = api.upload_predict_batch(self._api_key, self._model_id, batch)
         api.start_prediction(self._api_key, self._model_id, query_id)
 
-        resp = api.get_prediction_status(self._api_key, query_id)
-        status: Optional[str] = resp["status"]
         # Set timeout to prevent users from getting stuck indefinitely when there is a failure
         timeout_limit = time.time() + timeout
-
-        while status == "running" and time.time() < timeout_limit:
+        while time.time() < timeout_limit:
+            resp = api.get_prediction_status(self._api_key, query_id)
             time.sleep(1)
 
-            resp = api.get_prediction_status(self._api_key, query_id)
-            status = resp["status"]
+            if result_url := resp.get("result_url"):
+                results: pd.DataFrame = pd.read_csv(result_url)
+                return results.pop("Suggested Label").to_numpy(), results
 
-        if status == "error":
-            raise APIError(resp["error_msg"])
-        elif status == "running":
-            raise TimeoutError(f"Timeout of {timeout}s expired while waiting for prediction")
         else:
-            result_url = resp["result_url"]
-            results: pd.DataFrame = pd.read_csv(result_url)
-
-            return results.pop("Suggested Label").to_numpy(), results
+            raise TimeoutError(f"Timeout of {timeout}s expired while waiting for prediction")
 
     @staticmethod
     def _convert_batch_to_csv(batch: Batch) -> io.StringIO:
diff --git a/cleanlab_studio/studio/studio.py b/cleanlab_studio/studio/studio.py
index 3485b889..a506ed82 100644
--- a/cleanlab_studio/studio/studio.py
+++ b/cleanlab_studio/studio/studio.py
@@ -298,7 +298,7 @@ def get_model(self, model_id: str) -> inference.Model:
             model_id: ID of model to get. This ID should be fetched in the deployments page of the app UI.
 
         Returns:
-            Model object with methods run predictions on new input data
+            Model object with methods to run predictions on new input data
         """
         return inference.Model(self._api_key, model_id)
 

From 24b47339672c16d83f47fe60fdc8682616b202b8 Mon Sep 17 00:00:00 2001
From: ryansingman <ryanjs@vt.edu>
Date: Tue, 1 Aug 2023 11:52:53 -0600
Subject: [PATCH 38/42] fix sleep placement in poll loop

---
 cleanlab_studio/studio/inference.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index d895b967..8114d0b2 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -69,12 +69,13 @@ def _predict_from_csv(
         timeout_limit = time.time() + timeout
         while time.time() < timeout_limit:
             resp = api.get_prediction_status(self._api_key, query_id)
-            time.sleep(1)
 
             if result_url := resp.get("result_url"):
                 results: pd.DataFrame = pd.read_csv(result_url)
                 return results.pop("Suggested Label").to_numpy(), results
 
+            time.sleep(1)
+
         else:
             raise TimeoutError(f"Timeout of {timeout}s expired while waiting for prediction")
 

From 260386827101eb2b69cc95facfd55ece9aa04996 Mon Sep 17 00:00:00 2001
From: ryansingman <ryanjs@vt.edu>
Date: Tue, 1 Aug 2023 11:56:26 -0600
Subject: [PATCH 39/42] mypy fix

---
 cleanlab_studio/internal/api/api.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index 064a9927..72699eec 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -1,8 +1,7 @@
 import io
 import os
 import time
-from itertools import chain
-from typing import Callable, List, Optional, Tuple, Dict, Union, Any
+from typing import Callable, cast, List, Optional, Tuple, Dict, Union, Any
 from cleanlab_studio.errors import APIError
 
 import requests
@@ -371,4 +370,4 @@ def get_prediction_status(api_key: str, query_id: str) -> Dict[str, str]:
     )
     handle_api_error(res)
 
-    return res.json()
+    return cast(dict, res.json())

From f7cdefab0431d487e6344299db694535e54457a3 Mon Sep 17 00:00:00 2001
From: ryansingman <ryanjs@vt.edu>
Date: Tue, 1 Aug 2023 11:58:05 -0600
Subject: [PATCH 40/42] mypy fix

---
 cleanlab_studio/internal/api/api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index 72699eec..aa8177c9 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -370,4 +370,4 @@ def get_prediction_status(api_key: str, query_id: str) -> Dict[str, str]:
     )
     handle_api_error(res)
 
-    return cast(dict, res.json())
+    return cast(Dict[str, str], res.json())

From f1d5102e497eebeef39029663f2c78b5b84ac1b6 Mon Sep 17 00:00:00 2001
From: ryansingman <ryanjs@vt.edu>
Date: Tue, 1 Aug 2023 21:11:40 -0600
Subject: [PATCH 41/42] fix results name

---
 cleanlab_studio/studio/inference.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index 8114d0b2..c12495ec 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -70,7 +70,7 @@ def _predict_from_csv(
         while time.time() < timeout_limit:
             resp = api.get_prediction_status(self._api_key, query_id)
 
-            if result_url := resp.get("result_url"):
+            if result_url := resp.get("results"):
                 results: pd.DataFrame = pd.read_csv(result_url)
                 return results.pop("Suggested Label").to_numpy(), results
 

From af4d02fda5d2f0ecf002983981d9d516f35a4147 Mon Sep 17 00:00:00 2001
From: ryansingman <ryanjs@vt.edu>
Date: Wed, 2 Aug 2023 11:43:34 -0600
Subject: [PATCH 42/42] fix nits

---
 cleanlab_studio/internal/api/api.py | 2 +-
 cleanlab_studio/studio/inference.py | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py
index aa8177c9..918c0257 100644
--- a/cleanlab_studio/internal/api/api.py
+++ b/cleanlab_studio/internal/api/api.py
@@ -363,7 +363,7 @@ def start_prediction(api_key: str, model_id: str, query_id: str) -> None:
 
 
 def get_prediction_status(api_key: str, query_id: str) -> Dict[str, str]:
-    """Gets status of model prediction query. Returns status, and optionally the result_url or error message."""
+    """Gets status of model prediction query. Returns status, and optionally the result url or error message."""
     res = requests.get(
         f"{model_base_url}/predict/{query_id}",
         headers=_construct_headers(api_key),
diff --git a/cleanlab_studio/studio/inference.py b/cleanlab_studio/studio/inference.py
index c12495ec..7f84fc0c 100644
--- a/cleanlab_studio/studio/inference.py
+++ b/cleanlab_studio/studio/inference.py
@@ -2,14 +2,13 @@
 import csv
 import io
 import time
-from typing import List, Optional, Tuple, Union
+from typing import List, Tuple, Union
 from typing_extensions import TypeAlias
 
 import numpy as np
 import numpy.typing as npt
 import pandas as pd
 
-from cleanlab_studio.errors import APIError
 from cleanlab_studio.internal.api import api