From c842e8312570e04aefcbb886e7159d225f5b8d4e Mon Sep 17 00:00:00 2001
From: Ilya Kreymer <ikreymer@gmail.com>
Date: Sat, 11 Nov 2023 12:23:23 -0800
Subject: [PATCH] add types, fix tests to check all successful states
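
is_crawl_stopping() now returns the stop reason as an Optional[str]
instead of mutating CrawlStatus in place; update_crawl_state() assigns
status.stopReason and status.stopping from the returned value, latching
the first reason seen. Type stubs (types-redis, types-python-slugify,
types-PyYAML) are added for mypy, and the test polling loops now accept
any state in conftest's SUCCESSFUL_STATES rather than only "complete".
A simplified, illustrative sketch of the stop-reason flow is included
after the diff for reference.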

---
 backend/btrixcloud/operator.py                | 40 ++++++++-----------
 backend/requirements.txt                      |  3 ++
 backend/test/test_crawlconfigs.py             |  4 +-
 backend/test/test_webhooks.py                 |  4 +-
 .../test_workflow_auto_add_to_collection.py   |  4 +-
 5 files changed, 26 insertions(+), 29 deletions(-)

diff --git a/backend/btrixcloud/operator.py b/backend/btrixcloud/operator.py
index 49b8331766..af85f9458a 100644
--- a/backend/btrixcloud/operator.py
+++ b/backend/btrixcloud/operator.py
@@ -649,9 +649,9 @@ async def set_state(self, state, status, crawl_id, allowed_from, **kwargs):
         from starting to running:
          - starting -> running
 
-        from running to complete or partial_complete:
+        from running to complete or complete[:stopReason]:
+         - running -> complete[:stopReason]
          - running -> complete
-         - running -> partial_complete
 
         from starting or running to waiting for capacity (pods pending) and back:
          - starting -> waiting_capacity
@@ -659,8 +659,8 @@ async def set_state(self, state, status, crawl_id, allowed_from, **kwargs):
          - waiting_capacity -> running
 
         from any state to canceled or failed:
-         - not complete or partial_complete -> canceled
-         - not complete or partial_complete -> failed
+         - not complete[:stopReason] -> canceled
+         - not complete[:stopReason] -> failed
         """
         if not allowed_from or status.state in allowed_from:
             res = await self.crawl_ops.update_crawl_state_if_allowed(
@@ -1280,15 +1280,14 @@ async def add_file_to_crawl(self, cc_data, crawl, redis):
 
         return True
 
-    async def is_crawl_stopping(self, crawl: CrawlSpec, status: CrawlStatus) -> None:
+    async def is_crawl_stopping(
+        self, crawl: CrawlSpec, status: CrawlStatus
+    ) -> Optional[str]:
         """check if crawl is stopping and set reason"""
-
         # if user requested stop, then enter stopping phase
         if crawl.stopping:
             print("Graceful Stop: User requested stop")
-            status.stopping = True
-            status.stopReason = "user-stop"
-            return
+            return "user-stop"
 
         # check timeout if timeout time exceeds elapsed time
         if crawl.timeout:
@@ -1300,28 +1299,21 @@ async def is_crawl_stopping(self, crawl: CrawlSpec, status: CrawlStatus) -> None
                 print(
                     f"Graceful Stop: Crawl running time exceeded {crawl.timeout} second timeout"
                 )
-                status.stopping = True
-                status.stopReason = "time-limit"
-                return
+                return "time-limit"
 
         # crawl size limit
         if crawl.max_crawl_size and status.size > crawl.max_crawl_size:
             print(f"Graceful Stop: Maximum crawl size {crawl.max_crawl_size} hit")
-            status.stopping = True
-            status.stopReason = "size-limit"
-            return
+            return "size-limit"
 
         # check exec time quotas and stop if reached limit
         if await self.org_ops.exec_mins_quota_reached(crawl.oid):
-            status.stopping = True
-            status.stopReason = "exec-time-quota"
-            return
+            return "exec-time-quota"
 
         if self.max_pages_per_crawl and status.pagesFound >= self.max_pages_per_crawl:
-            # will stop on its own
-            status.stopping = False
-            status.stopReason = "page-limit"
-            return
+            return "page-limit"
+
+        return None
 
     async def get_redis_crawl_stats(self, redis: Redis, crawl_id: str):
         """get page stats"""
@@ -1368,7 +1360,9 @@ async def update_crawl_state(
                 pod_info = status.podStatus[key]
                 pod_info.used.storage = value
 
-        await self.is_crawl_stopping(crawl, status)
+        if not status.stopReason:
+            status.stopReason = await self.is_crawl_stopping(crawl, status)
+            status.stopping = status.stopReason is not None
 
         # mark crawl as stopping
         if status.stopping:
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 365e03d44c..d16d5f9d33 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -24,3 +24,6 @@ backoff>=2.2.1
 python-slugify>=8.0.1
 mypy_boto3_s3
 types_aiobotocore_s3
+types-redis
+types-python-slugify
+types-PyYAML
diff --git a/backend/test/test_crawlconfigs.py b/backend/test/test_crawlconfigs.py
index a50ab55c84..a2e4b42319 100644
--- a/backend/test/test_crawlconfigs.py
+++ b/backend/test/test_crawlconfigs.py
@@ -2,7 +2,7 @@
 
 import requests
 
-from .conftest import API_PREFIX
+from .conftest import API_PREFIX, SUCCESSFUL_STATES
 
 
 cid = None
@@ -361,7 +361,7 @@ def test_incremental_workflow_total_size_and_last_crawl_stats(
             headers=crawler_auth_headers,
         )
         data = r.json()
-        if data["state"] == "complete":
+        if data["state"] in SUCCESSFUL_STATES:
             break
         time.sleep(5)
 
diff --git a/backend/test/test_webhooks.py b/backend/test/test_webhooks.py
index 17ded4d71b..4e74d7142d 100644
--- a/backend/test/test_webhooks.py
+++ b/backend/test/test_webhooks.py
@@ -4,7 +4,7 @@
 
 import requests
 
-from .conftest import API_PREFIX
+from .conftest import API_PREFIX, SUCCESSFUL_STATES
 from .utils import read_in_chunks
 
 _webhook_event_id = None
@@ -191,7 +191,7 @@ def test_webhooks_sent(
             headers=admin_auth_headers,
         )
         data = r.json()
-        if data["state"] == "complete":
+        if data["state"] in SUCCESSFUL_STATES:
             break
         time.sleep(5)
 
diff --git a/backend/test/test_workflow_auto_add_to_collection.py b/backend/test/test_workflow_auto_add_to_collection.py
index bf3a16bcd7..1fd6e186e8 100644
--- a/backend/test/test_workflow_auto_add_to_collection.py
+++ b/backend/test/test_workflow_auto_add_to_collection.py
@@ -1,7 +1,7 @@
 import requests
 import time
 
-from .conftest import API_PREFIX
+from .conftest import API_PREFIX, SUCCESSFUL_STATES
 
 
 def test_workflow_crawl_auto_added_to_collection(
@@ -50,7 +50,7 @@ def test_workflow_crawl_auto_added_subsequent_runs(
             headers=crawler_auth_headers,
         )
         data = r.json()
-        if data["state"] == "complete":
+        if data["state"] in SUCCESSFUL_STATES:
             break
         time.sleep(5)
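
For reference, a minimal, self-contained sketch of the stop-reason flow this
patch adopts. It is illustrative only and not part of the patch: CrawlSpec and
CrawlStatus are reduced to stand-in dataclasses with just the fields exercised
here, and only two of the stop conditions from the diff are shown.

    from dataclasses import dataclass
    from typing import Optional


    @dataclass
    class CrawlSpec:
        """stand-in for the real CrawlSpec; only the fields used below"""

        stopping: bool = False
        max_crawl_size: int = 0


    @dataclass
    class CrawlStatus:
        """stand-in for the real CrawlStatus; only the fields used below"""

        size: int = 0
        stopping: bool = False
        stopReason: Optional[str] = None


    async def is_crawl_stopping(
        crawl: CrawlSpec, status: CrawlStatus
    ) -> Optional[str]:
        """return a stop reason, or None if the crawl should keep running"""
        if crawl.stopping:
            return "user-stop"

        if crawl.max_crawl_size and status.size > crawl.max_crawl_size:
            return "size-limit"

        return None


    async def update_crawl_state(crawl: CrawlSpec, status: CrawlStatus) -> None:
        """latch the first stop reason seen so later checks don't overwrite it"""
        if not status.stopReason:
            status.stopReason = await is_crawl_stopping(crawl, status)
            status.stopping = status.stopReason is not None

Because the reason is latched once set, repeated reconcile passes will not
overwrite an earlier reason (for example "user-stop") with a later one (for
example "size-limit").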