add types, fix tests to check all successful states

ikreymer committed Nov 11, 2023
1 parent 40c16a7 commit c842e83
Showing 5 changed files with 26 additions and 29 deletions.
40 changes: 17 additions & 23 deletions backend/btrixcloud/operator.py
@@ -649,18 +649,18 @@ async def set_state(self, state, status, crawl_id, allowed_from, **kwargs):
         from starting to running:
          - starting -> running
-        from running to complete or partial_complete:
-         - running -> complete
-         - running -> partial_complete
+        from running to complete or complete[:stopReason]:
+         - running -> complete[:stopReason]
         from starting or running to waiting for capacity (pods pending) and back:
          - starting -> waiting_capacity
          - running -> waiting_capacity
          - waiting_capacity -> running
         from any state to canceled or failed:
-         - not complete or partial_complete -> canceled
-         - not complete or partial_complete -> failed
+         - not complete[:stopReason] -> canceled
+         - not complete[:stopReason] -> failed
         """
         if not allowed_from or status.state in allowed_from:
             res = await self.crawl_ops.update_crawl_state_if_allowed(
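
The transition table above is enforced by the allowed_from check: a caller names the states a transition may start from. A hypothetical call site (not part of this diff) that permits entering "running" only from the two states the docstring allows:

    # Hypothetical call site; per the docstring, "running" is reachable
    # only from "starting" or "waiting_capacity".
    await self.set_state(
        "running",
        status,
        crawl_id,
        allowed_from=["starting", "waiting_capacity"],
    )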
@@ -1280,15 +1280,14 @@ async def add_file_to_crawl(self, cc_data, crawl, redis):

         return True

-    async def is_crawl_stopping(self, crawl: CrawlSpec, status: CrawlStatus) -> None:
+    async def is_crawl_stopping(
+        self, crawl: CrawlSpec, status: CrawlStatus
+    ) -> Optional[str]:
         """check if crawl is stopping and set reason"""

         # if user requested stop, then enter stopping phase
         if crawl.stopping:
             print("Graceful Stop: User requested stop")
-            status.stopping = True
-            status.stopReason = "user-stop"
-            return
+            return "user-stop"

         # check timeout if timeout time exceeds elapsed time
         if crawl.timeout:
@@ -1300,28 +1299,21 @@ async def is_crawl_stopping(self, crawl: CrawlSpec, status: CrawlStatus) -> None
             print(
                 f"Graceful Stop: Crawl running time exceeded {crawl.timeout} second timeout"
             )
-            status.stopping = True
-            status.stopReason = "time-limit"
-            return
+            return "time-limit"

         # crawl size limit
         if crawl.max_crawl_size and status.size > crawl.max_crawl_size:
             print(f"Graceful Stop: Maximum crawl size {crawl.max_crawl_size} hit")
-            status.stopping = True
-            status.stopReason = "size-limit"
-            return
+            return "size-limit"

         # check exec time quotas and stop if reached limit
         if await self.org_ops.exec_mins_quota_reached(crawl.oid):
-            status.stopping = True
-            status.stopReason = "exec-time-quota"
-            return
+            return "exec-time-quota"

         if self.max_pages_per_crawl and status.pagesFound >= self.max_pages_per_crawl:
             # will stop on its own
-            status.stopping = False
-            status.stopReason = "page-limit"
-            return
+            return "page-limit"
+
+        return None

     async def get_redis_crawl_stats(self, redis: Redis, crawl_id: str):
         """get page stats"""
@@ -1368,7 +1360,9 @@ async def update_crawl_state(
             pod_info = status.podStatus[key]
             pod_info.used.storage = value

-        await self.is_crawl_stopping(crawl, status)
+        if not status.stopReason:
+            status.stopReason = await self.is_crawl_stopping(crawl, status)
+            status.stopping = status.stopReason is not None

         # mark crawl as stopping
         if status.stopping:
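
Net effect in the operator: the checks report a reason and the caller records it, so stopReason is written once and stopping stays derived from it. A condensed, self-contained sketch of that flow (field names from the diff; the checks themselves are stubbed out):

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class CrawlStatus:
        stopping: bool = False
        stopReason: Optional[str] = None

    async def is_crawl_stopping() -> Optional[str]:
        # Stub standing in for the user-stop/timeout/size/quota/page checks.
        return None

    async def reconcile(status: CrawlStatus) -> None:
        # The first reason recorded wins; later reconciles leave it untouched.
        if not status.stopReason:
            status.stopReason = await is_crawl_stopping()
            status.stopping = status.stopReason is not None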
3 changes: 3 additions & 0 deletions backend/requirements.txt
@@ -24,3 +24,6 @@ backoff>=2.2.1
 python-slugify>=8.0.1
 mypy_boto3_s3
 types_aiobotocore_s3
+types-redis
+types-python-slugify
+types-pyYAML
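
These stub packages give mypy type information for the redis, python-slugify, and PyYAML imports, matching the mypy_boto3_s3 and types_aiobotocore_s3 stubs already listed. For instance, with types-redis installed, mypy can verify an annotated helper like this hypothetical one (the key name is illustrative only):

    from redis.asyncio import Redis

    # Hypothetical helper; types-redis supplies the stubs mypy checks against.
    async def get_pages_found(redis: Redis, crawl_id: str) -> int:
        value = await redis.get(f"{crawl_id}:pages_found")
        return int(value or 0)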
4 changes: 2 additions & 2 deletions backend/test/test_crawlconfigs.py
@@ -2,7 +2,7 @@

 import requests

-from .conftest import API_PREFIX
+from .conftest import API_PREFIX, SUCCESSFUL_STATES


 cid = None
@@ -361,7 +361,7 @@ def test_incremental_workflow_total_size_and_last_crawl_stats(
             headers=crawler_auth_headers,
         )
         data = r.json()
-        if data["state"] == "complete":
+        if data["state"] in SUCCESSFUL_STATES:
             break
         time.sleep(5)
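
This same wait-for-completion loop appears in all three updated test files; factored out, it looks like the following sketch (the repo inlines the loop, and SUCCESSFUL_STATES is defined in conftest, whose members this diff does not show):

    import time
    import requests

    def wait_until_successful(url: str, headers: dict, successful_states) -> dict:
        # Poll the crawl endpoint until it reaches any successful terminal state.
        while True:
            data = requests.get(url, headers=headers).json()
            if data["state"] in successful_states:
                return data
            time.sleep(5)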

4 changes: 2 additions & 2 deletions backend/test/test_webhooks.py
@@ -4,7 +4,7 @@

 import requests

-from .conftest import API_PREFIX
+from .conftest import API_PREFIX, SUCCESSFUL_STATES
 from .utils import read_in_chunks

 _webhook_event_id = None
@@ -191,7 +191,7 @@ def test_webhooks_sent(
             headers=admin_auth_headers,
         )
         data = r.json()
-        if data["state"] == "complete":
+        if data["state"] in SUCCESSFUL_STATES:
             break
         time.sleep(5)

Expand Down
4 changes: 2 additions & 2 deletions backend/test/test_workflow_auto_add_to_collection.py
@@ -1,7 +1,7 @@
 import requests
 import time

-from .conftest import API_PREFIX
+from .conftest import API_PREFIX, SUCCESSFUL_STATES


 def test_workflow_crawl_auto_added_to_collection(
@@ -50,7 +50,7 @@ def test_workflow_crawl_auto_added_subsequent_runs(
             headers=crawler_auth_headers,
         )
         data = r.json()
-        if data["state"] == "complete":
+        if data["state"] in SUCCESSFUL_STATES:
             break
         time.sleep(5)

Expand Down
