add /pagesSearch endpoint after all
ikreymer committed Feb 20, 2025
1 parent 0e1cab9 commit 9dd3a13
Showing 3 changed files with 37 additions and 4 deletions.
backend/btrixcloud/basecrawls.py (3 changes: 2 additions & 1 deletion)
@@ -178,7 +178,8 @@ async def get_crawl_out(
         oid = res.get("oid")
         if oid:
             res["pagesQueryUrl"] = (
-                get_origin(headers) + f"/api/orgs/{oid}/crawls/{crawlid}/pages"
+                get_origin(headers)
+                + f"/api/orgs/{oid}/crawls/{crawlid}/pagesSearch"
             )
 
         crawl = CrawlOutWithResources.from_dict(res)
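
With this change, the pagesQueryUrl advertised in the crawl detail response points at the new /pagesSearch endpoint instead of /pages. A minimal client sketch of following that URL; the deployment origin, token, IDs, and the replay.json detail path are hypothetical placeholders, not confirmed by this commit:

    import requests

    API = "https://app.example.com/api"  # hypothetical deployment origin
    HEADERS = {"Authorization": "Bearer <token>"}  # hypothetical access token

    # Fetch a crawl detail response served by get_crawl_out
    # (assumed here to be the .../replay.json endpoint)
    crawl = requests.get(
        f"{API}/orgs/<oid>/crawls/<crawl_id>/replay.json", headers=HEADERS
    ).json()

    # pagesQueryUrl now ends in /pagesSearch rather than /pages
    pages = requests.get(
        crawl["pagesQueryUrl"], headers=HEADERS, params={"search": "example"}
    ).json()["items"]
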
backend/btrixcloud/operator/bgjobs.py (4 changes: 3 additions & 1 deletion)
@@ -41,7 +41,9 @@ async def finalize_background_job(self, data: MCDecoratorSyncData) -> dict:
         spec = data.object["spec"]
         success = status.get("succeeded") == spec.get("parallelism")
         if not success:
-            print("Succeeded: {status.get('succeeded')}, Num Pods: {spec.get('parallelism')}")
+            print(
+                "Succeeded: {status.get('succeeded')}, Num Pods: {spec.get('parallelism')}"
+            )
         completion_time = status.get("completionTime")
 
         finalized = True
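
This hunk only reflows the print call; note that the string still lacks an f prefix, so the brace placeholders are printed literally rather than interpolated. A quick sketch of the difference, using stand-in values for status and spec:

    status = {"succeeded": 2}
    spec = {"parallelism": 3}

    # As written in the diff: a plain string, so the placeholders print verbatim
    print("Succeeded: {status.get('succeeded')}, Num Pods: {spec.get('parallelism')}")
    # -> Succeeded: {status.get('succeeded')}, Num Pods: {spec.get('parallelism')}

    # With an f prefix, the embedded expressions are evaluated
    print(f"Succeeded: {status.get('succeeded')}, Num Pods: {spec.get('parallelism')}")
    # -> Succeeded: 2, Num Pods: 3
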
backend/btrixcloud/pages.py (34 changes: 32 additions & 2 deletions)
@@ -1185,7 +1185,6 @@ async def get_crawl_pages_list(
         page: int = 1,
         sortBy: Optional[str] = None,
         sortDirection: Optional[int] = -1,
-        includeTotal=False,
     ):
         """Retrieve paginated list of pages"""
         formatted_approved: Optional[List[Union[bool, None]]] = None
@@ -1202,10 +1201,41 @@ async def get_crawl_pages_list(
             page=page,
             sort_by=sortBy,
             sort_direction=sortDirection,
-            include_total=includeTotal,
+            include_total=True,
         )
         return paginated_format(pages, total, page, pageSize)

+    @app.get(
+        "/orgs/{oid}/crawls/{crawl_id}/pagesSearch",
+        tags=["pages", "crawls"],
+        response_model=PageOutItemsResponse,
+    )
+    async def get_search_pages_list(
+        crawl_id: str,
+        org: Organization = Depends(org_crawl_dep),
+        search: Optional[str] = None,
+        url: Optional[str] = None,
+        ts: Optional[datetime] = None,
+        isSeed: Optional[bool] = None,
+        depth: Optional[int] = None,
+        pageSize: int = DEFAULT_PAGE_SIZE,
+        page: int = 1,
+    ):
+        """Retrieve paginated list of pages"""
+        pages, _ = await ops.list_pages(
+            crawl_ids=[crawl_id],
+            search=search,
+            url=url,
+            ts=ts,
+            is_seed=isSeed,
+            depth=depth,
+            org=org,
+            page_size=pageSize,
+            page=page,
+            include_total=False,
+        )
+        return {"items": pages}
+
     @app.get(
         "/orgs/{oid}/collections/{coll_id}/public/pages",
         tags=["pages", "collections"],
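
The practical difference between the two endpoints: /pages now always computes a total (include_total=True) and wraps results with paginated_format, while the new /pagesSearch accepts the same filters but passes include_total=False, skipping the count query and returning only the matching items. A hedged client sketch of the two response shapes; the deployment origin, token, IDs, and the exact paginated_format keys are assumptions:

    import requests

    API = "https://app.example.com/api"  # hypothetical deployment origin
    HEADERS = {"Authorization": "Bearer <token>"}  # hypothetical access token
    oid, crawl_id = "<oid>", "<crawl_id>"  # hypothetical IDs

    # /pages: paginated listing that includes a total count
    listing = requests.get(
        f"{API}/orgs/{oid}/crawls/{crawl_id}/pages",
        headers=HEADERS,
        params={"page": 1, "pageSize": 25},
    ).json()
    # e.g. {"items": [...], "total": 1234, "page": 1, "pageSize": 25}

    # /pagesSearch: same filters, no count query, items only
    results = requests.get(
        f"{API}/orgs/{oid}/crawls/{crawl_id}/pagesSearch",
        headers=HEADERS,
        params={"search": "example", "pageSize": 25},
    ).json()
    # e.g. {"items": [...]}

Skipping the count makes the search endpoint cheaper for type-ahead style queries, where the total is never displayed.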
