From 1d6d122ca7bd9663cea0b788fba9ed0d1f9d8235 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 21 Jan 2025 14:29:51 -0500 Subject: [PATCH 01/22] Add snapshotCount to archived items --- backend/btrixcloud/colls.py | 2 +- .../migrations/migration_0037_upload_pages.py | 2 +- ...migration_0040_archived_item_page_count.py | 6 ++--- backend/btrixcloud/models.py | 2 ++ backend/btrixcloud/operator/crawls.py | 2 +- backend/btrixcloud/pages.py | 26 ++++++++++++------- 6 files changed, 25 insertions(+), 15 deletions(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 4c1efe719c..b41d64bde4 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -582,7 +582,7 @@ async def update_collection_counts_and_tags(self, collection_id: UUID): total_size += file.size try: - _, crawl_pages = await self.page_ops.list_pages( + _, crawl_pages = await self.page_ops.list_pages_snapshots( crawl.id, org, page_size=1_000_000 ) page_count += crawl_pages diff --git a/backend/btrixcloud/migrations/migration_0037_upload_pages.py b/backend/btrixcloud/migrations/migration_0037_upload_pages.py index 62bfe98237..cc0f056985 100644 --- a/backend/btrixcloud/migrations/migration_0037_upload_pages.py +++ b/backend/btrixcloud/migrations/migration_0037_upload_pages.py @@ -32,7 +32,7 @@ async def org_upload_pages_already_added(self, oid: UUID) -> bool: mdb_crawls = self.mdb["crawls"] async for upload in mdb_crawls.find({"oid": oid, "type": "upload"}): upload_id = upload["_id"] - _, total = await self.page_ops.list_pages(upload_id) + _, total = await self.page_ops.list_page_snapshots(upload_id) if total > 0: return True return False diff --git a/backend/btrixcloud/migrations/migration_0040_archived_item_page_count.py b/backend/btrixcloud/migrations/migration_0040_archived_item_page_count.py index 2f72fc39bd..f57f77d579 100644 --- a/backend/btrixcloud/migrations/migration_0040_archived_item_page_count.py +++ b/backend/btrixcloud/migrations/migration_0040_archived_item_page_count.py @@ -31,13 +31,13 @@ async def migrate_up(self): ) return - async for crawl_raw in crawls_mdb.find({"pageCount": None}): + async for crawl_raw in crawls_mdb.find({}): crawl_id = crawl_raw["_id"] try: - await self.page_ops.set_archived_item_page_count(crawl_id) + await self.page_ops.set_archived_item_page_snapshot_counts(crawl_id) # pylint: disable=broad-exception-caught except Exception as err: print( - f"Error saving pageCount for archived item {crawl_id}: {err}", + f"Error saving page/snapshot counts for archived item {crawl_id}: {err}", flush=True, ) diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 9bcd7557c3..1bc1336d4f 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -798,6 +798,7 @@ class BaseCrawl(CoreCrawlable, BaseMongoModel): reviewStatus: ReviewStatus = None pageCount: Optional[int] = 0 + snapshotCount: Optional[int] = 0 filePageCount: Optional[int] = 0 errorPageCount: Optional[int] = 0 @@ -875,6 +876,7 @@ class CrawlOut(BaseMongoModel): lastQAStarted: Optional[datetime] = None pageCount: Optional[int] = 0 + snapshotCount: Optional[int] = 0 filePageCount: Optional[int] = 0 errorPageCount: Optional[int] = 0 diff --git a/backend/btrixcloud/operator/crawls.py b/backend/btrixcloud/operator/crawls.py index d2b84da5b4..7485b36ca9 100644 --- a/backend/btrixcloud/operator/crawls.py +++ b/backend/btrixcloud/operator/crawls.py @@ -1534,7 +1534,7 @@ async def do_crawl_finished_tasks( ) if state in SUCCESSFUL_STATES and crawl.oid: - await 
self.page_ops.set_archived_item_page_count(crawl.id) + await self.page_ops.set_archived_item_page_snapshot_counts(crawl.id) await self.org_ops.inc_org_bytes_stored( crawl.oid, status.filesAddedSize, "crawl" ) diff --git a/backend/btrixcloud/pages.py b/backend/btrixcloud/pages.py index f796d94a49..62d4eb4241 100644 --- a/backend/btrixcloud/pages.py +++ b/backend/btrixcloud/pages.py @@ -92,7 +92,7 @@ async def add_crawl_pages_to_db_from_wacz(self, crawl_id: str, batch_size=100): if pages_buffer: await self._add_pages_to_db(crawl_id, pages_buffer) - await self.set_archived_item_page_count(crawl_id) + await self.set_archived_item_page_snapshot_counts(crawl_id) print(f"Added pages for crawl {crawl_id} to db", flush=True) # pylint: disable=broad-exception-caught, raise-missing-from @@ -435,7 +435,7 @@ async def delete_page_notes( return {"deleted": True} - async def list_pages( + async def list_page_snapshots( self, crawl_id: str, org: Optional[Organization] = None, @@ -453,7 +453,7 @@ async def list_pages( sort_by: Optional[str] = None, sort_direction: Optional[int] = -1, ) -> Tuple[Union[List[PageOut], List[PageOutWithSingleQA]], int]: - """List all pages in crawl""" + """List all page snapshots in crawl""" # pylint: disable=duplicate-code, too-many-locals, too-many-branches, too-many-statements # Zero-index page for query page = page - 1 @@ -663,12 +663,20 @@ def get_crawl_type_from_pages_route(self, request: Request): return crawl_type - async def set_archived_item_page_count(self, crawl_id: str): - """Store archived item page count in crawl document""" - _, page_count = await self.list_pages(crawl_id) + def get_unique_page_count(self, crawl_id: str): + """Get count of unique page URLs in archived item""" + unique_pages = await self.pages.distinct("url", {"crawl_id": crawl_id}) + return len(unique_pages) or 0 + + async def set_archived_item_page_snapshot_counts(self, crawl_id: str): + """Store archived item page and snapshot counts in crawl document""" + _, snapshot_count = await self.list_page_snapshots(crawl_id) + + page_count = await self.get_unique_page_count(crawl_id) await self.crawls.find_one_and_update( - {"_id": crawl_id}, {"$set": {"pageCount": page_count}} + {"_id": crawl_id}, + {"$set": {"snapshotCount": snapshot_count, "pageCount": page_count}}, ) @@ -866,7 +874,7 @@ async def get_pages_list( if approved: formatted_approved = str_list_to_bools(approved.split(",")) - pages, total = await ops.list_pages( + pages, total = await ops.list_page_snapshots( crawl_id=crawl_id, org=org, reviewed=reviewed, @@ -906,7 +914,7 @@ async def get_pages_list_with_qa( if approved: formatted_approved = str_list_to_bools(approved.split(",")) - pages, total = await ops.list_pages( + pages, total = await ops.list_page_snapshots( crawl_id=crawl_id, org=org, qa_run_id=qa_run_id, From f93ecdc0bb26e33e49e710d8c17146d66e8d0ac5 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 21 Jan 2025 14:41:16 -0500 Subject: [PATCH 02/22] Rationalize counts for collections, add migration to recalc --- backend/btrixcloud/colls.py | 13 +++++- backend/btrixcloud/db.py | 9 ++-- .../migration_0041_pages_snapshots.py | 41 +++++++++++++++++++ backend/btrixcloud/models.py | 3 ++ backend/btrixcloud/pages.py | 2 +- 5 files changed, 62 insertions(+), 6 deletions(-) create mode 100644 backend/btrixcloud/migrations/migration_0041_pages_snapshots.py diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index b41d64bde4..d20558632a 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -566,6 
+566,7 @@ async def update_collection_counts_and_tags(self, collection_id: UUID): """Set current crawl info in config when crawl begins""" crawl_count = 0 page_count = 0 + snapshot_count = 0 total_size = 0 tags = [] @@ -582,10 +583,17 @@ async def update_collection_counts_and_tags(self, collection_id: UUID): total_size += file.size try: - _, crawl_pages = await self.page_ops.list_pages_snapshots( + _, crawl_snapshots = await self.page_ops.list_page_snapshots( crawl.id, org, page_size=1_000_000 ) - page_count += crawl_pages + snapshot_count += crawl_snapshots + # pylint: disable=broad-exception-caught + except Exception: + pass + + try: + crawl_page_count = await self.page_ops.get_unique_page_count(crawl.id) + page_count += crawl_page_count # pylint: disable=broad-exception-caught except Exception: pass @@ -601,6 +609,7 @@ async def update_collection_counts_and_tags(self, collection_id: UUID): "$set": { "crawlCount": crawl_count, "pageCount": page_count, + "snapshotCount": snapshot_count, "totalSize": total_size, "tags": sorted_tags, } diff --git a/backend/btrixcloud/db.py b/backend/btrixcloud/db.py index bd889be25a..57bde65382 100644 --- a/backend/btrixcloud/db.py +++ b/backend/btrixcloud/db.py @@ -17,7 +17,7 @@ from .migrations import BaseMigration -CURR_DB_VERSION = "0040" +CURR_DB_VERSION = "0041" # ============================================================================ @@ -96,7 +96,7 @@ async def update_and_prepare_db( await ping_db(mdb) print("Database setup started", flush=True) if await run_db_migrations( - mdb, user_manager, page_ops, org_ops, background_job_ops + mdb, user_manager, page_ops, org_ops, background_job_ops, coll_ops ): await drop_indexes(mdb) @@ -118,7 +118,9 @@ async def update_and_prepare_db( # ============================================================================ # pylint: disable=too-many-locals -async def run_db_migrations(mdb, user_manager, page_ops, org_ops, background_job_ops): +async def run_db_migrations( + mdb, user_manager, page_ops, org_ops, background_job_ops, coll_ops +): """Run database migrations.""" # if first run, just set version and exit @@ -155,6 +157,7 @@ async def run_db_migrations(mdb, user_manager, page_ops, org_ops, background_job page_ops=page_ops, org_ops=org_ops, background_job_ops=background_job_ops, + coll_ops=coll_ops, ) if await migration.run(): migrations_run = True diff --git a/backend/btrixcloud/migrations/migration_0041_pages_snapshots.py b/backend/btrixcloud/migrations/migration_0041_pages_snapshots.py new file mode 100644 index 0000000000..458b42abf8 --- /dev/null +++ b/backend/btrixcloud/migrations/migration_0041_pages_snapshots.py @@ -0,0 +1,41 @@ +""" +Migration 0041 - Rationalize page and snapshot counts +""" + +from btrixcloud.colls import CollectionOps +from btrixcloud.migrations import BaseMigration + + +MIGRATION_VERSION = "0041" + + +class Migration(BaseMigration): + """Migration class.""" + + # pylint: disable=unused-argument + def __init__(self, mdb, **kwargs): + super().__init__(mdb, migration_version=MIGRATION_VERSION) + + self.coll_ops = kwargs.get("coll_ops") + + async def migrate_up(self): + """Perform migration up. 
+
+        Recalculate collections to get new page and snapshot counts
+        """
+        colls_mdb = self.mdb["collections"]
+
+        if self.coll_ops is None:
+            print(
+                "Unable to set collection page and snapshot counts, missing coll_ops",
+                flush=True,
+            )
+            return
+
+        async for coll in colls_mdb.find({}):
+            coll_id = coll["_id"]
+            try:
+                await self.coll_ops.update_collection_counts_and_tags(coll_id)
+            # pylint: disable=broad-exception-caught
+            except Exception as err:
+                print(f"Unable to update collection {coll_id}: {err}", flush=True)
diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py
index 1bc1336d4f..d16309c0dd 100644
--- a/backend/btrixcloud/models.py
+++ b/backend/btrixcloud/models.py
@@ -1252,6 +1252,7 @@ class Collection(BaseMongoModel):
 
     crawlCount: Optional[int] = 0
     pageCount: Optional[int] = 0
+    snapshotCount: Optional[int] = 0
     totalSize: Optional[int] = 0
 
     dateEarliest: Optional[datetime] = None
@@ -1305,6 +1306,7 @@ class CollOut(BaseMongoModel):
 
     crawlCount: Optional[int] = 0
     pageCount: Optional[int] = 0
+    snapshotCount: Optional[int] = 0
     totalSize: Optional[int] = 0
 
     dateEarliest: Optional[datetime] = None
@@ -1341,6 +1343,7 @@ class PublicCollOut(BaseMongoModel):
 
     crawlCount: Optional[int] = 0
     pageCount: Optional[int] = 0
+    snapshotCount: Optional[int] = 0
     totalSize: Optional[int] = 0
 
     dateEarliest: Optional[datetime] = None
diff --git a/backend/btrixcloud/pages.py b/backend/btrixcloud/pages.py
index 62d4eb4241..19ed7a8195 100644
--- a/backend/btrixcloud/pages.py
+++ b/backend/btrixcloud/pages.py
@@ -663,7 +663,7 @@ def get_crawl_type_from_pages_route(self, request: Request):
 
         return crawl_type
 
-    def get_unique_page_count(self, crawl_id: str):
+    async def get_unique_page_count(self, crawl_id: str):
         """Get count of unique page URLs in archived item"""
         unique_pages = await self.pages.distinct("url", {"crawl_id": crawl_id})
         return len(unique_pages) or 0
From ce8bbdef9e604003dbac0a71f94f7437ea7d3f6a Mon Sep 17 00:00:00 2001
From: Tessa Walsh
Date: Tue, 21 Jan 2025 14:45:14 -0500
Subject: [PATCH 03/22] Add snapshotCounts to org metrics endpoint

---
 backend/btrixcloud/models.py | 3 +++
 backend/btrixcloud/orgs.py   | 11 +++++++++++
 backend/test/test_org.py     | 1 +
 3 files changed, 15 insertions(+)

diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py
index d16309c0dd..822d83f4ec 100644
--- a/backend/btrixcloud/models.py
+++ b/backend/btrixcloud/models.py
@@ -1924,6 +1924,9 @@ class OrgMetrics(BaseModel):
     pageCount: int
     crawlPageCount: int
     uploadPageCount: int
+    snapshotCount: int
+    crawlSnapshotCount: int
+    uploadSnapshotCount: int
     profileCount: int
     workflowsRunningCount: int
     maxConcurrentCrawls: int
diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py
index 3486cc29a4..123e32850b 100644
--- a/backend/btrixcloud/orgs.py
+++ b/backend/btrixcloud/orgs.py
@@ -944,6 +944,10 @@ async def get_org_metrics(self, org: Organization) -> dict[str, int]:
         crawl_page_count = 0
         upload_page_count = 0
 
+        snapshot_count = 0
+        crawl_snapshot_count = 0
+        upload_snapshot_count = 0
+
         async for item_data in self.crawls_db.find({"oid": org.id}):
             item = BaseCrawl.from_dict(item_data)
             if item.state not in SUCCESSFUL_STATES:
@@ -952,11 +956,15 @@ async def get_org_metrics(self, org: Organization) -> dict[str, int]:
             if item.type == "crawl":
                 crawl_count += 1
                 crawl_page_count += item.pageCount or 0
+                crawl_snapshot_count += item.snapshotCount or 0
             if item.type == "upload":
                 upload_count += 1
                 upload_page_count += item.pageCount or 0
+                upload_snapshot_count += 
item.snapshotCount or 0 if item.pageCount: page_count += item.pageCount + if item.snapshotCount: + snapshot_count += item.snapshotCount profile_count = await self.profiles_db.count_documents({"oid": org.id}) workflows_running_count = await self.crawls_db.count_documents( @@ -982,6 +990,9 @@ async def get_org_metrics(self, org: Organization) -> dict[str, int]: "pageCount": page_count, "crawlPageCount": crawl_page_count, "uploadPageCount": upload_page_count, + "snapshotCount": snapshot_count, + "crawlSnapshotCount": crawl_snapshot_count, + "uploadSnapshotCount": upload_snapshot_count, "profileCount": profile_count, "workflowsRunningCount": workflows_running_count, "maxConcurrentCrawls": max_concurrent_crawls, diff --git a/backend/test/test_org.py b/backend/test/test_org.py index 57c0b8fcce..e1c9715a6d 100644 --- a/backend/test/test_org.py +++ b/backend/test/test_org.py @@ -569,6 +569,7 @@ def test_org_metrics(crawler_auth_headers, default_org_id): assert data["uploadCount"] >= 0 assert data["archivedItemCount"] == data["crawlCount"] + data["uploadCount"] assert data["pageCount"] > 0 + assert data["snapshotCount"] > 0 assert data["profileCount"] >= 0 assert data["workflowsRunningCount"] >= 0 assert data["workflowsQueuedCount"] >= 0 From da4de38b7243257112fb34cb85632c463f4c1871 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 21 Jan 2025 14:51:47 -0500 Subject: [PATCH 04/22] Update tests --- backend/test/test_collections.py | 26 ++++++++++++++++++-------- backend/test/test_run_crawl.py | 6 ++++-- backend/test/test_uploads.py | 6 ++++-- 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index 751b554ffe..da914a81fc 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -82,6 +82,7 @@ def test_create_collection( assert data["caption"] == CAPTION assert data["crawlCount"] == 1 assert data["pageCount"] > 0 + assert data["snapshotCount"] > 0 assert data["totalSize"] > 0 modified = data["modified"] assert modified @@ -181,6 +182,7 @@ def test_update_collection( assert data["caption"] == UPDATED_CAPTION assert data["crawlCount"] == 1 assert data["pageCount"] > 0 + assert data["snapshotCount"] > 0 assert data["totalSize"] > 0 global modified modified = data["modified"] @@ -270,6 +272,7 @@ def test_add_remove_crawl_from_collection( assert data["id"] == _coll_id assert data["crawlCount"] == 2 assert data["pageCount"] > 0 + assert data["snapshotCount"] > 0 assert data["totalSize"] > 0 assert data["modified"] >= modified assert data["tags"] == ["wr-test-2", "wr-test-1"] @@ -294,6 +297,7 @@ def test_add_remove_crawl_from_collection( assert data["id"] == _coll_id assert data["crawlCount"] == 0 assert data["pageCount"] == 0 + assert data["snapshotCount"] == 0 assert data["totalSize"] == 0 assert data["modified"] >= modified assert data.get("tags", []) == [] @@ -324,6 +328,7 @@ def test_add_remove_crawl_from_collection( assert data["id"] == _coll_id assert data["crawlCount"] == 2 assert data["pageCount"] > 0 + assert data["snapshotCount"] > 0 assert data["totalSize"] > 0 assert data["modified"] >= modified assert data["tags"] == ["wr-test-2", "wr-test-1"] @@ -346,6 +351,7 @@ def test_get_collection(crawler_auth_headers, default_org_id): assert data["caption"] == UPDATED_CAPTION assert data["crawlCount"] == 2 assert data["pageCount"] > 0 + assert data["snapshotCount"] > 0 assert data["totalSize"] > 0 assert data["modified"] >= modified assert data["tags"] == ["wr-test-2", "wr-test-1"] @@ 
-369,6 +375,7 @@ def test_get_collection_replay(crawler_auth_headers, default_org_id): assert data["caption"] == UPDATED_CAPTION assert data["crawlCount"] == 2 assert data["pageCount"] > 0 + assert data["snapshotCount"] > 0 assert data["totalSize"] > 0 assert data["modified"] >= modified assert data["tags"] == ["wr-test-2", "wr-test-1"] @@ -488,6 +495,7 @@ def test_add_upload_to_collection(crawler_auth_headers, default_org_id): assert data["id"] == _coll_id assert data["crawlCount"] == 3 assert data["pageCount"] > 0 + assert data["snapshotCount"] > 0 assert data["totalSize"] > 0 assert data["modified"] assert data["tags"] == ["wr-test-2", "wr-test-1"] @@ -548,6 +556,7 @@ def test_list_collections( assert first_coll["caption"] == UPDATED_CAPTION assert first_coll["crawlCount"] == 3 assert first_coll["pageCount"] > 0 + assert first_coll["snapshotCount"] > 0 assert first_coll["totalSize"] > 0 assert first_coll["modified"] assert first_coll["tags"] == ["wr-test-2", "wr-test-1"] @@ -564,6 +573,7 @@ def test_list_collections( assert second_coll.get("description") is None assert second_coll["crawlCount"] == 1 assert second_coll["pageCount"] > 0 + assert second_coll["snapshotCount"] > 0 assert second_coll["totalSize"] > 0 assert second_coll["modified"] assert second_coll["tags"] == ["wr-test-2"] @@ -584,6 +594,7 @@ def test_remove_upload_from_collection(crawler_auth_headers, default_org_id): assert data["id"] == _coll_id assert data["crawlCount"] == 2 assert data["pageCount"] > 0 + assert data["snapshotCount"] > 0 assert data["totalSize"] > 0 assert data["modified"] >= modified assert data.get("tags") == ["wr-test-2", "wr-test-1"] @@ -914,6 +925,7 @@ def test_list_public_collections( assert collection["dateLatest"] assert collection["crawlCount"] > 0 assert collection["pageCount"] > 0 + assert collection["snapshotCount"] > 0 assert collection["totalSize"] > 0 # Test non-existing slug - it should return a 404 but not reveal @@ -1072,13 +1084,7 @@ def test_list_public_colls_home_url_thumbnail(): # Check we get expected data for each public collection # and nothing we don't expect non_public_fields = ( - "oid", - "modified", - "crawlCount", - "pageCount", - "totalSize", "tags", - "access", "homeUrlPageId", ) non_public_image_fields = ("originalFilename", "userid", "userName", "created") @@ -1100,9 +1106,10 @@ def test_list_public_colls_home_url_thumbnail(): assert coll["dateLatest"] assert coll["crawlCount"] > 0 assert coll["pageCount"] > 0 + assert coll["snapshotCount"] > 0 assert coll["totalSize"] > 0 - for field in NON_PUBLIC_COLL_FIELDS: + for field in non_public_coll_fields: assert field not in coll if coll["id"] == _public_coll_id: @@ -1122,7 +1129,7 @@ def test_list_public_colls_home_url_thumbnail(): assert thumbnail["size"] assert thumbnail["mime"] - for field in NON_PUBLIC_IMAGE_FIELDS: + for field in non_public_image_fields: assert field not in thumbnail if coll["id"] == _second_public_coll_id: @@ -1150,6 +1157,7 @@ def test_get_public_collection(default_org_id): assert coll["dateLatest"] assert coll["crawlCount"] > 0 assert coll["pageCount"] > 0 + assert coll["snapshotCount"] > 0 assert coll["totalSize"] > 0 for field in NON_PUBLIC_COLL_FIELDS: @@ -1229,6 +1237,7 @@ def test_get_public_collection_unlisted(crawler_auth_headers, default_org_id): assert coll["dateLatest"] assert coll["crawlCount"] > 0 assert coll["pageCount"] > 0 + assert coll["snapshotCount"] > 0 assert coll["totalSize"] > 0 assert coll["defaultThumbnailName"] == "orange-default.avif" assert coll["allowPublicDownload"] @@ 
-1270,6 +1279,7 @@ def test_get_public_collection_unlisted_org_profile_disabled(
     assert coll["dateLatest"]
     assert coll["crawlCount"] > 0
     assert coll["pageCount"] > 0
+    assert coll["snapshotCount"] > 0
     assert coll["totalSize"] > 0
     assert coll["defaultThumbnailName"] == "orange-default.avif"
     assert coll["allowPublicDownload"]
diff --git a/backend/test/test_run_crawl.py b/backend/test/test_run_crawl.py
index 26f5574681..98b062fc62 100644
--- a/backend/test/test_run_crawl.py
+++ b/backend/test/test_run_crawl.py
@@ -877,13 +877,15 @@ def test_re_add_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_
     )
     assert r.status_code == 403
 
-    # Check that pageCount was stored on crawl
+    # Check that pageCount and snapshotCount were stored on crawl
     r = requests.get(
         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}",
         headers=crawler_auth_headers,
     )
     assert r.status_code == 200
-    assert r.json()["pageCount"] > 0
+    data = r.json()
+    assert data["pageCount"] > 0
+    assert data["snapshotCount"] > 0
 
 
 def test_crawl_page_notes(crawler_auth_headers, default_org_id, crawler_crawl_id):
diff --git a/backend/test/test_uploads.py b/backend/test/test_uploads.py
index 5a55b36c5c..0a8473f6c3 100644
--- a/backend/test/test_uploads.py
+++ b/backend/test/test_uploads.py
@@ -274,13 +274,15 @@ def test_get_upload_pages(admin_auth_headers, default_org_id, upload_id):
         assert page.get("modified") is None
         assert page.get("approved") is None
 
-    # Check that pageCount was stored on upload
+    # Check that pageCount and snapshotCount were stored on upload
     r = requests.get(
         f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}",
         headers=admin_auth_headers,
     )
     assert r.status_code == 200
-    assert r.json()["pageCount"] > 0
+    data = r.json()
+    assert data["pageCount"] > 0
+    assert data["snapshotCount"] > 0
 
 
 def test_replace_upload(
From 1718cfa90447edd72d5b110e3a5183e1879a76f0 Mon Sep 17 00:00:00 2001
From: Tessa Walsh
Date: Tue, 21 Jan 2025 14:55:31 -0500
Subject: [PATCH 05/22] Recalculate collection counts on import

---
 backend/btrixcloud/orgs.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py
index 123e32850b..93309120c0 100644
--- a/backend/btrixcloud/orgs.py
+++ b/backend/btrixcloud/orgs.py
@@ -1322,11 +1322,15 @@ async def import_org(
             await self.pages_db.insert_one(PageWithAllQA.from_dict(page).to_dict())
 
         # collections
-        for collection in org_data.get("collections", []):
-            collection = json_stream.to_standard_types(collection)
-            if not collection.get("slug"):
-                collection["slug"] = slug_from_name(collection["name"])
-            await self.colls_db.insert_one(Collection.from_dict(collection).to_dict())
+        for coll_raw in org_data.get("collections", []):
+            coll_raw = json_stream.to_standard_types(coll_raw)
+
+            if not coll_raw.get("slug"):
+                coll_raw["slug"] = slug_from_name(coll_raw["name"])
+
+            collection = Collection.from_dict(coll_raw)
+            await self.colls_db.insert_one(collection.to_dict())
+            await self.coll_ops.update_collection_counts_and_tags(collection.id)
 
     async def delete_org_and_data(
         self, org: Organization, user_manager: UserManager
From 462e42720a692b7c273e7f12a88de7b1f12a3e22 Mon Sep 17 00:00:00 2001
From: Tessa Walsh
Date: Tue, 21 Jan 2025 14:59:56 -0500
Subject: [PATCH 06/22] Fix linting

---
 backend/btrixcloud/colls.py                                     | 1 +
 backend/btrixcloud/db.py                                        | 2 +-
 backend/btrixcloud/migrations/migration_0041_pages_snapshots.py | 1 -
 3 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/btrixcloud/colls.py 
b/backend/btrixcloud/colls.py index d20558632a..ebbf2a109e 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -627,6 +627,7 @@ async def recalculate_org_collection_dates(self, org: Organization): async def update_collection_dates(self, coll_id: UUID): """Update collection earliest and latest dates from page timestamps""" + # pylint: disable=too-many-locals coll = await self.get_collection(coll_id) crawl_ids = await self.get_collection_crawl_ids(coll_id) diff --git a/backend/btrixcloud/db.py b/backend/btrixcloud/db.py index 57bde65382..a16964626f 100644 --- a/backend/btrixcloud/db.py +++ b/backend/btrixcloud/db.py @@ -117,7 +117,7 @@ async def update_and_prepare_db( # ============================================================================ -# pylint: disable=too-many-locals +# pylint: disable=too-many-locals, too-many-arguments async def run_db_migrations( mdb, user_manager, page_ops, org_ops, background_job_ops, coll_ops ): diff --git a/backend/btrixcloud/migrations/migration_0041_pages_snapshots.py b/backend/btrixcloud/migrations/migration_0041_pages_snapshots.py index 458b42abf8..e024f3fd90 100644 --- a/backend/btrixcloud/migrations/migration_0041_pages_snapshots.py +++ b/backend/btrixcloud/migrations/migration_0041_pages_snapshots.py @@ -2,7 +2,6 @@ Migration 0041 - Rationalize page and snapshot counts """ -from btrixcloud.colls import CollectionOps from btrixcloud.migrations import BaseMigration From 5215649d3b8aef7ba0847b2e20907b83605d09b6 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 21 Jan 2025 15:03:12 -0500 Subject: [PATCH 07/22] Add pylint comment --- backend/btrixcloud/colls.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index ebbf2a109e..20ae07f0b6 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -564,6 +564,7 @@ async def recalculate_org_collection_counts_tags(self, org: Organization): async def update_collection_counts_and_tags(self, collection_id: UUID): """Set current crawl info in config when crawl begins""" + # pylint: disable=too-many-locals crawl_count = 0 page_count = 0 snapshot_count = 0 From 6eaa9aeb007d29586fedddab02714393a5aa96f8 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 21 Jan 2025 15:24:32 -0500 Subject: [PATCH 08/22] Fix collection test --- backend/test/test_collections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index da914a81fc..290141d623 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -1109,7 +1109,7 @@ def test_list_public_colls_home_url_thumbnail(): assert coll["snapshotCount"] > 0 assert coll["totalSize"] > 0 - for field in non_public_coll_fields: + for field in non_public_fields: assert field not in coll if coll["id"] == _public_coll_id: From a43fff22de76b8926fd19efffc52d0289f4b808d Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 21 Jan 2025 16:14:01 -0500 Subject: [PATCH 09/22] Ensure page counts are based on unique URLs in that object specifically --- backend/btrixcloud/colls.py | 14 ++++++-------- backend/btrixcloud/main.py | 2 +- backend/btrixcloud/ops.py | 2 +- backend/btrixcloud/orgs.py | 26 ++++++++++++++++---------- backend/btrixcloud/pages.py | 10 ++++++---- 5 files changed, 30 insertions(+), 24 deletions(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 20ae07f0b6..f71313d2af 100644 --- a/backend/btrixcloud/colls.py +++ 
b/backend/btrixcloud/colls.py @@ -566,11 +566,12 @@ async def update_collection_counts_and_tags(self, collection_id: UUID): """Set current crawl info in config when crawl begins""" # pylint: disable=too-many-locals crawl_count = 0 - page_count = 0 snapshot_count = 0 total_size = 0 tags = [] + crawl_ids = [] + coll = await self.get_collection(collection_id) org = await self.orgs.get_org_by_id(coll.oid) @@ -592,18 +593,15 @@ async def update_collection_counts_and_tags(self, collection_id: UUID): except Exception: pass - try: - crawl_page_count = await self.page_ops.get_unique_page_count(crawl.id) - page_count += crawl_page_count - # pylint: disable=broad-exception-caught - except Exception: - pass - if crawl.tags: tags.extend(crawl.tags) + crawl_ids.append(crawl.id) + sorted_tags = [tag for tag, count in Counter(tags).most_common()] + page_count = await self.page_ops.get_unique_page_count(crawl_ids) + await self.collections.find_one_and_update( {"_id": collection_id}, { diff --git a/backend/btrixcloud/main.py b/backend/btrixcloud/main.py index 927a03dcb8..507db08f02 100644 --- a/backend/btrixcloud/main.py +++ b/backend/btrixcloud/main.py @@ -255,7 +255,7 @@ def main() -> None: crawls.set_page_ops(page_ops) upload_ops.set_page_ops(page_ops) - org_ops.set_ops(base_crawl_ops, profiles, coll_ops, background_job_ops) + org_ops.set_ops(base_crawl_ops, profiles, coll_ops, background_job_ops, page_ops) user_manager.set_ops(org_ops, crawl_config_ops, base_crawl_ops) diff --git a/backend/btrixcloud/ops.py b/backend/btrixcloud/ops.py index bee24d00c5..bcdb493db5 100644 --- a/backend/btrixcloud/ops.py +++ b/backend/btrixcloud/ops.py @@ -97,7 +97,7 @@ def init_ops() -> Tuple[ background_job_ops.set_ops(crawl_ops, profile_ops) - org_ops.set_ops(base_crawl_ops, profile_ops, coll_ops, background_job_ops) + org_ops.set_ops(base_crawl_ops, profile_ops, coll_ops, background_job_ops, page_ops) user_manager.set_ops(org_ops, crawl_config_ops, base_crawl_ops) diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py index 93309120c0..3417cdd036 100644 --- a/backend/btrixcloud/orgs.py +++ b/backend/btrixcloud/orgs.py @@ -96,9 +96,10 @@ from .profiles import ProfileOps from .users import UserManager from .background_jobs import BackgroundJobOps + from .pages import PageOps else: InviteOps = BaseCrawlOps = ProfileOps = CollectionOps = object - BackgroundJobOps = UserManager = object + BackgroundJobOps = UserManager = PageOps = object DEFAULT_ORG = os.environ.get("DEFAULT_ORG", "My Organization") @@ -156,13 +157,15 @@ def set_ops( profile_ops: ProfileOps, coll_ops: CollectionOps, background_job_ops: BackgroundJobOps, + page_ops: PageOps, ) -> None: - """Set base crawl ops""" + """Set additional ops classes""" # pylint: disable=attribute-defined-outside-init self.base_crawl_ops = base_crawl_ops self.profile_ops = profile_ops self.coll_ops = coll_ops self.background_job_ops = background_job_ops + self.page_ops = page_ops def set_default_primary_storage(self, storage: StorageRef): """set default primary storage""" @@ -940,14 +943,13 @@ async def get_org_metrics(self, org: Organization) -> dict[str, int]: crawl_count = 0 upload_count = 0 - page_count = 0 - crawl_page_count = 0 - upload_page_count = 0 - snapshot_count = 0 crawl_snapshot_count = 0 upload_snapshot_count = 0 + crawl_ids = [] + upload_ids = [] + async for item_data in self.crawls_db.find({"oid": org.id}): item = BaseCrawl.from_dict(item_data) if item.state not in SUCCESSFUL_STATES: @@ -955,17 +957,21 @@ async def get_org_metrics(self, org: 
Organization) -> dict[str, int]: archived_item_count += 1 if item.type == "crawl": crawl_count += 1 - crawl_page_count += item.pageCount or 0 crawl_snapshot_count += item.snapshotCount or 0 + crawl_ids.append(item.id) if item.type == "upload": upload_count += 1 - upload_page_count += item.pageCount or 0 upload_snapshot_count += item.snapshotCount or 0 - if item.pageCount: - page_count += item.pageCount + upload_ids.append(item.id) if item.snapshotCount: snapshot_count += item.snapshotCount + all_archived_item_ids = crawl_ids + upload_ids + + page_count = await self.page_ops.get_unique_page_count(all_archived_item_ids) + crawl_page_count = await self.page_ops.get_unique_page_count(crawl_ids) + upload_page_count = await self.page_ops.get_unique_page_count(upload_ids) + profile_count = await self.profiles_db.count_documents({"oid": org.id}) workflows_running_count = await self.crawls_db.count_documents( {"oid": org.id, "state": {"$in": RUNNING_STATES}} diff --git a/backend/btrixcloud/pages.py b/backend/btrixcloud/pages.py index 19ed7a8195..3a664d82a2 100644 --- a/backend/btrixcloud/pages.py +++ b/backend/btrixcloud/pages.py @@ -663,16 +663,18 @@ def get_crawl_type_from_pages_route(self, request: Request): return crawl_type - async def get_unique_page_count(self, crawl_id: str): - """Get count of unique page URLs in archived item""" - unique_pages = await self.pages.distinct("url", {"crawl_id": crawl_id}) + async def get_unique_page_count(self, crawl_ids: List[str]) -> int: + """Get count of unique page URLs across list of archived items""" + unique_pages = await self.pages.distinct( + "url", {"crawl_id": {"$in": crawl_ids}} + ) return len(unique_pages) or 0 async def set_archived_item_page_snapshot_counts(self, crawl_id: str): """Store archived item page and snapshot counts in crawl document""" _, snapshot_count = await self.list_page_snapshots(crawl_id) - page_count = await self.get_unique_page_count(crawl_id) + page_count = await self.get_unique_page_count([crawl_id]) await self.crawls.find_one_and_update( {"_id": crawl_id}, From 94b7a2f6ac4e7dae4273645d44e3348f405e1e54 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 21 Jan 2025 16:16:06 -0500 Subject: [PATCH 10/22] Add pylint comment --- backend/btrixcloud/orgs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py index 3417cdd036..3132e8c354 100644 --- a/backend/btrixcloud/orgs.py +++ b/backend/btrixcloud/orgs.py @@ -111,7 +111,7 @@ # ============================================================================ -# pylint: disable=too-many-public-methods, too-many-instance-attributes, too-many-locals +# pylint: disable=too-many-public-methods, too-many-instance-attributes, too-many-locals, too-many-arguments class OrgOps: """Organization API operations""" From 2ac44f4633a603d4bd77b3ebbe96558735bf74e2 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 21 Jan 2025 16:26:58 -0500 Subject: [PATCH 11/22] Update collections when archived items are deleted --- backend/btrixcloud/basecrawls.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py index 99b065975a..a50342388b 100644 --- a/backend/btrixcloud/basecrawls.py +++ b/backend/btrixcloud/basecrawls.py @@ -300,6 +300,7 @@ async def delete_crawls( ) -> tuple[int, dict[UUID, dict[str, int]], bool]: """Delete a list of crawls by id for given org""" cids_to_update: dict[UUID, dict[str, int]] = {} + collection_ids_to_update = set() size = 0 @@ 
-325,6 +326,10 @@ async def delete_crawls(
 
             await self.page_ops.delete_crawl_pages(crawl_id, org.id)
 
+            if crawl.collectionIds:
+                for coll_id in crawl.collectionIds:
+                    collection_ids_to_update.add(coll_id)
+
             if type_ == "crawl":
                 await self.delete_all_crawl_qa_files(crawl_id, org)
 
@@ -361,6 +366,10 @@ async def delete_crawls(
 
             await self.orgs.set_last_crawl_finished(org.id)
 
+        if collection_ids_to_update:
+            for coll_id in collection_ids_to_update:
+                self.colls.update_collection_counts_and_tags(coll_id)
+
         quota_reached = self.orgs.storage_quota_reached(org)
 
         return res.deleted_count, cids_to_update, quota_reached
From cad2301a73b319c423d1219670cdf5c67f8b15eb Mon Sep 17 00:00:00 2001
From: Tessa Walsh
Date: Tue, 21 Jan 2025 16:36:40 -0500
Subject: [PATCH 12/22] Add pylint comment

---
 backend/btrixcloud/basecrawls.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py
index a50342388b..7f9cbf23bc 100644
--- a/backend/btrixcloud/basecrawls.py
+++ b/backend/btrixcloud/basecrawls.py
@@ -49,7 +49,7 @@
 
 
 # ============================================================================
-# pylint: disable=too-many-instance-attributes, too-many-public-methods, too-many-lines
+# pylint: disable=too-many-instance-attributes, too-many-public-methods, too-many-lines, too-many-branches
 class BaseCrawlOps:
     """operations that apply to all crawls"""
 
From 4af8ea6a991a63300a7b8c4a3da5e31eb35e7287 Mon Sep 17 00:00:00 2001
From: Tessa Walsh
Date: Tue, 21 Jan 2025 16:38:45 -0500
Subject: [PATCH 13/22] Add missing await

---
 backend/btrixcloud/basecrawls.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py
index 7f9cbf23bc..fb8a07cd9e 100644
--- a/backend/btrixcloud/basecrawls.py
+++ b/backend/btrixcloud/basecrawls.py
@@ -368,7 +368,7 @@ async def delete_crawls(
 
         if collection_ids_to_update:
             for coll_id in collection_ids_to_update:
-                self.colls.update_collection_counts_and_tags(coll_id)
+                await self.colls.update_collection_counts_and_tags(coll_id)
 
         quota_reached = self.orgs.storage_quota_reached(org)
 
From 09387f5e11aee69f611541103f87875847e6f85f Mon Sep 17 00:00:00 2001
From: sua yoo
Date: Tue, 21 Jan 2025 18:10:58 -0800
Subject: [PATCH 14/22] feat: Update collection sorting & metadata (#2324)

- Fixes https://github.com/webrecorder/browsertrix/issues/2321
- Resolves https://github.com/webrecorder/browsertrix/issues/2323

Follows https://github.com/webrecorder/browsertrix/pull/2327, should be
rebased and merged afterwards

## Changes

- Refactors dashboard and org profile preview to use private API
endpoint, to fix public collections not showing when org visibility is
hidden
- Enables sorting collections by `dateLatest`, sorts public collections
by `dateLatest` by default
- Enables sorting collections by page count
- Shows collection period (i.e. 
`dateEarliest` to `dateLatest`) in collections list - Shows same collection metadata in private and public views, updates private view info bar - Fixes "Update Org Profile" action item showing for crawler roles --------- Co-authored-by: Tessa Walsh --- backend/btrixcloud/colls.py | 26 +++- .../collections/collection-metadata-dialog.ts | 5 +- .../src/layouts/collections/metadataColumn.ts | 57 ++++++++ frontend/src/pages/collections/collection.ts | 36 +----- .../org/archived-item-qa/archived-item-qa.ts | 4 +- frontend/src/pages/org/collection-detail.ts | 122 +++++++++--------- frontend/src/pages/org/collections-list.ts | 69 ++++++---- frontend/src/pages/org/dashboard.ts | 104 +++++++-------- frontend/src/pages/org/profile.ts | 34 ++++- frontend/src/strings/ui.ts | 3 + frontend/src/strings/utils.ts | 25 ++++ frontend/src/types/api.ts | 4 +- frontend/src/types/collection.ts | 6 +- frontend/src/types/utils.ts | 6 +- frontend/src/utils/pluralize.ts | 26 ++++ 15 files changed, 329 insertions(+), 198 deletions(-) create mode 100644 frontend/src/layouts/collections/metadataColumn.ts create mode 100644 frontend/src/strings/utils.ts diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index f71313d2af..09a0a6158c 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -396,7 +396,7 @@ async def list_collections( page = page - 1 skip = page * page_size - match_query: dict[str, object] = {"oid": org.id} + match_query: Dict[str, Union[str, UUID, int, object]] = {"oid": org.id} if name: match_query["name"] = name @@ -409,15 +409,33 @@ async def list_collections( elif access: match_query["access"] = access - aggregate = [{"$match": match_query}] + aggregate: List[Dict[str, Union[str, UUID, int, object]]] = [ + {"$match": match_query} + ] if sort_by: - if sort_by not in ("modified", "name", "description", "totalSize"): + if sort_by not in ( + "created", + "modified", + "dateLatest", + "name", + "crawlCount", + "pageCount", + "totalSize", + "description", + "caption", + ): raise HTTPException(status_code=400, detail="invalid_sort_by") if sort_direction not in (1, -1): raise HTTPException(status_code=400, detail="invalid_sort_direction") - aggregate.extend([{"$sort": {sort_by: sort_direction}}]) + sort_query = {sort_by: sort_direction} + + # add secondary sort keys: + if sort_by == "dateLatest": + sort_query["dateEarliest"] = sort_direction + + aggregate.extend([{"$sort": sort_query}]) aggregate.extend( [ diff --git a/frontend/src/features/collections/collection-metadata-dialog.ts b/frontend/src/features/collections/collection-metadata-dialog.ts index fcbb6b7da4..160ce1e468 100644 --- a/frontend/src/features/collections/collection-metadata-dialog.ts +++ b/frontend/src/features/collections/collection-metadata-dialog.ts @@ -135,12 +135,13 @@ export class CollectionMetadataDialog extends BtrixElement { ${msg( - "Write a short description that summarizes this collection. If the collection is public, this description will be visible next to the collection name.", + "Write a short description that summarizes this collection. If the collection is shareable, this will appear next to the collection name.", )} ${this.collection ? 
nothing : msg( - "You can write a longer description in the 'About' section after creating the collection.", + html`You can add a longer description in the “About” + section after creating the collection.`, )} TemplateResult | string; + }) { + return html` + + ${when( + collection, + render, + () => html``, + )} + + `; + }; +} + +export function metadataColumn(collection?: Collection | PublicCollection) { + const metadataItem = metadataItemWithCollection(collection); + + return html` + + ${metadataItem({ + label: msg("Collection Period"), + render: (col) => html` + + ${monthYearDateRange(col.dateEarliest, col.dateLatest)} + + `, + })} + ${metadataItem({ + label: msg("Pages in Collection"), + render: (col) => + `${localize.number(col.pageCount)} ${pluralOf("pages", col.pageCount)}`, + })} + ${metadataItem({ + label: msg("Total Page Snapshots"), + render: (col) => + `${localize.number(col.snapshotCount)} ${pluralOf("snapshots", col.snapshotCount)}`, + })} + + `; +} diff --git a/frontend/src/pages/collections/collection.ts b/frontend/src/pages/collections/collection.ts index b7d6b24314..5d51379e83 100644 --- a/frontend/src/pages/collections/collection.ts +++ b/frontend/src/pages/collections/collection.ts @@ -1,4 +1,4 @@ -import { localized, msg, str } from "@lit/localize"; +import { localized, msg } from "@lit/localize"; import { Task, TaskStatus } from "@lit/task"; import { html, type TemplateResult } from "lit"; import { customElement, property } from "lit/decorators.js"; @@ -6,6 +6,7 @@ import { ifDefined } from "lit/directives/if-defined.js"; import { when } from "lit/directives/when.js"; import { BtrixElement } from "@/classes/BtrixElement"; +import { metadataColumn } from "@/layouts/collections/metadataColumn"; import { page } from "@/layouts/page"; import { RouteNamespace } from "@/routes"; import type { PublicCollection } from "@/types/collection"; @@ -211,39 +212,8 @@ export class Collection extends BtrixElement { `; } - // TODO Consolidate with collection-detail.ts private renderAbout(collection: PublicCollection) { - const dateRange = () => { - if (!collection.dateEarliest || !collection.dateLatest) { - return msg("n/a"); - } - const format: Intl.DateTimeFormatOptions = { - month: "long", - year: "numeric", - }; - const dateEarliest = this.localize.date(collection.dateEarliest, format); - const dateLatest = this.localize.date(collection.dateLatest, format); - - if (dateEarliest === dateLatest) return dateLatest; - - return msg(str`${dateEarliest} to ${dateLatest}`, { - desc: "Date range formatted to show full month name and year", - }); - }; - - const metadata = html` - - - ${dateRange()} - - - ${this.localize.number(collection.pageCount)} - - - ${this.localize.bytes(collection.totalSize)} - - - `; + const metadata = metadataColumn(collection); if (collection.description) { return html` diff --git a/frontend/src/pages/org/archived-item-qa/archived-item-qa.ts b/frontend/src/pages/org/archived-item-qa/archived-item-qa.ts index 5d5ea682e3..a2192d309b 100644 --- a/frontend/src/pages/org/archived-item-qa/archived-item-qa.ts +++ b/frontend/src/pages/org/archived-item-qa/archived-item-qa.ts @@ -38,6 +38,7 @@ import type { } from "@/types/api"; import type { ArchivedItem, ArchivedItemPageComment } from "@/types/crawler"; import type { ArchivedItemQAPage, QARun } from "@/types/qa"; +import { SortDirection as APISortDirection } from "@/types/utils"; import { isActive, isSuccessfullyFinished, @@ -553,7 +554,8 @@ export class ArchivedItemQA extends BtrixElement { .pages=${this.pages} 
.orderBy=${{ field: this.sortPagesBy.sortBy, - direction: (this.sortPagesBy.sortDirection === -1 + direction: (this.sortPagesBy.sortDirection === + APISortDirection.Descending ? "desc" : "asc") as SortDirection, }} diff --git a/frontend/src/pages/org/collection-detail.ts b/frontend/src/pages/org/collection-detail.ts index 9073a47284..ce7ad24c87 100644 --- a/frontend/src/pages/org/collection-detail.ts +++ b/frontend/src/pages/org/collection-detail.ts @@ -14,13 +14,21 @@ import type { MarkdownEditor } from "@/components/ui/markdown-editor"; import type { PageChangeEvent } from "@/components/ui/pagination"; import { SelectCollectionAccess } from "@/features/collections/select-collection-access"; import type { ShareCollection } from "@/features/collections/share-collection"; +import { + metadataColumn, + metadataItemWithCollection, +} from "@/layouts/collections/metadataColumn"; import { pageHeader, pageNav, type Breadcrumb } from "@/layouts/pageHeader"; import type { APIPaginatedList, APIPaginationQuery, APISortQuery, } from "@/types/api"; -import { CollectionAccess, type Collection } from "@/types/collection"; +import { + CollectionAccess, + type Collection, + type PublicCollection, +} from "@/types/collection"; import type { ArchivedItem, Crawl, Upload } from "@/types/crawler"; import type { CrawlState } from "@/types/crawlState"; import { pluralOf } from "@/utils/pluralize"; @@ -43,7 +51,7 @@ export class CollectionDetail extends BtrixElement { collectionId!: string; @property({ type: String }) - collectionTab: Tab = Tab.Replay; + collectionTab: Tab | null = Tab.Replay; @state() private collection?: Collection; @@ -105,6 +113,9 @@ export class CollectionDetail extends BtrixElement { void this.fetchCollection(); void this.fetchArchivedItems({ page: 1 }); } + if (changedProperties.has("collectionTab") && this.collectionTab === null) { + this.collectionTab = Tab.Replay; + } } protected async updated( @@ -472,16 +483,6 @@ export class CollectionDetail extends BtrixElement { (col) => `${this.localize.number(col.crawlCount)} ${pluralOf("items", col.crawlCount)}`, )} - ${this.renderDetailItem(msg("Total Size"), (col) => - this.localize.bytes(col.totalSize || 0, { - unitDisplay: "narrow", - }), - )} - ${this.renderDetailItem( - msg("Total Pages"), - (col) => - `${this.localize.number(col.pageCount)} ${pluralOf("pages", col.pageCount)}`, - )} ${when(this.collection?.created, (created) => // Collections created before 49516bc4 is released may not have date in db created @@ -495,12 +496,13 @@ export class CollectionDetail extends BtrixElement { year="numeric" hour="numeric" minute="numeric" + time-zone-name="short" >`, ) : nothing, )} ${this.renderDetailItem( - msg("Last Updated"), + msg("Last Modified"), (col) => html``, )} @@ -517,67 +520,58 @@ export class CollectionDetail extends BtrixElement { private renderDetailItem( label: string | TemplateResult, - renderContent: (collection: Collection) => TemplateResult | string, + renderContent: (collection: PublicCollection) => TemplateResult | string, ) { - return html` - - ${when( - this.collection, - () => renderContent(this.collection!), - () => html``, - )} - - `; + return metadataItemWithCollection(this.collection)({ + label, + render: renderContent, + }); } - // TODO Consolidate with collection.ts private renderAbout() { - const dateRange = (collection: Collection) => { - if (!collection.dateEarliest || !collection.dateLatest) { - return msg("n/a"); - } - const format: Intl.DateTimeFormatOptions = { - month: "long", - year: "numeric", - }; - const 
dateEarliest = this.localize.date(collection.dateEarliest, format); - const dateLatest = this.localize.date(collection.dateLatest, format); - - if (dateEarliest === dateLatest) return dateLatest; - - return msg(str`${dateEarliest} to ${dateLatest}`, { - desc: "Date range formatted to show full month name and year", - }); - }; - const skeleton = html``; - - const metadata = html` - - - ${this.collection ? dateRange(this.collection) : skeleton} - - - `; + const metadata = metadataColumn(this.collection); return html`
-
-

- ${msg("Description")} -

+
+
+

+ ${msg("About This Collection")} +

+ +
+

+ ${msg( + html`Describe your collection in long-form rich text (e.g. + bold and italicized text.)`, + )} +

+

+ ${msg( + html`If this collection is shareable, this will appear in + the “About This Collection” section of the shared + collection.`, + )} +

+
+ +
+
${when( this.collection?.description && !this.isEditingDescription, () => html` - (this.isEditingDescription = true)} - > - - ${msg("Edit Description")} - + + (this.isEditingDescription = true)} + > + + `, )}
@@ -602,7 +596,7 @@ export class CollectionDetail extends BtrixElement { ` : html`
-

+

${msg("No description provided.")}

= { - modified: { - label: msg("Last Updated"), - defaultDirection: "desc", - }, name: { label: msg("Name"), - defaultDirection: "asc", + defaultDirection: SortDirection.Ascending, + }, + dateLatest: { + label: msg("Collection Period"), + defaultDirection: SortDirection.Descending, + }, + crawlCount: { + label: msg("Archived Items"), + defaultDirection: SortDirection.Descending, + }, + pageCount: { + label: msg("Pages"), + defaultDirection: SortDirection.Descending, }, totalSize: { label: msg("Size"), - defaultDirection: "desc", + defaultDirection: SortDirection.Descending, + }, + modified: { + label: msg("Last Modified"), + defaultDirection: SortDirection.Descending, }, }; const MIN_SEARCH_LENGTH = 2; @@ -269,7 +287,7 @@ export class CollectionsList extends BtrixElement { @click=${() => { this.orderBy = { ...this.orderBy, - direction: this.orderBy.direction === "asc" ? "desc" : "asc", + direction: -1 * this.orderBy.direction, }; }} > @@ -363,24 +381,22 @@ export class CollectionsList extends BtrixElement { return html` ${msg("Collection Access")} - ${msg("Name")} - - ${msg("Archived Items")} - - ${msg("Total Size")} + ${msg(html`Name & Collection Period`)} - ${msg("Total Pages")} + ${msg("Archived Items")} + ${msg("Pages")} + ${msg("Size")} - ${msg("Last Updated")} + ${msg("Last Modified")} ${msg("Row Actions")} @@ -514,30 +530,31 @@ export class CollectionsList extends BtrixElement { href=${`${this.navigate.orgBasePath}/collections/view/${col.id}`} @click=${this.navigate.link} > - ${col.name} +
${col.name}
+
+ ${monthYearDateRange(col.dateEarliest, col.dateLatest)} +
${this.localize.number(col.crawlCount, { notation: "compact" })} ${pluralOf("items", col.crawlCount)} + + ${this.localize.number(col.pageCount, { notation: "compact" })} + ${pluralOf("pages", col.pageCount)} + ${this.localize.bytes(col.totalSize || 0, { unitDisplay: "narrow", })} - - ${this.localize.number(col.pageCount, { notation: "compact" })} - ${pluralOf("pages", col.pageCount)} - @@ -783,7 +800,7 @@ export class CollectionsList extends BtrixElement { this.collections?.pageSize || INITIAL_PAGE_SIZE, sortBy: this.orderBy.field, - sortDirection: this.orderBy.direction === "desc" ? -1 : 1, + sortDirection: this.orderBy.direction, }, { arrayFormat: "comma", diff --git a/frontend/src/pages/org/dashboard.ts b/frontend/src/pages/org/dashboard.ts index 56d17f8b39..2d3b06f762 100644 --- a/frontend/src/pages/org/dashboard.ts +++ b/frontend/src/pages/org/dashboard.ts @@ -1,10 +1,11 @@ import { localized, msg } from "@lit/localize"; import { Task } from "@lit/task"; import type { SlSelectEvent } from "@shoelace-style/shoelace"; -import { html, type PropertyValues, type TemplateResult } from "lit"; +import { html, nothing, type PropertyValues, type TemplateResult } from "lit"; import { customElement, property, state } from "lit/decorators.js"; import { ifDefined } from "lit/directives/if-defined.js"; import { when } from "lit/directives/when.js"; +import queryString from "query-string"; import type { SelectNewDialogEvent } from "."; @@ -13,8 +14,9 @@ import { ClipboardController } from "@/controllers/clipboard"; import { pageHeading } from "@/layouts/page"; import { pageHeader } from "@/layouts/pageHeader"; import { RouteNamespace } from "@/routes"; -import type { PublicCollection } from "@/types/collection"; -import type { PublicOrgCollections } from "@/types/org"; +import type { APIPaginatedList, APISortQuery } from "@/types/api"; +import { CollectionAccess, type Collection } from "@/types/collection"; +import { SortDirection } from "@/types/utils"; import { humanizeExecutionSeconds } from "@/utils/executionTimeFormatter"; import { tw } from "@/utils/tailwind"; @@ -56,16 +58,13 @@ export class Dashboard extends BtrixElement { }; private readonly publicCollections = new Task(this, { - task: async ([slug, metrics]) => { - if (!slug) throw new Error("slug required"); + task: async ([orgId]) => { + if (!orgId) throw new Error("orgId required"); - if (!metrics) return undefined; - if (!metrics.publicCollectionsCount) return []; - - const collections = await this.fetchCollections({ slug }); + const collections = await this.getPublicCollections({ orgId }); return collections; }, - args: () => [this.orgSlugState, this.metrics] as const, + args: () => [this.orgId] as const, }); willUpdate(changedProperties: PropertyValues & Map) { @@ -334,15 +333,17 @@ export class Dashboard extends BtrixElement { ${msg("Copy Link to Profile")} ` - : html` - - - - ${msg("Update Org Visibility")} - - `, + : this.appState.isAdmin + ? 
html` + + + + ${msg("Update Org Profile")} + + ` + : nothing, )} @@ -368,29 +369,16 @@ export class Dashboard extends BtrixElement { let button: TemplateResult; if (this.metrics.collectionsCount) { - if (this.org.enablePublicProfile) { - button = html` - { - this.navigate.to(`${this.navigate.orgBasePath}/collections`); - }} - > - - ${msg("Manage Collections")} - - `; - } else { - button = html` - { - this.navigate.to(`${this.navigate.orgBasePath}/settings`); - }} - > - - ${msg("Update Org Visibility")} - - `; - } + button = html` + { + this.navigate.to(`${this.navigate.orgBasePath}/collections`); + }} + > + + ${msg("Manage Collections")} + + `; } else { button = html` { - const resp = await fetch(`/api/public/orgs/${slug}/collections`, { - headers: { "Content-Type": "application/json" }, - }); - - switch (resp.status) { - case 200: - return ((await resp.json()) as PublicOrgCollections).collections; - case 404: - return []; - default: - throw resp.status; - } + private async getPublicCollections({ orgId }: { orgId: string }) { + const params: APISortQuery & { + access: CollectionAccess; + } = { + sortBy: "dateLatest", + sortDirection: SortDirection.Descending, + access: CollectionAccess.Public, + }; + const query = queryString.stringify(params); + + const data = await this.api.fetch>( + `/orgs/${orgId}/collections?${query}`, + ); + + return data.items; } } diff --git a/frontend/src/pages/org/profile.ts b/frontend/src/pages/org/profile.ts index fdcbb73a10..7dc9936fb0 100644 --- a/frontend/src/pages/org/profile.ts +++ b/frontend/src/pages/org/profile.ts @@ -3,10 +3,14 @@ import { Task } from "@lit/task"; import { html, nothing } from "lit"; import { customElement, property, state } from "lit/decorators.js"; import { when } from "lit/directives/when.js"; +import queryString from "query-string"; import { BtrixElement } from "@/classes/BtrixElement"; import { page, pageHeading } from "@/layouts/page"; +import type { APIPaginatedList, APISortQuery } from "@/types/api"; +import { CollectionAccess, type Collection } from "@/types/collection"; import type { OrgData, PublicOrgCollections } from "@/types/org"; +import { SortDirection } from "@/types/utils"; @localized() @customElement("btrix-org-profile") @@ -242,7 +246,13 @@ export class OrgProfile extends BtrixElement { }: { slug: string; }): Promise { - const resp = await fetch(`/api/public/orgs/${slug}/collections`, { + const params: APISortQuery = { + sortBy: "dateLatest", + sortDirection: SortDirection.Descending, + }; + const query = queryString.stringify(params); + + const resp = await fetch(`/api/public/orgs/${slug}/collections?${query}`, { headers: { "Content-Type": "application/json" }, }); @@ -277,6 +287,9 @@ export class OrgProfile extends BtrixElement { } const org = await this.api.fetch(`/orgs/${userOrg.id}`); + const collections = await this.getUserPublicCollections({ + orgId: this.orgId, + }); return { org: { @@ -285,10 +298,27 @@ export class OrgProfile extends BtrixElement { url: org.publicUrl || "", verified: false, // TODO }, - collections: [], // TODO + collections, }; } catch { return null; } } + + private async getUserPublicCollections({ orgId }: { orgId: string }) { + const params: APISortQuery & { + access: CollectionAccess; + } = { + sortBy: "dateLatest", + sortDirection: SortDirection.Descending, + access: CollectionAccess.Public, + }; + const query = queryString.stringify(params); + + const data = await this.api.fetch>( + `/orgs/${orgId}/collections?${query}`, + ); + + return data.items; + } } diff --git 
a/frontend/src/strings/ui.ts b/frontend/src/strings/ui.ts index 95c560639a..5a426397b6 100644 --- a/frontend/src/strings/ui.ts +++ b/frontend/src/strings/ui.ts @@ -1,6 +1,9 @@ import { msg } from "@lit/localize"; import { html, type TemplateResult } from "lit"; +export const noData = "--"; +export const notApplicable = msg("n/a"); + // TODO Refactor all generic confirmation messages to use utility export const deleteConfirmation = (name: string | TemplateResult) => msg(html` diff --git a/frontend/src/strings/utils.ts b/frontend/src/strings/utils.ts new file mode 100644 index 0000000000..46ad5e7a5c --- /dev/null +++ b/frontend/src/strings/utils.ts @@ -0,0 +1,25 @@ +import { msg, str } from "@lit/localize"; + +import { noData } from "@/strings/ui"; +import localize from "@/utils/localize"; + +export const monthYearDateRange = ( + startDate?: string | null, + endDate?: string | null, +): string => { + if (!startDate || !endDate) { + return noData; + } + const format: Intl.DateTimeFormatOptions = { + month: "long", + year: "numeric", + }; + const startMonthYear = localize.date(startDate, format); + const endMonthYear = localize.date(endDate, format); + + if (startMonthYear === endMonthYear) return endMonthYear; + + return msg(str`${startMonthYear} to ${endMonthYear}`, { + desc: "Date range formatted to show full month name and year", + }); +}; diff --git a/frontend/src/types/api.ts b/frontend/src/types/api.ts index b016944aad..173d406562 100644 --- a/frontend/src/types/api.ts +++ b/frontend/src/types/api.ts @@ -30,7 +30,7 @@ export type APIPaginationQuery = { pageSize?: number; }; -export type APISortQuery = { - sortBy?: string; +export type APISortQuery<T = Record<string, unknown>> = { + sortBy?: keyof T; sortDirection?: SortDirection; }; diff --git a/frontend/src/types/collection.ts b/frontend/src/types/collection.ts index 1c0887bd52..f3d6899a57 100644 --- a/frontend/src/types/collection.ts +++ b/frontend/src/types/collection.ts @@ -11,6 +11,8 @@ export const publicCollectionSchema = z.object({ slug: z.string(), oid: z.string(), name: z.string(), + created: z.string().datetime(), + modified: z.string().datetime(), caption: z.string().nullable(), description: z.string().nullable(), resources: z.array(z.string()), @@ -25,6 +27,7 @@ export const publicCollectionSchema = z.object({ defaultThumbnailName: z.string().nullable(), crawlCount: z.number(), pageCount: z.number(), + snapshotCount: z.number(), totalSize: z.number(), allowPublicDownload: z.boolean(), homeUrl: z.string().url().nullable(), @@ -34,9 +37,6 @@ export type PublicCollection = z.infer<typeof publicCollectionSchema>; export const collectionSchema = publicCollectionSchema.extend({ - id: z.string(), - created: z.string().datetime(), - modified: z.string().datetime(), tags: z.array(z.string()), access: z.nativeEnum(CollectionAccess), }); diff --git a/frontend/src/types/utils.ts b/frontend/src/types/utils.ts index d65cc73589..3d15b69317 100644 --- a/frontend/src/types/utils.ts +++ b/frontend/src/types/utils.ts @@ -22,5 +22,7 @@ export type Range<F extends number, T extends number> = Exclude< Enumerate<T>, Enumerate<F> >; -/** 1 or -1, but will accept any number for easier typing where this is used **/ -export type SortDirection = -1 | 1 | (number & {}); +export enum SortDirection { + Descending = -1, + Ascending = 1, +} diff --git a/frontend/src/utils/pluralize.ts b/frontend/src/utils/pluralize.ts index ebbb592e87..29192e8e4a 100644 --- a/frontend/src/utils/pluralize.ts +++ b/frontend/src/utils/pluralize.ts @@ -91,6 +91,32 @@ const plurals = { id: "pages.plural.other", }), }, + snapshots: {
zero: msg("snapshots", { + desc: 'plural form of "snapshot" for zero snapshots', + id: "snapshots.plural.zero", + }), + one: msg("snapshot", { + desc: 'singular form for "snapshot"', + id: "snapshots.plural.one", + }), + two: msg("snapshots", { + desc: 'plural form of "snapshot" for two snapshots', + id: "snapshots.plural.two", + }), + few: msg("snapshots", { + desc: 'plural form of "snapshot" for few snapshots', + id: "snapshots.plural.few", + }), + many: msg("snapshots", { + desc: 'plural form of "snapshot" for many snapshots', + id: "snapshots.plural.many", + }), + other: msg("snapshots", { + desc: 'plural form of "snapshot" for multiple/other snapshots', + id: "snapshots.plural.other", + }), + }, comments: { zero: msg("comments", { desc: 'plural form of "comment" for zero comments', From 1c5018154559d64a6359100e3dfee7a30083d9cc Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Wed, 22 Jan 2025 10:51:50 -0500 Subject: [PATCH 15/22] Change backend terminology: use pageCount and uniquePageCount --- backend/btrixcloud/colls.py | 10 +++--- .../migrations/migration_0037_upload_pages.py | 2 +- ...migration_0040_archived_item_page_count.py | 4 +-- .../migration_0041_pages_snapshots.py | 11 ++++--- backend/btrixcloud/models.py | 16 +++++----- backend/btrixcloud/operator/crawls.py | 2 +- backend/btrixcloud/orgs.py | 28 ++++++++-------- backend/btrixcloud/pages.py | 20 ++++++------ backend/test/test_collections.py | 32 +++++++++---------- backend/test/test_org.py | 2 +- backend/test/test_run_crawl.py | 4 +-- backend/test/test_uploads.py | 4 +-- 12 files changed, 70 insertions(+), 65 deletions(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 09a0a6158c..3553884160 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -584,7 +584,7 @@ async def update_collection_counts_and_tags(self, collection_id: UUID): """Set current crawl info in config when crawl begins""" # pylint: disable=too-many-locals crawl_count = 0 - snapshot_count = 0 + page_count = 0 total_size = 0 tags = [] @@ -603,10 +603,10 @@ async def update_collection_counts_and_tags(self, collection_id: UUID): total_size += file.size try: - _, crawl_snapshots = await self.page_ops.list_page_snapshots( + _, crawl_page_count = await self.page_ops.list_pages( crawl.id, org, page_size=1_000_000 ) - snapshot_count += crawl_snapshots + page_count += crawl_page_count # pylint: disable=broad-exception-caught except Exception: pass @@ -618,7 +618,7 @@ async def update_collection_counts_and_tags(self, collection_id: UUID): sorted_tags = [tag for tag, count in Counter(tags).most_common()] - page_count = await self.page_ops.get_unique_page_count(crawl_ids) + unique_page_count = await self.page_ops.get_unique_page_count(crawl_ids) await self.collections.find_one_and_update( {"_id": collection_id}, @@ -626,7 +626,7 @@ async def update_collection_counts_and_tags(self, collection_id: UUID): "$set": { "crawlCount": crawl_count, "pageCount": page_count, - "snapshotCount": snapshot_count, + "uniquePageCount": unique_page_count, "totalSize": total_size, "tags": sorted_tags, } diff --git a/backend/btrixcloud/migrations/migration_0037_upload_pages.py b/backend/btrixcloud/migrations/migration_0037_upload_pages.py index cc0f056985..62bfe98237 100644 --- a/backend/btrixcloud/migrations/migration_0037_upload_pages.py +++ b/backend/btrixcloud/migrations/migration_0037_upload_pages.py @@ -32,7 +32,7 @@ async def org_upload_pages_already_added(self, oid: UUID) -> bool: mdb_crawls = self.mdb["crawls"] async for upload in 
mdb_crawls.find({"oid": oid, "type": "upload"}): upload_id = upload["_id"] - _, total = await self.page_ops.list_page_snapshots(upload_id) + _, total = await self.page_ops.list_pages(upload_id) if total > 0: return True return False diff --git a/backend/btrixcloud/migrations/migration_0040_archived_item_page_count.py b/backend/btrixcloud/migrations/migration_0040_archived_item_page_count.py index f57f77d579..5ed031b700 100644 --- a/backend/btrixcloud/migrations/migration_0040_archived_item_page_count.py +++ b/backend/btrixcloud/migrations/migration_0040_archived_item_page_count.py @@ -34,10 +34,10 @@ async def migrate_up(self): async for crawl_raw in crawls_mdb.find({}): crawl_id = crawl_raw["_id"] try: - await self.page_ops.set_archived_item_page_snapshot_counts(crawl_id) + await self.page_ops.set_archived_item_page_counts(crawl_id) # pylint: disable=broad-exception-caught except Exception as err: print( - f"Error saving page/snapshot counts for archived item {crawl_id}: {err}", + f"Error saving page counts for archived item {crawl_id}: {err}", flush=True, ) diff --git a/backend/btrixcloud/migrations/migration_0041_pages_snapshots.py b/backend/btrixcloud/migrations/migration_0041_pages_snapshots.py index e024f3fd90..745777cce1 100644 --- a/backend/btrixcloud/migrations/migration_0041_pages_snapshots.py +++ b/backend/btrixcloud/migrations/migration_0041_pages_snapshots.py @@ -1,5 +1,5 @@ """ -Migration 0041 - Rationalize page and snapshot counts +Migration 0041 - Rationalize page counts """ from btrixcloud.migrations import BaseMigration @@ -20,13 +20,13 @@ def __init__(self, mdb, **kwargs): async def migrate_up(self): """Perform migration up. - Recalculate collections to get new page and snapshot counts + Recalculate collections to get new page and unique page counts """ colls_mdb = self.mdb["collections"] if self.coll_ops is None: print( - "Unable to set collection page and snapshot counts, missing coll_ops", + "Unable to set collection page counts, missing coll_ops", flush=True, ) return @@ -37,4 +37,7 @@ async def migrate_up(self): await self.coll_ops.update_collection_counts_and_tags(coll_id) # pylint: disable=broad-exception-caught except Exception as err: - print(f"Unable to update collection {coll_id}: {err}", flush=True) + print( + f"Unable to update page counts for collection {coll_id}: {err}", + flush=True, + ) diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 822d83f4ec..e0e57f200a 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -798,7 +798,7 @@ class BaseCrawl(CoreCrawlable, BaseMongoModel): reviewStatus: ReviewStatus = None pageCount: Optional[int] = 0 - snapshotCount: Optional[int] = 0 + uniquePageCount: Optional[int] = 0 filePageCount: Optional[int] = 0 errorPageCount: Optional[int] = 0 @@ -876,7 +876,7 @@ class CrawlOut(BaseMongoModel): lastQAStarted: Optional[datetime] = None pageCount: Optional[int] = 0 - snapshotCount: Optional[int] = 0 + uniquePageCount: Optional[int] = 0 filePageCount: Optional[int] = 0 errorPageCount: Optional[int] = 0 @@ -1252,7 +1252,7 @@ class Collection(BaseMongoModel): crawlCount: Optional[int] = 0 pageCount: Optional[int] = 0 - snapshotCount: Optional[int] = 0 + uniquePageCount: Optional[int] = 0 totalSize: Optional[int] = 0 dateEarliest: Optional[datetime] = None @@ -1306,7 +1306,7 @@ class CollOut(BaseMongoModel): crawlCount: Optional[int] = 0 pageCount: Optional[int] = 0 - snapshotCount: Optional[int] = 0 + uniquePageCount: Optional[int] = 0 totalSize: Optional[int] = 0 
dateEarliest: Optional[datetime] = None @@ -1343,7 +1343,7 @@ class PublicCollOut(BaseMongoModel): crawlCount: Optional[int] = 0 pageCount: Optional[int] = 0 - snapshotCount: Optional[int] = 0 + uniquePageCount: Optional[int] = 0 totalSize: Optional[int] = 0 dateEarliest: Optional[datetime] = None @@ -1924,9 +1924,9 @@ class OrgMetrics(BaseModel): pageCount: int crawlPageCount: int uploadPageCount: int - snapshotCount: int - crawlSnapshotCount: int - uploadSnapshotCount: int + uniquePageCount: int + crawlUniquePageCount: int + uploadUniquePageCount: int profileCount: int workflowsRunningCount: int maxConcurrentCrawls: int diff --git a/backend/btrixcloud/operator/crawls.py b/backend/btrixcloud/operator/crawls.py index 7485b36ca9..86c326b7a8 100644 --- a/backend/btrixcloud/operator/crawls.py +++ b/backend/btrixcloud/operator/crawls.py @@ -1534,7 +1534,7 @@ async def do_crawl_finished_tasks( ) if state in SUCCESSFUL_STATES and crawl.oid: - await self.page_ops.set_archived_item_page_snapshot_counts(crawl.id) + await self.page_ops.set_archived_item_page_counts(crawl.id) await self.org_ops.inc_org_bytes_stored( crawl.oid, status.filesAddedSize, "crawl" ) diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py index 3132e8c354..7c9f7558c0 100644 --- a/backend/btrixcloud/orgs.py +++ b/backend/btrixcloud/orgs.py @@ -943,9 +943,9 @@ async def get_org_metrics(self, org: Organization) -> dict[str, int]: crawl_count = 0 upload_count = 0 - snapshot_count = 0 - crawl_snapshot_count = 0 - upload_snapshot_count = 0 + page_count = 0 + crawl_page_count = 0 + upload_page_count = 0 crawl_ids = [] upload_ids = [] @@ -957,20 +957,22 @@ async def get_org_metrics(self, org: Organization) -> dict[str, int]: archived_item_count += 1 if item.type == "crawl": crawl_count += 1 - crawl_snapshot_count += item.snapshotCount or 0 + crawl_page_count += item.pageCount or 0 crawl_ids.append(item.id) if item.type == "upload": upload_count += 1 - upload_snapshot_count += item.snapshotCount or 0 + upload_page_count += item.pageCount or 0 upload_ids.append(item.id) - if item.snapshotCount: - snapshot_count += item.snapshotCount + if item.pageCount: + page_count += item.pageCount all_archived_item_ids = crawl_ids + upload_ids - page_count = await self.page_ops.get_unique_page_count(all_archived_item_ids) - crawl_page_count = await self.page_ops.get_unique_page_count(crawl_ids) - upload_page_count = await self.page_ops.get_unique_page_count(upload_ids) + unique_page_count = await self.page_ops.get_unique_page_count( + all_archived_item_ids + ) + crawl_unique_page_count = await self.page_ops.get_unique_page_count(crawl_ids) + upload_unique_page_count = await self.page_ops.get_unique_page_count(upload_ids) profile_count = await self.profiles_db.count_documents({"oid": org.id}) workflows_running_count = await self.crawls_db.count_documents( @@ -996,9 +998,9 @@ async def get_org_metrics(self, org: Organization) -> dict[str, int]: "pageCount": page_count, "crawlPageCount": crawl_page_count, "uploadPageCount": upload_page_count, - "snapshotCount": snapshot_count, - "crawlSnapshotCount": crawl_snapshot_count, - "uploadSnapshotCount": upload_snapshot_count, + "uniquePageCount": unique_page_count, + "crawlUniquePageCount": crawl_unique_page_count, + "uploadUniquePageCount": upload_unique_page_count, "profileCount": profile_count, "workflowsRunningCount": workflows_running_count, "maxConcurrentCrawls": max_concurrent_crawls, diff --git a/backend/btrixcloud/pages.py b/backend/btrixcloud/pages.py index 3a664d82a2..4149a3e9d3 
100644 --- a/backend/btrixcloud/pages.py +++ b/backend/btrixcloud/pages.py @@ -92,7 +92,7 @@ async def add_crawl_pages_to_db_from_wacz(self, crawl_id: str, batch_size=100): if pages_buffer: await self._add_pages_to_db(crawl_id, pages_buffer) - await self.set_archived_item_page_snapshot_counts(crawl_id) + await self.set_archived_item_page_counts(crawl_id) print(f"Added pages for crawl {crawl_id} to db", flush=True) # pylint: disable=broad-exception-caught, raise-missing-from @@ -435,7 +435,7 @@ async def delete_page_notes( return {"deleted": True} - async def list_page_snapshots( + async def list_pages( self, crawl_id: str, org: Optional[Organization] = None, @@ -453,7 +453,7 @@ async def list_page_snapshots( sort_by: Optional[str] = None, sort_direction: Optional[int] = -1, ) -> Tuple[Union[List[PageOut], List[PageOutWithSingleQA]], int]: - """List all page snapshots in crawl""" + """List all pages in crawl""" # pylint: disable=duplicate-code, too-many-locals, too-many-branches, too-many-statements # Zero-index page for query page = page - 1 @@ -670,15 +670,15 @@ async def get_unique_page_count(self, crawl_ids: List[str]) -> int: ) return len(unique_pages) or 0 - async def set_archived_item_page_snapshot_counts(self, crawl_id: str): - """Store archived item page and snapshot counts in crawl document""" - _, snapshot_count = await self.list_page_snapshots(crawl_id) + async def set_archived_item_page_counts(self, crawl_id: str): + """Store archived item page and unique page counts in crawl document""" + _, page_count = await self.list_pages(crawl_id) - page_count = await self.get_unique_page_count([crawl_id]) + unique_page_count = await self.get_unique_page_count([crawl_id]) await self.crawls.find_one_and_update( {"_id": crawl_id}, - {"$set": {"snapshotCount": snapshot_count, "pageCount": page_count}}, + {"$set": {"uniquePageCount": unique_page_count, "pageCount": page_count}}, ) @@ -876,7 +876,7 @@ async def get_pages_list( if approved: formatted_approved = str_list_to_bools(approved.split(",")) - pages, total = await ops.list_page_snapshots( + pages, total = await ops.list_pages( crawl_id=crawl_id, org=org, reviewed=reviewed, @@ -916,7 +916,7 @@ async def get_pages_list_with_qa( if approved: formatted_approved = str_list_to_bools(approved.split(",")) - pages, total = await ops.list_page_snapshots( + pages, total = await ops.list_pages( crawl_id=crawl_id, org=org, qa_run_id=qa_run_id, diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index 290141d623..e219134b95 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -82,7 +82,7 @@ def test_create_collection( assert data["caption"] == CAPTION assert data["crawlCount"] == 1 assert data["pageCount"] > 0 - assert data["snapshotCount"] > 0 + assert data["uniquePageCount"] > 0 assert data["totalSize"] > 0 modified = data["modified"] assert modified @@ -182,7 +182,7 @@ def test_update_collection( assert data["caption"] == UPDATED_CAPTION assert data["crawlCount"] == 1 assert data["pageCount"] > 0 - assert data["snapshotCount"] > 0 + assert data["uniquePageCount"] > 0 assert data["totalSize"] > 0 global modified modified = data["modified"] @@ -272,7 +272,7 @@ def test_add_remove_crawl_from_collection( assert data["id"] == _coll_id assert data["crawlCount"] == 2 assert data["pageCount"] > 0 - assert data["snapshotCount"] > 0 + assert data["uniquePageCount"] > 0 assert data["totalSize"] > 0 assert data["modified"] >= modified assert data["tags"] == ["wr-test-2", "wr-test-1"] @@ -297,7 
+297,7 @@ def test_add_remove_crawl_from_collection( assert data["id"] == _coll_id assert data["crawlCount"] == 0 assert data["pageCount"] == 0 - assert data["snapshotCount"] == 0 + assert data["uniquePageCount"] == 0 assert data["totalSize"] == 0 assert data["modified"] >= modified assert data.get("tags", []) == [] @@ -328,7 +328,7 @@ def test_add_remove_crawl_from_collection( assert data["id"] == _coll_id assert data["crawlCount"] == 2 assert data["pageCount"] > 0 - assert data["snapshotCount"] > 0 + assert data["uniquePageCount"] > 0 assert data["totalSize"] > 0 assert data["modified"] >= modified assert data["tags"] == ["wr-test-2", "wr-test-1"] @@ -351,7 +351,7 @@ def test_get_collection(crawler_auth_headers, default_org_id): assert data["caption"] == UPDATED_CAPTION assert data["crawlCount"] == 2 assert data["pageCount"] > 0 - assert data["snapshotCount"] > 0 + assert data["uniquePageCount"] > 0 assert data["totalSize"] > 0 assert data["modified"] >= modified assert data["tags"] == ["wr-test-2", "wr-test-1"] @@ -375,7 +375,7 @@ def test_get_collection_replay(crawler_auth_headers, default_org_id): assert data["caption"] == UPDATED_CAPTION assert data["crawlCount"] == 2 assert data["pageCount"] > 0 - assert data["snapshotCount"] > 0 + assert data["uniquePageCount"] > 0 assert data["totalSize"] > 0 assert data["modified"] >= modified assert data["tags"] == ["wr-test-2", "wr-test-1"] @@ -495,7 +495,7 @@ def test_add_upload_to_collection(crawler_auth_headers, default_org_id): assert data["id"] == _coll_id assert data["crawlCount"] == 3 assert data["pageCount"] > 0 - assert data["snapshotCount"] > 0 + assert data["uniquePageCount"] > 0 assert data["totalSize"] > 0 assert data["modified"] assert data["tags"] == ["wr-test-2", "wr-test-1"] @@ -556,7 +556,7 @@ def test_list_collections( assert first_coll["caption"] == UPDATED_CAPTION assert first_coll["crawlCount"] == 3 assert first_coll["pageCount"] > 0 - assert first_coll["snapshotCount"] > 0 + assert first_coll["uniquePageCount"] > 0 assert first_coll["totalSize"] > 0 assert first_coll["modified"] assert first_coll["tags"] == ["wr-test-2", "wr-test-1"] @@ -573,7 +573,7 @@ def test_list_collections( assert second_coll.get("description") is None assert second_coll["crawlCount"] == 1 assert second_coll["pageCount"] > 0 - assert second_coll["snapshotCount"] > 0 + assert second_coll["uniquePageCount"] > 0 assert second_coll["totalSize"] > 0 assert second_coll["modified"] assert second_coll["tags"] == ["wr-test-2"] @@ -594,7 +594,7 @@ def test_remove_upload_from_collection(crawler_auth_headers, default_org_id): assert data["id"] == _coll_id assert data["crawlCount"] == 2 assert data["pageCount"] > 0 - assert data["snapshotCount"] > 0 + assert data["uniquePageCount"] > 0 assert data["totalSize"] > 0 assert data["modified"] >= modified assert data.get("tags") == ["wr-test-2", "wr-test-1"] @@ -925,7 +925,7 @@ def test_list_public_collections( assert collection["dateLatest"] assert collection["crawlCount"] > 0 assert collection["pageCount"] > 0 - assert collection["snapshotCount"] > 0 + assert collection["uniquePageCount"] > 0 assert collection["totalSize"] > 0 # Test non-existing slug - it should return a 404 but not reveal @@ -1106,7 +1106,7 @@ def test_list_public_colls_home_url_thumbnail(): assert coll["dateLatest"] assert coll["crawlCount"] > 0 assert coll["pageCount"] > 0 - assert coll["snapshotCount"] > 0 + assert coll["uniquePageCount"] > 0 assert coll["totalSize"] > 0 for field in non_public_fields: @@ -1157,7 +1157,7 @@ def 
test_get_public_collection(default_org_id): assert coll["dateLatest"] assert coll["crawlCount"] > 0 assert coll["pageCount"] > 0 - assert coll["snapshotCount"] > 0 + assert coll["uniquePageCount"] > 0 assert coll["totalSize"] > 0 for field in NON_PUBLIC_COLL_FIELDS: @@ -1237,7 +1237,7 @@ def test_get_public_collection_unlisted(crawler_auth_headers, default_org_id): assert coll["dateLatest"] assert coll["crawlCount"] > 0 assert coll["pageCount"] > 0 - assert coll["snapshotCount"] > 0 + assert coll["uniquePageCount"] > 0 assert coll["totalSize"] > 0 assert coll["defaultThumbnailName"] == "orange-default.avif" assert coll["allowPublicDownload"] @@ -1279,7 +1279,7 @@ def test_get_public_collection_unlisted_org_profile_disabled( assert coll["dateLatest"] assert coll["crawlCount"] > 0 assert coll["pageCount"] > 0 - assert coll["snapshotCount"] > 0 + assert coll["uniquePageCount"] > 0 assert coll["totalSize"] > 0 assert coll["defaultThumbnailName"] == "orange-default.avif" assert coll["allowPublicDownload"] diff --git a/backend/test/test_org.py b/backend/test/test_org.py index e1c9715a6d..665f556b8c 100644 --- a/backend/test/test_org.py +++ b/backend/test/test_org.py @@ -569,7 +569,7 @@ def test_org_metrics(crawler_auth_headers, default_org_id): assert data["uploadCount"] >= 0 assert data["archivedItemCount"] == data["crawlCount"] + data["uploadCount"] assert data["pageCount"] > 0 - assert data["snapshotCount"] > 0 + assert data["uniquePageCount"] > 0 assert data["profileCount"] >= 0 assert data["workflowsRunningCount"] >= 0 assert data["workflowsQueuedCount"] >= 0 diff --git a/backend/test/test_run_crawl.py b/backend/test/test_run_crawl.py index 98b062fc62..20318a1638 100644 --- a/backend/test/test_run_crawl.py +++ b/backend/test/test_run_crawl.py @@ -877,7 +877,7 @@ def test_re_add_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_ ) assert r.status_code == 403 - # Check that pageCount and snapshotCount were stored on crawl + # Check that pageCount and uniquePageCount were stored on crawl r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}", headers=crawler_auth_headers, @@ -885,7 +885,7 @@ def test_re_add_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_ assert r.status_code == 200 data = r.json() assert data["pageCount"] > 0 - assert data["snapshotCount"] > 0 + assert data["uniquePageCount"] > 0 def test_crawl_page_notes(crawler_auth_headers, default_org_id, crawler_crawl_id): diff --git a/backend/test/test_uploads.py b/backend/test/test_uploads.py index 0a8473f6c3..ab3e816561 100644 --- a/backend/test/test_uploads.py +++ b/backend/test/test_uploads.py @@ -274,7 +274,7 @@ def test_get_upload_pages(admin_auth_headers, default_org_id, upload_id): assert page.get("modified") is None assert page.get("approved") is None - # Check that pageCount and snapshotCount stored on upload + # Check that pageCount and uniquePageCount stored on upload r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}", headers=admin_auth_headers, @@ -282,7 +282,7 @@ def test_get_upload_pages(admin_auth_headers, default_org_id, upload_id): assert r.status_code == 200 data = r.json() assert data["pageCount"] > 0 - assert data["snapshotCount"] > 0 + assert data["uniquePageCount"] > 0 def test_replace_upload( From cc8cac2cfb9256cdc548efada695741d7d8abe02 Mon Sep 17 00:00:00 2001 From: sua yoo Date: Wed, 22 Jan 2025 13:24:54 -0800 Subject: [PATCH 16/22] update field --- frontend/src/layouts/collections/metadataColumn.ts | 8 ++++---- 
frontend/src/pages/org/collections-list.ts | 2 +- frontend/src/types/collection.ts | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/frontend/src/layouts/collections/metadataColumn.ts b/frontend/src/layouts/collections/metadataColumn.ts index 82eb53d6a5..76a01ff2e9 100644 --- a/frontend/src/layouts/collections/metadataColumn.ts +++ b/frontend/src/layouts/collections/metadataColumn.ts @@ -43,14 +43,14 @@ export function metadataColumn(collection?: Collection | PublicCollection) { `, })} ${metadataItem({ - label: msg("Pages in Collection"), + label: msg("Unique Pages in Collection"), render: (col) => - `${localize.number(col.pageCount)} ${pluralOf("pages", col.pageCount)}`, + `${localize.number(col.uniquePageCount)} ${pluralOf("pages", col.uniquePageCount)}`, })} ${metadataItem({ - label: msg("Total Page Snapshots"), + label: msg("Total Pages Crawled"), render: (col) => - `${localize.number(col.snapshotCount)} ${pluralOf("snapshots", col.snapshotCount)}`, + `${localize.number(col.pageCount)} ${pluralOf("pages", col.pageCount)}`, })} `; diff --git a/frontend/src/pages/org/collections-list.ts b/frontend/src/pages/org/collections-list.ts index c4bd0ac00c..0c9b6e110a 100644 --- a/frontend/src/pages/org/collections-list.ts +++ b/frontend/src/pages/org/collections-list.ts @@ -542,7 +542,7 @@ export class CollectionsList extends BtrixElement { ${this.localize.number(col.pageCount, { notation: "compact" })} - ${pluralOf("pages", col.pageCount)} + ${pluralOf("pages", col.uniquePageCount)} ${this.localize.bytes(col.totalSize || 0, { diff --git a/frontend/src/types/collection.ts b/frontend/src/types/collection.ts index f3d6899a57..4ec6f8fe6c 100644 --- a/frontend/src/types/collection.ts +++ b/frontend/src/types/collection.ts @@ -26,8 +26,8 @@ export const publicCollectionSchema = z.object({ .nullable(), defaultThumbnailName: z.string().nullable(), crawlCount: z.number(), + uniquePageCount: z.number(), pageCount: z.number(), - snapshotCount: z.number(), totalSize: z.number(), allowPublicDownload: z.boolean(), homeUrl: z.string().url().nullable(), From cdd0d2284d9692d52c40460ef887da0d5b0cefea Mon Sep 17 00:00:00 2001 From: sua yoo Date: Wed, 22 Jan 2025 13:27:15 -0800 Subject: [PATCH 17/22] replace instances of snapshot --- .../select-collection-start-page.ts | 6 ++--- frontend/src/utils/pluralize.ts | 26 ------------------- 2 files changed, 3 insertions(+), 29 deletions(-) diff --git a/frontend/src/features/collections/select-collection-start-page.ts b/frontend/src/features/collections/select-collection-start-page.ts index 2515fcf6af..f6bd33d290 100644 --- a/frontend/src/features/collections/select-collection-start-page.ts +++ b/frontend/src/features/collections/select-collection-start-page.ts @@ -143,10 +143,10 @@ export class SelectCollectionStartPage extends BtrixElement {
${this.renderPageSearch()} Date: Wed, 22 Jan 2025 14:42:12 -0800 Subject: [PATCH 18/22] update total pages --- .../src/features/collections/select-collection-access.ts | 3 ++- frontend/src/pages/org/collection-detail.ts | 5 +++++ frontend/src/pages/org/collections-list.ts | 8 +++++--- frontend/src/types/crawler.ts | 1 + 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/frontend/src/features/collections/select-collection-access.ts b/frontend/src/features/collections/select-collection-access.ts index ce12d49292..e4f2ce45b7 100644 --- a/frontend/src/features/collections/select-collection-access.ts +++ b/frontend/src/features/collections/select-collection-access.ts @@ -111,7 +111,8 @@ export class SelectCollectionAccess extends BtrixElement {
             <li>${msg("Summary")}</li>
             <li>${msg("About")}</li>
             <li>${msg("Collection Period")}</li>
-            <li>${msg("Total Pages")}</li>
+            <li>${msg("Unique Pages in Collection")}</li>
+            <li>${msg("Total Pages Crawled")}</li>
             <li>${msg("Collection Size")}</li>
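The count handed to pluralOf() must be the same count that localize.number() renders; when the two arguments diverge, the pluralized noun can contradict the figure shown next to it, which is exactly the mismatch PATCH 19 below corrects in collection-detail.ts. A minimal sketch of a pairing helper that rules this out by construction (hypothetical, not part of this series; it assumes pluralOf is exported from @/utils/pluralize and localize is the default export of @/utils/localize, matching the call sites in these hunks):

    import localize from "@/utils/localize";
    import { pluralOf } from "@/utils/pluralize";

    // Derive the formatted number and its pluralized noun from one value
    // so they can never disagree, e.g. formatCount(1, "pages") -> "1 page".
    const formatCount = (count: number, word: "pages" | "items") =>
      `${localize.number(count)} ${pluralOf(word, count)}`;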
diff --git a/frontend/src/pages/org/collection-detail.ts b/frontend/src/pages/org/collection-detail.ts index ce7ad24c87..99d97a542b 100644 --- a/frontend/src/pages/org/collection-detail.ts +++ b/frontend/src/pages/org/collection-detail.ts @@ -483,6 +483,11 @@ export class CollectionDetail extends BtrixElement { (col) => `${this.localize.number(col.crawlCount)} ${pluralOf("items", col.crawlCount)}`, )} + ${this.renderDetailItem( + msg("Total Pages"), + (col) => + `${this.localize.number(col.pageCount)} ${pluralOf("pages", col.crawlCount)}`, + )} ${when(this.collection?.created, (created) => // Collections created before 49516bc4 is released may not have date in db created diff --git a/frontend/src/pages/org/collections-list.ts b/frontend/src/pages/org/collections-list.ts index 0c9b6e110a..cbdfb401a0 100644 --- a/frontend/src/pages/org/collections-list.ts +++ b/frontend/src/pages/org/collections-list.ts @@ -64,7 +64,7 @@ const sortableFields: Record< defaultDirection: SortDirection.Descending, }, pageCount: { - label: msg("Pages"), + label: msg("Total Pages"), defaultDirection: SortDirection.Descending, }, totalSize: { @@ -393,7 +393,9 @@ export class CollectionsList extends BtrixElement { ${msg("Archived Items")} - ${msg("Pages")} + ${msg("Total Pages")} ${msg("Size")} ${msg("Last Modified")} @@ -542,7 +544,7 @@ export class CollectionsList extends BtrixElement { ${this.localize.number(col.pageCount, { notation: "compact" })} - ${pluralOf("pages", col.uniquePageCount)} + ${pluralOf("pages", col.pageCount)} ${this.localize.bytes(col.totalSize || 0, { diff --git a/frontend/src/types/crawler.ts b/frontend/src/types/crawler.ts index 95edaba426..4beb8c7ef0 100644 --- a/frontend/src/types/crawler.ts +++ b/frontend/src/types/crawler.ts @@ -165,6 +165,7 @@ type ArchivedItemBase = { lastQAState: CrawlState | null; lastQAStarted: string | null; pageCount?: number; + uniquePageCount?: number; filePageCount?: number; errorPageCount?: number; }; From 7fcb84d6a7743465ceb29262a7fff1eacfa5ac2b Mon Sep 17 00:00:00 2001 From: sua yoo Date: Wed, 22 Jan 2025 14:45:46 -0800 Subject: [PATCH 19/22] fix wrong count used --- frontend/src/pages/org/collection-detail.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/pages/org/collection-detail.ts b/frontend/src/pages/org/collection-detail.ts index 99d97a542b..5678d2c8b5 100644 --- a/frontend/src/pages/org/collection-detail.ts +++ b/frontend/src/pages/org/collection-detail.ts @@ -486,7 +486,7 @@ export class CollectionDetail extends BtrixElement { ${this.renderDetailItem( msg("Total Pages"), (col) => - `${this.localize.number(col.pageCount)} ${pluralOf("pages", col.crawlCount)}`, + `${this.localize.number(col.pageCount)} ${pluralOf("pages", col.pageCount)}`, )} ${when(this.collection?.created, (created) => // Collections created before 49516bc4 is released may not have date in db From 1218c87318725e305c6fd2c092e3671ff2511226 Mon Sep 17 00:00:00 2001 From: sua yoo Date: Wed, 22 Jan 2025 14:50:23 -0800 Subject: [PATCH 20/22] update strings --- .../src/features/collections/select-collection-access.ts | 7 ++++--- frontend/src/layouts/collections/metadataColumn.ts | 8 ++++---- frontend/src/pages/org/collections-list.ts | 3 ++- frontend/src/strings/collections/metadata.ts | 7 +++++++ 4 files changed, 17 insertions(+), 8 deletions(-) create mode 100644 frontend/src/strings/collections/metadata.ts diff --git a/frontend/src/features/collections/select-collection-access.ts b/frontend/src/features/collections/select-collection-access.ts index
e4f2ce45b7..7547298ca2 100644 --- a/frontend/src/features/collections/select-collection-access.ts +++ b/frontend/src/features/collections/select-collection-access.ts @@ -5,6 +5,7 @@ import { customElement, property } from "lit/decorators.js"; import { when } from "lit/directives/when.js"; import { BtrixElement } from "@/classes/BtrixElement"; +import { metadata } from "@/strings/collections/metadata"; import { CollectionAccess } from "@/types/collection"; @localized() @@ -110,9 +111,9 @@ export class SelectCollectionAccess extends BtrixElement {
             <li>${msg("Name")}</li>
             <li>${msg("Summary")}</li>
             <li>${msg("About")}</li>
-            <li>${msg("Collection Period")}</li>
-            <li>${msg("Unique Pages in Collection")}</li>
-            <li>${msg("Total Pages Crawled")}</li>
+            <li>${metadata.dateLatest}</li>
+            <li>${metadata.uniquePageCount}</li>
+            <li>${metadata.pageCount}</li>
             <li>${msg("Collection Size")}</li>
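This patch moves the labels into a shared strings module, frontend/src/strings/collections/metadata.ts (shown in full below), so the share dialog, the collection metadata column, and the list sort labels all reference one msg() instance per concept and cannot drift apart when the wording changes. A short usage sketch, with the surrounding component shape assumed for illustration rather than taken from this diff:

    import { html } from "lit";

    import { metadata } from "@/strings/collections/metadata";
    import localize from "@/utils/localize";

    // Any view rendering this row shows the identical localized label.
    const uniquePagesItem = (count: number) => html`
      <dt>${metadata.uniquePageCount}</dt>
      <dd>${localize.number(count)}</dd>
    `;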
diff --git a/frontend/src/layouts/collections/metadataColumn.ts b/frontend/src/layouts/collections/metadataColumn.ts index 76a01ff2e9..e1ac449f53 100644 --- a/frontend/src/layouts/collections/metadataColumn.ts +++ b/frontend/src/layouts/collections/metadataColumn.ts @@ -1,7 +1,7 @@ -import { msg } from "@lit/localize"; import { html, type TemplateResult } from "lit"; import { when } from "lit/directives/when.js"; +import { metadata } from "@/strings/collections/metadata"; import { monthYearDateRange } from "@/strings/utils"; import type { Collection, PublicCollection } from "@/types/collection"; import localize from "@/utils/localize"; @@ -35,7 +35,7 @@ export function metadataColumn(collection?: Collection | PublicCollection) { return html` ${metadataItem({ - label: msg("Collection Period"), + label: metadata.dateLatest, render: (col) => html` ${monthYearDateRange(col.dateEarliest, col.dateLatest)} `, })} ${metadataItem({ - label: msg("Unique Pages in Collection"), + label: metadata.uniquePageCount, render: (col) => `${localize.number(col.uniquePageCount)} ${pluralOf("pages", col.uniquePageCount)}`, })} ${metadataItem({ - label: msg("Total Pages Crawled"), + label: metadata.pageCount, render: (col) => `${localize.number(col.pageCount)} ${pluralOf("pages", col.pageCount)}`, })} diff --git a/frontend/src/pages/org/collections-list.ts b/frontend/src/pages/org/collections-list.ts index cbdfb401a0..567dafec5c 100644 --- a/frontend/src/pages/org/collections-list.ts +++ b/frontend/src/pages/org/collections-list.ts @@ -19,6 +19,7 @@ import { SelectCollectionAccess } from "@/features/collections/select-collection import { emptyMessage } from "@/layouts/emptyMessage"; import { pageHeader } from "@/layouts/pageHeader"; import { RouteNamespace } from "@/routes"; +import { metadata } from "@/strings/collections/metadata"; import { monthYearDateRange } from "@/strings/utils"; import type { APIPaginatedList, APIPaginationQuery } from "@/types/api"; import { @@ -56,7 +57,7 @@ const sortableFields: Record< defaultDirection: SortDirection.Ascending, }, dateLatest: { - label: msg("Collection Period"), + label: metadata.dateLatest, defaultDirection: SortDirection.Descending, }, crawlCount: { diff --git a/frontend/src/strings/collections/metadata.ts b/frontend/src/strings/collections/metadata.ts new file mode 100644 index 0000000000..1f7d906ec6 --- /dev/null +++ b/frontend/src/strings/collections/metadata.ts @@ -0,0 +1,7 @@ +import { msg } from "@lit/localize"; + +export const metadata = { + dateLatest: msg("Collection Period"), + uniquePageCount: msg("Unique Pages in Collection"), + pageCount: msg("Total Pages Crawled"), +}; From 7e2398c1463c63553eb5eaa1372938696bcbe758 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 22 Jan 2025 19:10:16 -0800 Subject: [PATCH 21/22] fix typo in migration --- backend/btrixcloud/migrations/migration_0041_pages_snapshots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/btrixcloud/migrations/migration_0041_pages_snapshots.py b/backend/btrixcloud/migrations/migration_0041_pages_snapshots.py index 745777cce1..75d1dc2227 100644 --- a/backend/btrixcloud/migrations/migration_0041_pages_snapshots.py +++ b/backend/btrixcloud/migrations/migration_0041_pages_snapshots.py @@ -31,7 +31,7 @@ async def migrate_up(self): ) return - async for coll in colls_mdb.collections.find({}): + async for coll in colls_mdb.find({}): coll_id = coll["_id"] try: await
self.coll_ops.update_collection_counts_and_tags(coll_id) From cab55a4de95b5ed48718286855d563927d41364f Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 22 Jan 2025 20:05:02 -0800 Subject: [PATCH 22/22] test: add crawlUniquePageCount and uploadUniquePageCount to tests --- backend/test/test_org.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/test/test_org.py b/backend/test/test_org.py index 665f556b8c..a5e3a4cf48 100644 --- a/backend/test/test_org.py +++ b/backend/test/test_org.py @@ -570,6 +570,8 @@ def test_org_metrics(crawler_auth_headers, default_org_id): assert data["archivedItemCount"] == data["crawlCount"] + data["uploadCount"] assert data["pageCount"] > 0 assert data["uniquePageCount"] > 0 + assert data["crawlUniquePageCount"] > 0 + assert data["uploadUniquePageCount"] >= 0 assert data["profileCount"] >= 0 assert data["workflowsRunningCount"] >= 0 assert data["workflowsQueuedCount"] >= 0
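Across the series, every level ends up with the same two counters: pageCount, the total number of pages captured ("Total Pages Crawled" in the UI strings), and uniquePageCount, the number of distinct page URLs ("Unique Pages in Collection"). The org metrics additionally break the unique count down by crawls and uploads, as asserted in the test above. A hedged consumer sketch, assuming the metrics route is /orgs/{orgId}/metrics (the endpoint test_org_metrics exercises) and a fetch helper shaped like the frontend's this.api.fetch:

    type OrgMetrics = {
      pageCount: number;             // every captured page across all items
      uniquePageCount: number;       // distinct page URLs across all items
      crawlUniquePageCount: number;  // distinct URLs within crawls
      uploadUniquePageCount: number; // distinct URLs within uploads
    };

    // Captures per distinct URL: 1.0 means nothing was captured twice;
    // higher values mean more pages were re-crawled.
    async function captureRatio(
      api: { fetch<T>(path: string): Promise<T> },
      orgId: string,
    ): Promise<number> {
      const m = await api.fetch<OrgMetrics>(`/orgs/${orgId}/metrics`);
      return m.uniquePageCount ? m.pageCount / m.uniquePageCount : 0;
    }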