From 60c11e0348f88163e3f8b77d348867ba7dcf5830 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Thu, 9 Jan 2025 00:27:07 +0900 Subject: [PATCH 1/4] fix: Accept container images that has the 'null' labels field in their manifests --- .../manager/container_registry/base.py | 20 +++++-- .../manager/container_registry/harbor.py | 55 +++++++++++++------ .../manager/container_registry/local.py | 28 ++++++---- 3 files changed, 69 insertions(+), 34 deletions(-) diff --git a/src/ai/backend/manager/container_registry/base.py b/src/ai/backend/manager/container_registry/base.py index 023c7b667c1..55d3a604c31 100644 --- a/src/ai/backend/manager/container_registry/base.py +++ b/src/ai/backend/manager/container_registry/base.py @@ -172,8 +172,8 @@ async def commit_rescan_result(self) -> None: continue except ValueError as e: skip_reason = str(e) - progress_msg = f"Skipped image - {image_identifier.canonical}/{image_identifier.architecture} ({skip_reason})" - log.warning(progress_msg) + progress_msg = f"Skipped image (from_image_str) - {image_identifier.canonical}/{image_identifier.architecture} ({skip_reason})" + log.warning(progress_msg, exc_info=True) break session.add( @@ -200,7 +200,7 @@ async def commit_rescan_result(self) -> None: else: skip_reason = "No container registry found matching the image." - progress_msg = f"Skipped image - {image_identifier.canonical}/{image_identifier.architecture} ({skip_reason})" + progress_msg = f"Skipped image (registry not found) - {image_identifier.canonical}/{image_identifier.architecture} ({skip_reason})" log.warning(progress_msg) if (reporter := progress_reporter.get()) is not None: @@ -351,7 +351,13 @@ async def _read_manifest_list( ) if not manifests[architecture]["labels"]: - log.warning("Labels section not found on image {}:{}/{}", image, tag, architecture) + log.warning( + "The image {}:{}/{} has no metadata labels -> treating as vanilla image", + image, + tag, + architecture, + ) + manifests[architecture]["labels"] = {} await self._read_manifest(image, tag, manifests) @@ -555,7 +561,11 @@ async def _read_manifest( finally: if skip_reason: log.warning( - "Skipped image - {}:{}/{} ({})", image, tag, architecture, skip_reason + "Skipped image (_read_manifest inner) - {}:{}/{} ({})", + image, + tag, + architecture, + skip_reason, ) progress_msg = f"Skipped {image}:{tag}/{architecture} ({skip_reason})" else: diff --git a/src/ai/backend/manager/container_registry/harbor.py b/src/ai/backend/manager/container_registry/harbor.py index 094dec67716..8bf5c80fc89 100644 --- a/src/ai/backend/manager/container_registry/harbor.py +++ b/src/ai/backend/manager/container_registry/harbor.py @@ -3,7 +3,7 @@ import json import logging import urllib.parse -from typing import Any, AsyncIterator, Mapping, Optional, cast +from typing import Any, AsyncIterator, Mapping, Optional, cast, override import aiohttp import aiohttp.client_exceptions @@ -123,8 +123,12 @@ async def _scan_tag( if not labels: log.warning( - "Labels section not found on image {}:{}/{}", image, tag, architecture + "Labels section not found on image {}:{}/{} -> treating as vanilla images", + image, + tag, + architecture, ) + labels = {} manifest = { architecture: { "size": size_bytes, @@ -181,6 +185,7 @@ async def untag( ): # 404 means image is already removed from harbor so we can just safely ignore the exception raise RuntimeError(f"Failed to untag {image}: {e.message}") from e + @override async def fetch_repositories( self, sess: aiohttp.ClientSession, @@ -228,6 +233,7 @@ async def fetch_repositories( next_page_url.query ) + @override async def _scan_image( self, sess: aiohttp.ClientSession, @@ -293,6 +299,7 @@ async def _scan_image( next_page_url.query ) + @override async def _scan_tag( self, sess: aiohttp.ClientSession, @@ -333,18 +340,18 @@ async def _scan_tag( case _ as media_type: raise RuntimeError(f"Unsupported artifact media-type: {media_type}") + @override async def _process_oci_index( self, tg: aiotools.TaskGroup, sess: aiohttp.ClientSession, - _rqst_args: Mapping[str, Any], + rqst_args: Mapping[str, Any], image: str, tag: str, image_info: Mapping[str, Any], ) -> None: - rqst_args = dict(_rqst_args) - if not rqst_args.get("headers"): - rqst_args["headers"] = {} + rqst_args = {**rqst_args} + rqst_args["headers"] = rqst_args.get("headers") or {} rqst_args["headers"].update({"Accept": "application/vnd.oci.image.manifest.v1+json"}) digests: list[tuple[str, str]] = [] for reference in image_info["references"]: @@ -369,18 +376,18 @@ async def _process_oci_index( ) ) + @override async def _process_docker_v2_multiplatform_image( self, tg: aiotools.TaskGroup, sess: aiohttp.ClientSession, - _rqst_args: Mapping[str, Any], + rqst_args: Mapping[str, Any], image: str, tag: str, image_info: Mapping[str, Any], ) -> None: - rqst_args = dict(_rqst_args) - if not rqst_args.get("headers"): - rqst_args["headers"] = {} + rqst_args = {**rqst_args} + rqst_args["headers"] = rqst_args.get("headers") or {} rqst_args["headers"].update({ "Accept": "application/vnd.docker.distribution.manifest.v2+json" }) @@ -407,18 +414,18 @@ async def _process_docker_v2_multiplatform_image( ) ) + @override async def _process_docker_v2_image( self, tg: aiotools.TaskGroup, sess: aiohttp.ClientSession, - _rqst_args: Mapping[str, Any], + rqst_args: Mapping[str, Any], image: str, tag: str, image_info: Mapping[str, Any], ) -> None: - rqst_args = dict(_rqst_args) - if not rqst_args.get("headers"): - rqst_args["headers"] = {} + rqst_args = {**rqst_args} + rqst_args["headers"] = rqst_args.get("headers") or {} rqst_args["headers"].update({ "Accept": "application/vnd.docker.distribution.manifest.v2+json" }) @@ -475,8 +482,14 @@ async def _harbor_scan_tag_per_arch( elif _container_config_labels := data.get("container_config", {}).get("Labels"): labels = _container_config_labels - if not labels: - log.warning("Labels section not found on image {}:{}/{}", image, tag, architecture) + if labels is None: + log.warning( + "Labels section not found on image {}:{}/{} -> treating as vanilla image", + image, + tag, + architecture, + ) + labels = {} manifests[architecture] = { "size": size_bytes, @@ -523,8 +536,14 @@ async def _harbor_scan_tag_single_arch( elif _container_config_labels := data.get("container_config", {}).get("Labels"): labels = _container_config_labels - if not labels: - log.warning("Labels section not found on image {}:{}/{}", image, tag, architecture) + if labels is None: + log.warning( + "Labels section not found on image {}:{}/{} -> treating as vanilla image", + image, + tag, + architecture, + ) + labels = {} manifests[architecture] = { "size": size_bytes, "labels": labels, diff --git a/src/ai/backend/manager/container_registry/local.py b/src/ai/backend/manager/container_registry/local.py index 1177e4a211d..ab450289259 100644 --- a/src/ai/backend/manager/container_registry/local.py +++ b/src/ai/backend/manager/container_registry/local.py @@ -3,7 +3,7 @@ import json import logging from contextlib import asynccontextmanager as actxmgr -from typing import AsyncIterator, Optional +from typing import AsyncIterator, override import aiohttp import sqlalchemy as sa @@ -38,9 +38,6 @@ async def fetch_repositories( if (reporter := progress_reporter.get()) is not None: reporter.total_progress = len(items) for item in items: - labels = item["Labels"] - if not labels: - continue if item["RepoTags"] is not None: for image_ref_str in item["RepoTags"]: if image_ref_str == ":": @@ -48,6 +45,7 @@ async def fetch_repositories( continue yield image_ref_str # this includes the tag part + @override async def _scan_image( self, sess: aiohttp.ClientSession, @@ -61,14 +59,13 @@ async def _scan_tag_local( sess: aiohttp.ClientSession, rqst_args: dict[str, str], image: str, - digest: str, - tag: Optional[str] = None, + tag: str, ) -> None: async def _read_image_info( _tag: str, ) -> tuple[dict[str, dict], str | None]: async with sess.get( - self.registry_url / "images" / f"{image}:{digest}" / "json" + self.registry_url / "images" / f"{image}:{tag}" / "json" ) as response: data = await response.json() architecture = arch_name_aliases.get(data["Architecture"], data["Architecture"]) @@ -79,7 +76,7 @@ async def _read_image_info( "ContainerConfig.Image": data.get("ContainerConfig", {}).get("Image", None), "Architecture": architecture, } - log.debug("scanned image info: {}:{}\n{}", image, digest, json.dumps(summary, indent=2)) + log.debug("scanned image info: {}:{}\n{}", image, tag, json.dumps(summary, indent=2)) already_exists = 0 config_digest = data["Id"] async with self.db.begin_readonly_session() as db_session: @@ -91,14 +88,23 @@ async def _read_image_info( ) if already_exists > 0: return {}, "already synchronized from a remote registry" + labels = data["Config"]["Labels"] + if labels is None: + log.debug( + "The image {}:{}/{} has no metadata labels -> treating as vanilla image", + image, + tag, + architecture, + ) + labels = {} return { architecture: { "size": data["Size"], - "labels": data["Config"]["Labels"], + "labels": labels, "digest": config_digest, }, }, None async with concurrency_sema.get(): - manifests, skip_reason = await _read_image_info(digest) - await self._read_manifest(image, digest, manifests, skip_reason) + manifests, skip_reason = await _read_image_info(tag) + await self._read_manifest(image, tag, manifests, skip_reason) From 8b1a2a2807b85069fe70e7d32b29bc7436a0c0ad Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Thu, 9 Jan 2025 11:09:02 +0900 Subject: [PATCH 2/4] doc: Add news fragment --- changes/3411.fix.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/3411.fix.md diff --git a/changes/3411.fix.md b/changes/3411.fix.md new file mode 100644 index 00000000000..5253ac79c0c --- /dev/null +++ b/changes/3411.fix.md @@ -0,0 +1 @@ +Fix scanning and loading container images with no labels at all (`null` in the image manifests) From b9644cc247a2edc039f65afdf324e78318e3b959 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Thu, 9 Jan 2025 11:18:11 +0900 Subject: [PATCH 3/4] fix: Make the warning message consistent --- src/ai/backend/manager/container_registry/harbor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ai/backend/manager/container_registry/harbor.py b/src/ai/backend/manager/container_registry/harbor.py index 8bf5c80fc89..bf4cd82f2d6 100644 --- a/src/ai/backend/manager/container_registry/harbor.py +++ b/src/ai/backend/manager/container_registry/harbor.py @@ -123,7 +123,7 @@ async def _scan_tag( if not labels: log.warning( - "Labels section not found on image {}:{}/{} -> treating as vanilla images", + "The image {}:{}/{} has no metadata labels -> treating as vanilla image", image, tag, architecture, @@ -484,7 +484,7 @@ async def _harbor_scan_tag_per_arch( if labels is None: log.warning( - "Labels section not found on image {}:{}/{} -> treating as vanilla image", + "The image {}:{}/{} has no metadata labels -> treating as vanilla image", image, tag, architecture, @@ -538,7 +538,7 @@ async def _harbor_scan_tag_single_arch( if labels is None: log.warning( - "Labels section not found on image {}:{}/{} -> treating as vanilla image", + "The image {}:{}/{} has no metadata labels -> treating as vanilla image", image, tag, architecture, From eae0c1df43f3f90aabab517d07f6dadaba835f5e Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Sat, 18 Jan 2025 00:12:00 +0900 Subject: [PATCH 4/4] fix: Explicitly log and report progress upon ProjectMismatchWithCanonical --- src/ai/backend/manager/container_registry/base.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/ai/backend/manager/container_registry/base.py b/src/ai/backend/manager/container_registry/base.py index 4019bf1c05d..7dce1443814 100644 --- a/src/ai/backend/manager/container_registry/base.py +++ b/src/ai/backend/manager/container_registry/base.py @@ -154,15 +154,12 @@ async def commit_rescan_result(self) -> None: self.registry_info.registry_name, is_local=is_local, ) - except ProjectMismatchWithCanonical: - continue - except ValueError as e: + except (ProjectMismatchWithCanonical, ValueError) as e: skip_reason = str(e) progress_msg = f"Skipped image - {image_identifier.canonical}/{image_identifier.architecture} ({skip_reason})" log.warning(progress_msg) if (reporter := progress_reporter.get()) is not None: await reporter.update(1, message=progress_msg) - continue session.add(