From 73dcec61c7cc72eadf5caf2809a903cfafc2a35f Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Sat, 18 Jan 2025 00:17:20 +0900 Subject: [PATCH] fix(BA-478): Accept container images that has the 'null' labels field in their manifests (#3411) Backported-from: main (24.12) Backported-to: 24.09 Backport-of: 3411 --- changes/3411.fix.md | 1 + .../manager/container_registry/base.py | 19 +++++--- .../manager/container_registry/harbor.py | 47 ++++++++++++------- .../manager/container_registry/local.py | 27 ++++++----- 4 files changed, 60 insertions(+), 34 deletions(-) create mode 100644 changes/3411.fix.md diff --git a/changes/3411.fix.md b/changes/3411.fix.md new file mode 100644 index 0000000000..5253ac79c0 --- /dev/null +++ b/changes/3411.fix.md @@ -0,0 +1 @@ +Fix scanning and loading container images with no labels at all (`null` in the image manifests) diff --git a/src/ai/backend/manager/container_registry/base.py b/src/ai/backend/manager/container_registry/base.py index 9b2574f3dc..7dce144381 100644 --- a/src/ai/backend/manager/container_registry/base.py +++ b/src/ai/backend/manager/container_registry/base.py @@ -154,15 +154,12 @@ async def commit_rescan_result(self) -> None: self.registry_info.registry_name, is_local=is_local, ) - except ProjectMismatchWithCanonical: - continue - except ValueError as e: + except (ProjectMismatchWithCanonical, ValueError) as e: skip_reason = str(e) progress_msg = f"Skipped image - {image_identifier.canonical}/{image_identifier.architecture} ({skip_reason})" log.warning(progress_msg) if (reporter := progress_reporter.get()) is not None: await reporter.update(1, message=progress_msg) - continue session.add( @@ -334,7 +331,13 @@ async def _read_manifest_list( ) if not manifests[architecture]["labels"]: - log.warning("Labels section not found on image {}:{}/{}", image, tag, architecture) + log.warning( + "The image {}:{}/{} has no metadata labels -> treating as vanilla image", + image, + tag, + architecture, + ) + manifests[architecture]["labels"] = {} await self._read_manifest(image, tag, manifests) @@ -538,7 +541,11 @@ async def _read_manifest( finally: if skip_reason: log.warning( - "Skipped image - {}:{}/{} ({})", image, tag, architecture, skip_reason + "Skipped image (_read_manifest inner) - {}:{}/{} ({})", + image, + tag, + architecture, + skip_reason, ) progress_msg = f"Skipped {image}:{tag}/{architecture} ({skip_reason})" else: diff --git a/src/ai/backend/manager/container_registry/harbor.py b/src/ai/backend/manager/container_registry/harbor.py index a999ffccf1..70c75b4149 100644 --- a/src/ai/backend/manager/container_registry/harbor.py +++ b/src/ai/backend/manager/container_registry/harbor.py @@ -119,8 +119,12 @@ async def _scan_tag( if not labels: log.warning( - "Labels section not found on image {}:{}/{}", image, tag, architecture + "The image {}:{}/{} has no metadata labels -> treating as vanilla image", + image, + tag, + architecture, ) + labels = {} manifest = { architecture: { "size": size_bytes, @@ -331,14 +335,13 @@ async def _process_oci_index( self, tg: aiotools.TaskGroup, sess: aiohttp.ClientSession, - _rqst_args: Mapping[str, Any], + rqst_args: Mapping[str, Any], image: str, tag: str, image_info: Mapping[str, Any], ) -> None: - rqst_args = dict(_rqst_args) - if not rqst_args.get("headers"): - rqst_args["headers"] = {} + rqst_args = {**rqst_args} + rqst_args["headers"] = rqst_args.get("headers") or {} rqst_args["headers"].update({"Accept": "application/vnd.oci.image.manifest.v1+json"}) digests: list[tuple[str, str]] = [] for reference in image_info["references"]: @@ -368,14 +371,13 @@ async def _process_docker_v2_multiplatform_image( self, tg: aiotools.TaskGroup, sess: aiohttp.ClientSession, - _rqst_args: Mapping[str, Any], + rqst_args: Mapping[str, Any], image: str, tag: str, image_info: Mapping[str, Any], ) -> None: - rqst_args = dict(_rqst_args) - if not rqst_args.get("headers"): - rqst_args["headers"] = {} + rqst_args = {**rqst_args} + rqst_args["headers"] = rqst_args.get("headers") or {} rqst_args["headers"].update({ "Accept": "application/vnd.docker.distribution.manifest.v2+json" }) @@ -407,14 +409,13 @@ async def _process_docker_v2_image( self, tg: aiotools.TaskGroup, sess: aiohttp.ClientSession, - _rqst_args: Mapping[str, Any], + rqst_args: Mapping[str, Any], image: str, tag: str, image_info: Mapping[str, Any], ) -> None: - rqst_args = dict(_rqst_args) - if not rqst_args.get("headers"): - rqst_args["headers"] = {} + rqst_args = {**rqst_args} + rqst_args["headers"] = rqst_args.get("headers") or {} rqst_args["headers"].update({ "Accept": "application/vnd.docker.distribution.manifest.v2+json" }) @@ -471,8 +472,14 @@ async def _harbor_scan_tag_per_arch( elif _container_config_labels := data.get("container_config", {}).get("Labels"): labels = _container_config_labels - if not labels: - log.warning("Labels section not found on image {}:{}/{}", image, tag, architecture) + if labels is None: + log.warning( + "The image {}:{}/{} has no metadata labels -> treating as vanilla image", + image, + tag, + architecture, + ) + labels = {} manifests[architecture] = { "size": size_bytes, @@ -519,8 +526,14 @@ async def _harbor_scan_tag_single_arch( elif _container_config_labels := data.get("container_config", {}).get("Labels"): labels = _container_config_labels - if not labels: - log.warning("Labels section not found on image {}:{}/{}", image, tag, architecture) + if labels is None: + log.warning( + "The image {}:{}/{} has no metadata labels -> treating as vanilla image", + image, + tag, + architecture, + ) + labels = {} manifests[architecture] = { "size": size_bytes, "labels": labels, diff --git a/src/ai/backend/manager/container_registry/local.py b/src/ai/backend/manager/container_registry/local.py index d5a46c13c0..0507013af9 100644 --- a/src/ai/backend/manager/container_registry/local.py +++ b/src/ai/backend/manager/container_registry/local.py @@ -3,7 +3,7 @@ import json import logging from contextlib import asynccontextmanager as actxmgr -from typing import AsyncIterator, Optional, override +from typing import AsyncIterator, override import aiohttp import sqlalchemy as sa @@ -39,9 +39,6 @@ async def fetch_repositories( if (reporter := progress_reporter.get()) is not None: reporter.total_progress = len(items) for item in items: - labels = item["Labels"] - if not labels: - continue if item["RepoTags"] is not None: for image_ref_str in item["RepoTags"]: if image_ref_str == ":": @@ -63,14 +60,13 @@ async def _scan_tag_local( sess: aiohttp.ClientSession, rqst_args: dict[str, str], image: str, - digest: str, - tag: Optional[str] = None, + tag: str, ) -> None: async def _read_image_info( _tag: str, ) -> tuple[dict[str, dict], str | None]: async with sess.get( - self.registry_url / "images" / f"{image}:{digest}" / "json" + self.registry_url / "images" / f"{image}:{tag}" / "json" ) as response: data = await response.json() architecture = arch_name_aliases.get(data["Architecture"], data["Architecture"]) @@ -81,7 +77,7 @@ async def _read_image_info( "ContainerConfig.Image": data.get("ContainerConfig", {}).get("Image", None), "Architecture": architecture, } - log.debug("scanned image info: {}:{}\n{}", image, digest, json.dumps(summary, indent=2)) + log.debug("scanned image info: {}:{}\n{}", image, tag, json.dumps(summary, indent=2)) already_exists = 0 config_digest = data["Id"] async with self.db.begin_readonly_session() as db_session: @@ -93,14 +89,23 @@ async def _read_image_info( ) if already_exists > 0: return {}, "already synchronized from a remote registry" + labels = data["Config"]["Labels"] + if labels is None: + log.debug( + "The image {}:{}/{} has no metadata labels -> treating as vanilla image", + image, + tag, + architecture, + ) + labels = {} return { architecture: { "size": data["Size"], - "labels": data["Config"]["Labels"], + "labels": labels, "digest": config_digest, }, }, None async with concurrency_sema.get(): - manifests, skip_reason = await _read_image_info(digest) - await self._read_manifest(image, digest, manifests, skip_reason) + manifests, skip_reason = await _read_image_info(tag) + await self._read_manifest(image, tag, manifests, skip_reason)