Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(BA-478): Accept container images that has the 'null' labels field in their manifests #3411

Merged
merged 5 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/3411.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix scanning and loading container images with no labels at all (`null` in the image manifests)
19 changes: 13 additions & 6 deletions src/ai/backend/manager/container_registry/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,15 +154,12 @@ async def commit_rescan_result(self) -> None:
self.registry_info.registry_name,
is_local=is_local,
)
except ProjectMismatchWithCanonical:
continue
except ValueError as e:
except (ProjectMismatchWithCanonical, ValueError) as e:
skip_reason = str(e)
progress_msg = f"Skipped image - {image_identifier.canonical}/{image_identifier.architecture} ({skip_reason})"
log.warning(progress_msg)
if (reporter := progress_reporter.get()) is not None:
await reporter.update(1, message=progress_msg)

continue

session.add(
Expand Down Expand Up @@ -334,7 +331,13 @@ async def _read_manifest_list(
)

if not manifests[architecture]["labels"]:
log.warning("Labels section not found on image {}:{}/{}", image, tag, architecture)
log.warning(
"The image {}:{}/{} has no metadata labels -> treating as vanilla image",
image,
tag,
architecture,
)
manifests[architecture]["labels"] = {}

await self._read_manifest(image, tag, manifests)

Expand Down Expand Up @@ -538,7 +541,11 @@ async def _read_manifest(
finally:
if skip_reason:
log.warning(
"Skipped image - {}:{}/{} ({})", image, tag, architecture, skip_reason
"Skipped image (_read_manifest inner) - {}:{}/{} ({})",
image,
tag,
architecture,
skip_reason,
)
progress_msg = f"Skipped {image}:{tag}/{architecture} ({skip_reason})"
else:
Expand Down
47 changes: 30 additions & 17 deletions src/ai/backend/manager/container_registry/harbor.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,12 @@ async def _scan_tag(

if not labels:
log.warning(
"Labels section not found on image {}:{}/{}", image, tag, architecture
"The image {}:{}/{} has no metadata labels -> treating as vanilla image",
image,
tag,
architecture,
)
labels = {}
manifest = {
architecture: {
"size": size_bytes,
Expand Down Expand Up @@ -331,14 +335,13 @@ async def _process_oci_index(
self,
tg: aiotools.TaskGroup,
sess: aiohttp.ClientSession,
_rqst_args: Mapping[str, Any],
rqst_args: Mapping[str, Any],
image: str,
tag: str,
image_info: Mapping[str, Any],
) -> None:
rqst_args = dict(_rqst_args)
if not rqst_args.get("headers"):
rqst_args["headers"] = {}
rqst_args = {**rqst_args}
rqst_args["headers"] = rqst_args.get("headers") or {}
rqst_args["headers"].update({"Accept": "application/vnd.oci.image.manifest.v1+json"})
digests: list[tuple[str, str]] = []
for reference in image_info["references"]:
Expand Down Expand Up @@ -368,14 +371,13 @@ async def _process_docker_v2_multiplatform_image(
self,
tg: aiotools.TaskGroup,
sess: aiohttp.ClientSession,
_rqst_args: Mapping[str, Any],
rqst_args: Mapping[str, Any],
image: str,
tag: str,
image_info: Mapping[str, Any],
) -> None:
rqst_args = dict(_rqst_args)
if not rqst_args.get("headers"):
rqst_args["headers"] = {}
rqst_args = {**rqst_args}
rqst_args["headers"] = rqst_args.get("headers") or {}
rqst_args["headers"].update({
"Accept": "application/vnd.docker.distribution.manifest.v2+json"
})
Expand Down Expand Up @@ -407,14 +409,13 @@ async def _process_docker_v2_image(
self,
tg: aiotools.TaskGroup,
sess: aiohttp.ClientSession,
_rqst_args: Mapping[str, Any],
rqst_args: Mapping[str, Any],
image: str,
tag: str,
image_info: Mapping[str, Any],
) -> None:
rqst_args = dict(_rqst_args)
if not rqst_args.get("headers"):
rqst_args["headers"] = {}
rqst_args = {**rqst_args}
rqst_args["headers"] = rqst_args.get("headers") or {}
rqst_args["headers"].update({
"Accept": "application/vnd.docker.distribution.manifest.v2+json"
})
Expand Down Expand Up @@ -471,8 +472,14 @@ async def _harbor_scan_tag_per_arch(
elif _container_config_labels := data.get("container_config", {}).get("Labels"):
labels = _container_config_labels

if not labels:
log.warning("Labels section not found on image {}:{}/{}", image, tag, architecture)
if labels is None:
log.warning(
"The image {}:{}/{} has no metadata labels -> treating as vanilla image",
image,
tag,
architecture,
)
labels = {}

manifests[architecture] = {
"size": size_bytes,
Expand Down Expand Up @@ -519,8 +526,14 @@ async def _harbor_scan_tag_single_arch(
elif _container_config_labels := data.get("container_config", {}).get("Labels"):
labels = _container_config_labels

if not labels:
log.warning("Labels section not found on image {}:{}/{}", image, tag, architecture)
if labels is None:
log.warning(
"The image {}:{}/{} has no metadata labels -> treating as vanilla image",
image,
tag,
architecture,
)
labels = {}
manifests[architecture] = {
"size": size_bytes,
"labels": labels,
Expand Down
27 changes: 16 additions & 11 deletions src/ai/backend/manager/container_registry/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import json
import logging
from contextlib import asynccontextmanager as actxmgr
from typing import AsyncIterator, Optional, override
from typing import AsyncIterator, override

import aiohttp
import sqlalchemy as sa
Expand Down Expand Up @@ -39,9 +39,6 @@ async def fetch_repositories(
if (reporter := progress_reporter.get()) is not None:
reporter.total_progress = len(items)
for item in items:
labels = item["Labels"]
if not labels:
continue
if item["RepoTags"] is not None:
for image_ref_str in item["RepoTags"]:
if image_ref_str == "<none>:<none>":
Expand All @@ -63,14 +60,13 @@ async def _scan_tag_local(
sess: aiohttp.ClientSession,
rqst_args: dict[str, str],
image: str,
digest: str,
tag: Optional[str] = None,
tag: str,
) -> None:
async def _read_image_info(
_tag: str,
) -> tuple[dict[str, dict], str | None]:
async with sess.get(
self.registry_url / "images" / f"{image}:{digest}" / "json"
self.registry_url / "images" / f"{image}:{tag}" / "json"
) as response:
data = await response.json()
architecture = arch_name_aliases.get(data["Architecture"], data["Architecture"])
Expand All @@ -81,7 +77,7 @@ async def _read_image_info(
"ContainerConfig.Image": data.get("ContainerConfig", {}).get("Image", None),
"Architecture": architecture,
}
log.debug("scanned image info: {}:{}\n{}", image, digest, json.dumps(summary, indent=2))
log.debug("scanned image info: {}:{}\n{}", image, tag, json.dumps(summary, indent=2))
already_exists = 0
config_digest = data["Id"]
async with self.db.begin_readonly_session() as db_session:
Expand All @@ -93,14 +89,23 @@ async def _read_image_info(
)
if already_exists > 0:
return {}, "already synchronized from a remote registry"
labels = data["Config"]["Labels"]
if labels is None:
log.debug(
"The image {}:{}/{} has no metadata labels -> treating as vanilla image",
image,
tag,
architecture,
)
labels = {}
return {
architecture: {
"size": data["Size"],
"labels": data["Config"]["Labels"],
"labels": labels,
"digest": config_digest,
},
}, None

async with concurrency_sema.get():
manifests, skip_reason = await _read_image_info(digest)
await self._read_manifest(image, digest, manifests, skip_reason)
manifests, skip_reason = await _read_image_info(tag)
await self._read_manifest(image, tag, manifests, skip_reason)
Loading