From 6bd024bc52420d0fad88865afb4a70571faa701d Mon Sep 17 00:00:00 2001 From: alafanechere Date: Tue, 17 Dec 2024 09:31:34 +0100 Subject: [PATCH] base-images: declare a base image for our java connectors --- airbyte-ci/connectors/base_images/README.md | 31 +++++- .../base_images/base_images/java/__init__.py | 3 + .../base_images/base_images/java/bases.py | 102 ++++++++++++++++++ .../base_images/base_images/root_images.py | 7 ++ .../base_images/base_images/sanity_checks.py | 16 +++ .../base_images/templates/README.md.j2 | 8 +- .../base_images/base_images/utils/dagger.py | 6 ++ .../base_images/version_registry.py | 14 ++- .../airbyte_java_connector_base.json | 17 +++ .../connectors/base_images/pyproject.toml | 2 +- 10 files changed, 200 insertions(+), 6 deletions(-) create mode 100644 airbyte-ci/connectors/base_images/base_images/java/__init__.py create mode 100644 airbyte-ci/connectors/base_images/base_images/java/bases.py create mode 100644 airbyte-ci/connectors/base_images/base_images/utils/dagger.py create mode 100644 airbyte-ci/connectors/base_images/generated/changelogs/airbyte_java_connector_base.json diff --git a/airbyte-ci/connectors/base_images/README.md b/airbyte-ci/connectors/base_images/README.md index fbe05942d497..6f436d8079a0 100644 --- a/airbyte-ci/connectors/base_images/README.md +++ b/airbyte-ci/connectors/base_images/README.md @@ -6,7 +6,7 @@ Our connector build pipeline ([`airbyte-ci`](https://github.com/airbytehq/airbyt Our base images are declared in code, using the [Dagger Python SDK](https://dagger-io.readthedocs.io/en/sdk-python-v0.6.4/). - [Python base image code declaration](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/base_images/base_images/python/bases.py) -- ~Java base image code declaration~ *TODO* +- [Java base image code declaration](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/base_images/base_images/java/bases.py) ## Where are the Dockerfiles? @@ -39,6 +39,20 @@ RUN mkdir -p 755 /usr/share/nltk_data +### Example for `airbyte/java-connector-base`: +```dockerfile +FROM docker.io/amazoncorretto:21-al2023@sha256:5454cb606e803fce56861fdbc9eab365eaa2ab4f357ceb8c1d56f4f8c8a7bc33 +RUN sh -c set -o xtrace && yum update -y --security && yum install -y tar openssl findutils && yum clean all +ENV AIRBYTE_SPEC_CMD=/airbyte/javabase.sh --spec +ENV AIRBYTE_CHECK_CMD=/airbyte/javabase.sh --check +ENV AIRBYTE_DISCOVER_CMD=/airbyte/javabase.sh --discover +ENV AIRBYTE_READ_CMD=/airbyte/javabase.sh --read +ENV AIRBYTE_WRITE_CMD=/airbyte/javabase.sh --write +ENV AIRBYTE_ENTRYPOINT=/airbyte/base.sh +``` + + + ## Base images @@ -59,6 +73,16 @@ RUN mkdir -p 755 /usr/share/nltk_data | 1.0.0 | ✅| docker.io/airbyte/python-connector-base:1.0.0@sha256:dd17e347fbda94f7c3abff539be298a65af2d7fc27a307d89297df1081a45c27 | Initial release: based on Python 3.9.18, on slim-bookworm system, with pip==23.2.1 and poetry==1.6.1 | +### `airbyte/java-connector-base` + +| Version | Published | Docker Image Address | Changelog | +|---------|-----------|--------------|-----------| +| 1.0.0-rc.4 | ✅| docker.io/airbyte/java-connector-base:1.0.0-rc.4@sha256:be86e5684e1e6d9280512d3d8071b47153698fe08ad990949c8eeff02803201a | Bundle yum calls in a single RUN | +| 1.0.0-rc.3 | ✅| docker.io/airbyte/java-connector-base:1.0.0-rc.3@sha256:be86e5684e1e6d9280512d3d8071b47153698fe08ad990949c8eeff02803201a | | +| 1.0.0-rc.2 | ✅| docker.io/airbyte/java-connector-base:1.0.0-rc.2@sha256:fca66e81b4d2e4869a03b57b1b34beb048e74f5d08deb2046c3bb9919e7e2273 | Set entrypoint to base.sh | +| 1.0.0-rc.1 | ✅| docker.io/airbyte/java-connector-base:1.0.0-rc.1@sha256:886a7ce7eccfe3c8fb303511d0e46b83b7edb4f28e3705818c090185ba511fe7 | Create a base image for our java connectors. | + + ## How to release a new base image version (example for Python) ### Requirements @@ -102,6 +126,9 @@ poetry run mypy base_images --check-untyped-defs ## CHANGELOG +### 1.4.0 +- Declare a base image for our java connectors. + ### 1.3.1 - Update the crane image address. The previous address was deleted by the maintainer. @@ -120,4 +147,4 @@ poetry run mypy base_images --check-untyped-defs ### 1.0.1 -- Bumped dependencies ([#42581](https://github.com/airbytehq/airbyte/pull/42581)) +- Bumped dependencies ([#42581](https://github.com/airbytehq/airbyte/pull/42581)) \ No newline at end of file diff --git a/airbyte-ci/connectors/base_images/base_images/java/__init__.py b/airbyte-ci/connectors/base_images/base_images/java/__init__.py new file mode 100644 index 000000000000..c941b3045795 --- /dev/null +++ b/airbyte-ci/connectors/base_images/base_images/java/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-ci/connectors/base_images/base_images/java/bases.py b/airbyte-ci/connectors/base_images/base_images/java/bases.py new file mode 100644 index 000000000000..ed820e5b9863 --- /dev/null +++ b/airbyte-ci/connectors/base_images/base_images/java/bases.py @@ -0,0 +1,102 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# +from __future__ import annotations + +from typing import Callable, Final + +import dagger +from base_images import bases, published_image +from base_images import sanity_checks as base_sanity_checks +from base_images.python import sanity_checks as python_sanity_checks +from base_images.root_images import AMAZON_CORRETTO_21_AL_2023 +from base_images.utils.dagger import sh_dash_c + + +class AirbyteJavaConnectorBaseImage(bases.AirbyteConnectorBaseImage): + # TODO: remove this once we want to build the base image with the airbyte user. + USER: Final[str] = "root" + + root_image: Final[published_image.PublishedImage] = AMAZON_CORRETTO_21_AL_2023 + repository: Final[str] = "airbyte/java-connector-base" + + DD_AGENT_JAR_URL: Final[str] = "https://dtdg.co/latest-java-tracer" + BASE_SCRIPT_URL = "https://raw.githubusercontent.com/airbytehq/airbyte/6d8a3a2bc4f4ca79f10164447a90fdce5c9ad6f9/airbyte-integrations/bases/base/base.sh" + JAVA_BASE_SCRIPT_URL: Final[ + str + ] = "https://raw.githubusercontent.com/airbytehq/airbyte/6d8a3a2bc4f4ca79f10164447a90fdce5c9ad6f9/airbyte-integrations/bases/base-java/javabase.sh" + + def get_container(self, platform: dagger.Platform) -> dagger.Container: + """Returns the container used to build the base image for java connectors + We currently use the Amazon coretto image as a base. + We install some packages required to build java connectors. + We also download the datadog java agent jar and the javabase.sh script. + We set some env variables used by the javabase.sh script. + + Args: + platform (dagger.Platform): The platform this container should be built for. + + Returns: + dagger.Container: The container used to build the base image. + """ + + return ( + # TODO: Call this when we want to build the base image with the airbyte user + # self.get_base_container(platform) + self.dagger_client.container(platform=platform) + .from_(self.root_image.address) + # Bundle RUN commands together to reduce the number of layers. + .with_exec( + sh_dash_c( + [ + # Update first, but in the same .with_exec step as the package installation. + # Otherwise, we risk caching stale package URLs. + "yum update -y --security", + # tar is equired to untar java connector binary distributions. + # openssl is required because we need to ssh and scp sometimes. + # findutils is required for xargs, which is shipped as part of findutils. + f"yum install -y tar openssl findutils", + # Remove any dangly bits. + "yum clean all", + ] + ) + ) + .with_workdir("/airbyte") + # Copy the datadog java agent jar from the internet. + .with_file("dd-java-agent.jar", self.dagger_client.http(self.DD_AGENT_JAR_URL)) + # Copy base.sh from the git repo. + .with_file("base.sh", self.dagger_client.http(self.BASE_SCRIPT_URL)) + # Copy javabase.sh from the git repo. + .with_file("javabase.sh", self.dagger_client.http(self.JAVA_BASE_SCRIPT_URL)) + # Set a bunch of env variables used by base.sh. + .with_env_variable("AIRBYTE_SPEC_CMD", "/airbyte/javabase.sh --spec") + .with_env_variable("AIRBYTE_CHECK_CMD", "/airbyte/javabase.sh --check") + .with_env_variable("AIRBYTE_DISCOVER_CMD", "/airbyte/javabase.sh --discover") + .with_env_variable("AIRBYTE_READ_CMD", "/airbyte/javabase.sh --read") + .with_env_variable("AIRBYTE_WRITE_CMD", "/airbyte/javabase.sh --write") + .with_env_variable("AIRBYTE_ENTRYPOINT", "/airbyte/base.sh") + .with_entrypoint(["/airbyte/base.sh"]) + ) + + async def run_sanity_checks(self, platform: dagger.Platform): + """Runs sanity checks on the base image container. + This method is called before image publication. + Consider it like a pre-flight check before take-off to the remote registry. + + Args: + platform (dagger.Platform): The platform on which the sanity checks should run. + """ + container = self.get_container(platform) + await base_sanity_checks.check_user_can_read_dir(container, self.USER, self.AIRBYTE_DIR_PATH) + await base_sanity_checks.check_user_can_write_dir(container, self.USER, self.AIRBYTE_DIR_PATH) + await base_sanity_checks.check_file_exists(container, "/airbyte/dd-java-agent.jar") + await base_sanity_checks.check_file_exists(container, "/airbyte/base.sh") + await base_sanity_checks.check_file_exists(container, "/airbyte/javabase.sh") + await base_sanity_checks.check_env_var_with_printenv(container, "AIRBYTE_SPEC_CMD", "/airbyte/javabase.sh --spec") + await base_sanity_checks.check_env_var_with_printenv(container, "AIRBYTE_CHECK_CMD", "/airbyte/javabase.sh --check") + await base_sanity_checks.check_env_var_with_printenv(container, "AIRBYTE_DISCOVER_CMD", "/airbyte/javabase.sh --discover") + await base_sanity_checks.check_env_var_with_printenv(container, "AIRBYTE_READ_CMD", "/airbyte/javabase.sh --read") + await base_sanity_checks.check_env_var_with_printenv(container, "AIRBYTE_WRITE_CMD", "/airbyte/javabase.sh --write") + await base_sanity_checks.check_env_var_with_printenv(container, "AIRBYTE_ENTRYPOINT", "/airbyte/base.sh") + await base_sanity_checks.check_a_command_is_available_using_version_option(container, "tar") + await base_sanity_checks.check_a_command_is_available_using_version_option(container, "openssl", "version") diff --git a/airbyte-ci/connectors/base_images/base_images/root_images.py b/airbyte-ci/connectors/base_images/base_images/root_images.py index 8cb7036d22ef..dcd0892a8f6c 100644 --- a/airbyte-ci/connectors/base_images/base_images/root_images.py +++ b/airbyte-ci/connectors/base_images/base_images/root_images.py @@ -24,3 +24,10 @@ tag="3.10.14-slim-bookworm", sha="2407c61b1a18067393fecd8a22cf6fceede893b6aaca817bf9fbfe65e33614a3", ) + +AMAZON_CORRETTO_21_AL_2023 = PublishedImage( + registry="docker.io", + repository="amazoncorretto", + tag="21-al2023", + sha="5454cb606e803fce56861fdbc9eab365eaa2ab4f357ceb8c1d56f4f8c8a7bc33", +) diff --git a/airbyte-ci/connectors/base_images/base_images/sanity_checks.py b/airbyte-ci/connectors/base_images/base_images/sanity_checks.py index a88b137a028d..287636cef73c 100644 --- a/airbyte-ci/connectors/base_images/base_images/sanity_checks.py +++ b/airbyte-ci/connectors/base_images/base_images/sanity_checks.py @@ -178,3 +178,19 @@ async def check_user_can_write_dir(container: dagger.Container, user: str, dir_p await container.with_user(user).with_exec(["touch", f"{dir_path}/foo.txt"]) except dagger.ExecError: raise errors.SanityCheckError(f"{dir_path} is not writable by the {user}.") + + +async def check_file_exists(container: dagger.Container, file_path: str): + """Check that a file exists in the container. + + Args: + container (dagger.Container): The container on which the sanity checks should run. + file_path (str): The file path to check. + + Raises: + errors.SanityCheckError: Raised if the file does not exist. + """ + try: + await container.with_exec(["test", "-f", file_path]) + except dagger.ExecError: + raise errors.SanityCheckError(f"{file_path} does not exist.") diff --git a/airbyte-ci/connectors/base_images/base_images/templates/README.md.j2 b/airbyte-ci/connectors/base_images/base_images/templates/README.md.j2 index 89314b1491d5..c5484077a291 100644 --- a/airbyte-ci/connectors/base_images/base_images/templates/README.md.j2 +++ b/airbyte-ci/connectors/base_images/base_images/templates/README.md.j2 @@ -6,7 +6,7 @@ Our connector build pipeline ([`airbyte-ci`](https://github.com/airbytehq/airbyt Our base images are declared in code, using the [Dagger Python SDK](https://dagger-io.readthedocs.io/en/sdk-python-v0.6.4/). - [Python base image code declaration](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/base_images/base_images/python/bases.py) -- ~Java base image code declaration~ *TODO* +- [Java base image code declaration](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/base_images/base_images/java/bases.py) ## Where are the Dockerfiles? @@ -79,6 +79,12 @@ poetry run mypy base_images --check-untyped-defs ## CHANGELOG +### 1.4.0 +- Declare a base image for our java connectors. + +### 1.3.1 +- Update the crane image address. The previous address was deleted by the maintainer. + ### 1.2.0 - Improve new version prompt to pick bump type with optional pre-release version. diff --git a/airbyte-ci/connectors/base_images/base_images/utils/dagger.py b/airbyte-ci/connectors/base_images/base_images/utils/dagger.py new file mode 100644 index 000000000000..0a71d9e80416 --- /dev/null +++ b/airbyte-ci/connectors/base_images/base_images/utils/dagger.py @@ -0,0 +1,6 @@ +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. + + +def sh_dash_c(lines: list[str]) -> list[str]: + """Wrap sequence of commands in shell for safe usage of dagger Container's with_exec method.""" + return ["sh", "-c", " && ".join(["set -o xtrace"] + lines)] diff --git a/airbyte-ci/connectors/base_images/base_images/version_registry.py b/airbyte-ci/connectors/base_images/base_images/version_registry.py index 41337c8006a8..1495c77c327c 100644 --- a/airbyte-ci/connectors/base_images/base_images/version_registry.py +++ b/airbyte-ci/connectors/base_images/base_images/version_registry.py @@ -13,11 +13,12 @@ import semver from base_images import consts, published_image from base_images.bases import AirbyteConnectorBaseImage +from base_images.java.bases import AirbyteJavaConnectorBaseImage from base_images.python.bases import AirbyteManifestOnlyConnectorBaseImage, AirbytePythonConnectorBaseImage from base_images.utils import docker from connector_ops.utils import ConnectorLanguage # type: ignore -MANAGED_BASE_IMAGES = [AirbytePythonConnectorBaseImage] +MANAGED_BASE_IMAGES = [AirbytePythonConnectorBaseImage, AirbyteJavaConnectorBaseImage] @dataclass @@ -270,6 +271,12 @@ async def get_manifest_only_registry( ) +async def get_java_registry( + dagger_client: dagger.Client, docker_credentials: Tuple[str, str], cache_ttl_seconds: int = 0 +) -> VersionRegistry: + return await VersionRegistry.load(AirbyteJavaConnectorBaseImage, dagger_client, docker_credentials, cache_ttl_seconds=cache_ttl_seconds) + + async def get_registry_for_language( dagger_client: dagger.Client, language: ConnectorLanguage, docker_credentials: Tuple[str, str], cache_ttl_seconds: int = 0 ) -> VersionRegistry: @@ -291,6 +298,8 @@ async def get_registry_for_language( return await get_python_registry(dagger_client, docker_credentials, cache_ttl_seconds=cache_ttl_seconds) elif language is ConnectorLanguage.MANIFEST_ONLY: return await get_manifest_only_registry(dagger_client, docker_credentials, cache_ttl_seconds=cache_ttl_seconds) + elif language is ConnectorLanguage.JAVA: + return await get_java_registry(dagger_client, docker_credentials, cache_ttl_seconds=cache_ttl_seconds) else: raise NotImplementedError(f"Registry for language {language} is not implemented yet.") @@ -298,5 +307,6 @@ async def get_registry_for_language( async def get_all_registries(dagger_client: dagger.Client, docker_credentials: Tuple[str, str]) -> List[VersionRegistry]: return [ await get_python_registry(dagger_client, docker_credentials), - # await get_java_registry(dagger_client), + await get_java_registry(dagger_client, docker_credentials), + # await get_manifest_only_registry(dagger_client, docker_credentials), ] diff --git a/airbyte-ci/connectors/base_images/generated/changelogs/airbyte_java_connector_base.json b/airbyte-ci/connectors/base_images/generated/changelogs/airbyte_java_connector_base.json new file mode 100644 index 000000000000..f859fddf2644 --- /dev/null +++ b/airbyte-ci/connectors/base_images/generated/changelogs/airbyte_java_connector_base.json @@ -0,0 +1,17 @@ +[ + { + "version": "1.0.0-rc.4", + "changelog_entry": "Bundle yum calls in a single RUN", + "dockerfile_example": "FROM docker.io/amazoncorretto:21-al2023@sha256:5454cb606e803fce56861fdbc9eab365eaa2ab4f357ceb8c1d56f4f8c8a7bc33\nRUN sh -c set -o xtrace && yum update -y --security && yum install -y tar openssl findutils && yum clean all\nENV AIRBYTE_SPEC_CMD=/airbyte/javabase.sh --spec\nENV AIRBYTE_CHECK_CMD=/airbyte/javabase.sh --check\nENV AIRBYTE_DISCOVER_CMD=/airbyte/javabase.sh --discover\nENV AIRBYTE_READ_CMD=/airbyte/javabase.sh --read\nENV AIRBYTE_WRITE_CMD=/airbyte/javabase.sh --write\nENV AIRBYTE_ENTRYPOINT=/airbyte/base.sh" + }, + { + "version": "1.0.0-rc.2", + "changelog_entry": "Set entrypoint to base.sh", + "dockerfile_example": "FROM docker.io/amazoncorretto:21-al2023@sha256:5454cb606e803fce56861fdbc9eab365eaa2ab4f357ceb8c1d56f4f8c8a7bc33\nRUN yum update -y --security\nRUN yum install -y tar openssl findutils\nENV AIRBYTE_SPEC_CMD=/airbyte/javabase.sh --spec\nENV AIRBYTE_CHECK_CMD=/airbyte/javabase.sh --check\nENV AIRBYTE_DISCOVER_CMD=/airbyte/javabase.sh --discover\nENV AIRBYTE_READ_CMD=/airbyte/javabase.sh --read\nENV AIRBYTE_WRITE_CMD=/airbyte/javabase.sh --write\nENV AIRBYTE_ENTRYPOINT=/airbyte/base.sh" + }, + { + "version": "1.0.0-rc.1", + "changelog_entry": "Create a base image for our java connectors.", + "dockerfile_example": "FROM docker.io/amazoncorretto:21-al2023@sha256:5454cb606e803fce56861fdbc9eab365eaa2ab4f357ceb8c1d56f4f8c8a7bc33\nRUN yum update -y --security\nRUN yum install -y tar openssl findutils\nENV AIRBYTE_SPEC_CMD=/airbyte/javabase.sh --spec\nENV AIRBYTE_CHECK_CMD=/airbyte/javabase.sh --check\nENV AIRBYTE_DISCOVER_CMD=/airbyte/javabase.sh --discover\nENV AIRBYTE_READ_CMD=/airbyte/javabase.sh --read\nENV AIRBYTE_WRITE_CMD=/airbyte/javabase.sh --write\nENV AIRBYTE_ENTRYPOINT=/airbyte/base.sh" + } +] diff --git a/airbyte-ci/connectors/base_images/pyproject.toml b/airbyte-ci/connectors/base_images/pyproject.toml index f6afeb14e5cb..6c4e41f34fea 100644 --- a/airbyte-ci/connectors/base_images/pyproject.toml +++ b/airbyte-ci/connectors/base_images/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "airbyte-connectors-base-images" -version = "1.3.1" +version = "1.4.0" description = "This package is used to generate and publish the base images for Airbyte Connectors." authors = ["Augustin Lafanechere "] readme = "README.md"