[airbyte-ci] build python connectors from base images #30456

Merged
Changes from all commits (27 commits)
aa1d8cd
change our python connector build process to use the base images
alafanechere Sep 14, 2023
36a0034
change our python connector build process to use the base images
alafanechere Sep 14, 2023
992c318
revert changes on source-faker
alafanechere Sep 17, 2023
64f2837
change interface to GLOBAL_REGISTRY
alafanechere Sep 19, 2023
76c8fb0
do not interact with global registry on build
alafanechere Sep 19, 2023
ffec854
do not depend on base_images
alafanechere Sep 20, 2023
9b5a698
port the development overrides to the new build process and test it
alafanechere Sep 20, 2023
7284c23
do not pick connectors without metadata for testing
alafanechere Sep 20, 2023
0419c95
mount airbyte-cdk during tests
alafanechere Sep 20, 2023
8ba5cea
implement suggestions
alafanechere Sep 21, 2023
30780d1
DEMO - to revert
alafanechere Sep 21, 2023
249aed6
Revert "DEMO - to revert"
alafanechere Sep 21, 2023
4590d20
Merge builder container and with_python_package to support poetry
bnchrch Sep 27, 2023
77437b7
Remove Dagger Build hack
bnchrch Sep 27, 2023
1817865
Add local cdk support
bnchrch Sep 27, 2023
18693fc
Update default base image
bnchrch Sep 28, 2023
2dbce7d
Automated Commit - Formatting Changes
bnchrch Sep 28, 2023
04f32de
Move metadata check to get all connectors
bnchrch Oct 8, 2023
cd42288
remove explicit dev override mocks from test
bnchrch Oct 9, 2023
f600805
Merge branch 'master' into 09-14-change_our_python_connector_build_pr…
alafanechere Oct 10, 2023
7db8c2d
make source-file-secure use the base image
alafanechere Oct 10, 2023
3490668
revert source-file-secure changes
alafanechere Oct 10, 2023
28dc026
rever source-file-secure changes
alafanechere Oct 10, 2023
637103f
Automated Commit - Formatting Changes
alafanechere Oct 10, 2023
ca83df9
bump connector_ops version
alafanechere Oct 10, 2023
9e07934
Merge branch '09-14-change_our_python_connector_build_process_to_use_…
alafanechere Oct 10, 2023
5e1cacb
update poetry lock
alafanechere Oct 10, 2023
10 changes: 7 additions & 3 deletions airbyte-ci/connectors/connector_ops/connector_ops/utils.py
@@ -27,7 +27,9 @@
CONNECTOR_PATH_PREFIX = "airbyte-integrations/connectors"
SOURCE_CONNECTOR_PATH_PREFIX = CONNECTOR_PATH_PREFIX + "/source-"
DESTINATION_CONNECTOR_PATH_PREFIX = CONNECTOR_PATH_PREFIX + "/destination-"
THIRD_PARTY_CONNECTOR_PATH_PREFIX = CONNECTOR_PATH_PREFIX + "/third_party/"

THIRD_PARTY_GLOB = "third-party"
THIRD_PARTY_CONNECTOR_PATH_PREFIX = CONNECTOR_PATH_PREFIX + f"/{THIRD_PARTY_GLOB}/"
SCAFFOLD_CONNECTOR_GLOB = "-scaffold-"


@@ -331,7 +333,7 @@ def version_in_dockerfile_label(self) -> Optional[str]:
for line in f:
if "io.airbyte.version" in line:
return line.split("=")[1].strip()
except FileNotFoundError as e:
except FileNotFoundError:
return None
raise ConnectorVersionNotFound(
"""
@@ -540,7 +542,9 @@ def get_all_connectors_in_repo() -> Set[Connector]:
return {
Connector(Path(metadata_file).parent.name)
for metadata_file in glob(f"{repo_path}/airbyte-integrations/connectors/**/metadata.yaml", recursive=True)
if SCAFFOLD_CONNECTOR_GLOB not in metadata_file
# HACK: The Connector util is not good at fetching metadata for third party connectors.
# We want to avoid picking a connector that does not have metadata.
if SCAFFOLD_CONNECTOR_GLOB not in metadata_file and THIRD_PARTY_GLOB not in metadata_file
}
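To make the new exclusion behaviour concrete, here is a small self-contained sketch (the metadata paths below are hypothetical examples) of how the scaffold and third-party filters act on candidate metadata.yaml files:

```python
# Illustrative sketch only; the paths are hypothetical examples.
SCAFFOLD_CONNECTOR_GLOB = "-scaffold-"
THIRD_PARTY_GLOB = "third-party"

candidate_metadata_files = [
    "airbyte-integrations/connectors/source-faker/metadata.yaml",
    "airbyte-integrations/connectors/source-scaffold-source-http/metadata.yaml",
    "airbyte-integrations/connectors/third-party/some-connector/metadata.yaml",
]

kept = [
    path
    for path in candidate_metadata_files
    if SCAFFOLD_CONNECTOR_GLOB not in path and THIRD_PARTY_GLOB not in path
]

# Only the source-faker entry survives: scaffold and third-party connectors are skipped
# because the Connector util cannot reliably load metadata for them.
print(kept)
```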


2 changes: 1 addition & 1 deletion airbyte-ci/connectors/connector_ops/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "connector_ops"
version = "0.2.4"
version = "0.2.5"
description = "Packaged maintained by the connector operations team to perform CI for connectors"
authors = ["Airbyte <[email protected]>"]

7 changes: 7 additions & 0 deletions airbyte-ci/connectors/connector_ops/tests/test_utils.py
@@ -194,3 +194,10 @@ def test_get_all_gradle_dependencies(with_test_dependencies):
Path("airbyte-json-validation"),
]
assert set(all_dependencies) == set(expected_dependencies)


def test_get_all_connectors_in_repo():
all_connectors = utils.get_all_connectors_in_repo()
assert len(all_connectors) > 0
assert all([isinstance(connector, utils.Connector) for connector in all_connectors])
assert all([connector.metadata is not None for connector in all_connectors])
Changes to a connector metadata.yaml (file path not rendered)
@@ -3,7 +3,7 @@ data:
hosts:
- "*.googleapis.com"
connectorBuildOptions:
baseImage: airbyte/airbyte-python-connectors-base:0.1.0
baseImage: airbyte/python-connector-base:1.0.0
connectorSubtype: file
connectorType: source
definitionId: 71607ba1-c0ac-4799-8049-7f4b90dd50f7
106 changes: 52 additions & 54 deletions airbyte-ci/connectors/pipelines/README.md

Large diffs are not rendered by default.

140 changes: 35 additions & 105 deletions airbyte-ci/connectors/pipelines/pipelines/actions/environments.py
@@ -14,7 +14,7 @@
from typing import TYPE_CHECKING, Callable, List, Optional

import toml
from dagger import CacheVolume, Client, Container, DaggerError, Directory, File, Platform, Secret
from dagger import CacheVolume, Client, Container, Directory, File, Platform, Secret
from dagger.engine._version import CLI_VERSION as dagger_engine_version
from pipelines import consts
from pipelines.consts import (
@@ -151,6 +151,7 @@ def with_python_package(
python_environment: Container,
package_source_code_path: str,
exclude: Optional[List] = None,
include: Optional[List] = None,
) -> Container:
"""Load a python package source code to a python environment container.

@@ -164,7 +165,7 @@
Returns:
Container: A python environment container with the python package source code.
"""
package_source_code_directory: Directory = context.get_repo_dir(package_source_code_path, exclude=exclude)
package_source_code_directory: Directory = context.get_repo_dir(package_source_code_path, exclude=exclude, include=include)
work_dir_path = f"/{package_source_code_path}"
container = python_environment.with_mounted_directory(work_dir_path, package_source_code_directory).with_workdir(work_dir_path)
return container
@@ -308,29 +309,29 @@ def _install_python_dependencies_from_setup_py(
container: Container,
additional_dependency_groups: Optional[List] = None,
) -> Container:
install_connector_package_cmd = ["python", "-m", "pip", "install", "."]
install_connector_package_cmd = ["pip", "install", "."]
container = container.with_exec(install_connector_package_cmd)

if additional_dependency_groups:
# e.g. .[dev,tests]
group_string = f".[{','.join(additional_dependency_groups)}]"
group_install_cmd = ["python", "-m", "pip", "install", group_string]
group_install_cmd = ["pip", "install", group_string]

container = container.with_exec(group_install_cmd)

return container


def _install_python_dependencies_from_requirements_txt(container: Container) -> Container:
install_requirements_cmd = ["python", "-m", "pip", "install", "-r", "requirements.txt"]
install_requirements_cmd = ["pip", "install", "-r", "requirements.txt"]
return container.with_exec(install_requirements_cmd)


def _install_python_dependencies_from_poetry(
container: Container,
additional_dependency_groups: Optional[List] = None,
) -> Container:
pip_install_poetry_cmd = ["python", "-m", "pip", "install", "poetry"]
pip_install_poetry_cmd = ["pip", "install", "poetry"]
poetry_disable_virtual_env_cmd = ["poetry", "config", "virtualenvs.create", "false"]
poetry_install_no_venv_cmd = ["poetry", "install", "--no-root"]
if additional_dependency_groups:
@@ -346,6 +347,7 @@ async def with_installed_python_package(
package_source_code_path: str,
additional_dependency_groups: Optional[List] = None,
exclude: Optional[List] = None,
include: Optional[List] = None,
) -> Container:
"""Install a python package in a python environment container.

@@ -359,7 +361,7 @@
Returns:
Container: A python environment container with the python package installed.
"""
container = with_python_package(context, python_environment, package_source_code_path, exclude=exclude)
container = with_python_package(context, python_environment, package_source_code_path, exclude=exclude, include=include)

local_dependencies = await find_local_python_dependencies(context, package_source_code_path)

@@ -402,7 +404,7 @@ async def apply_python_development_overrides(context: ConnectorContext, connecto
path_to_cdk = "airbyte-cdk/python/"
directory_to_mount = context.get_repo_dir(path_to_cdk)

context.logger.info(f"Mounting {directory_to_mount}")
context.logger.info(f"Mounting CDK from {directory_to_mount}")

# Install the airbyte-cdk package from the local directory
# We use --no-deps to avoid conflicts with the airbyte-cdk version required by the connector
@@ -413,7 +415,30 @@
return connector_container
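The exact override commands are collapsed in the view above. As a rough, non-authoritative sketch of what the local CDK override amounts to (assuming the Dagger Python SDK container API and the in-repo CDK path referenced earlier in this function; the in-container mount point is hypothetical, and connector_container and context come from the enclosing function):

```python
# Hedged sketch only; see apply_python_development_overrides above for the real logic.
path_to_cdk = "airbyte-cdk/python/"
cdk_mount_path = "/airbyte-cdk/python"  # hypothetical in-container mount point

connector_container = (
    connector_container.with_mounted_directory(cdk_mount_path, context.get_repo_dir(path_to_cdk))
    # --no-deps avoids clobbering the dependency pins the connector already installed.
    .with_exec(["pip", "install", "--no-deps", cdk_mount_path])
)
```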


async def with_python_connector_installed(context: ConnectorContext) -> Container:
async def with_python_connector_installed(
context: PipelineContext,
python_container: Container,
connector_source_path: str,
additional_dependency_groups: Optional[List] = None,
exclude: Optional[List] = None,
include: Optional[List] = None,
) -> Container:
"""Install an airbyte python connectors dependencies."""
container = await with_installed_python_package(
context,
python_container,
connector_source_path,
additional_dependency_groups=additional_dependency_groups,
exclude=exclude,
include=include,
)

container = await apply_python_development_overrides(context, container)

return container


async def with_test_python_connector_installed(context: ConnectorContext) -> Container:
"""Install an airbyte connector python package in a testing environment.

Args:
@@ -438,12 +463,10 @@ async def with_python_connector_installed(context: ConnectorContext) -> Containe
".dockerignore",
]
]
container = await with_installed_python_package(
container = await with_python_connector_installed(
context, testing_environment, connector_source_path, additional_dependency_groups=["dev", "tests", "main"], exclude=exclude
)

container = await apply_python_development_overrides(context, container)

return container


@@ -910,45 +933,6 @@ async def with_airbyte_java_connector(context: ConnectorContext, connector_java_
return await finalize_build(context, connector_container)


async def get_cdk_version_from_python_connector(python_connector: Container) -> Optional[str]:
pip_freeze_stdout = await python_connector.with_entrypoint("pip").with_exec(["freeze"]).stdout()
cdk_dependency_line = next((line for line in pip_freeze_stdout.split("\n") if "airbyte-cdk" in line), None)
if not cdk_dependency_line:
return None

if "file://" in cdk_dependency_line:
return "LOCAL"

_, cdk_version = cdk_dependency_line.split("==")
return cdk_version


async def with_airbyte_python_connector(context: ConnectorContext, build_platform: Platform) -> Container:
if context.connector.technical_name == "source-file-secure":
return await with_airbyte_python_connector_full_dagger(context, build_platform)

pip_cache: CacheVolume = context.dagger_client.cache_volume("pip_cache")
connector_container = (
context.dagger_client.container(platform=build_platform)
.with_mounted_cache("/root/.cache/pip", pip_cache)
.build(await context.get_connector_dir())
.with_label("io.airbyte.name", context.metadata["dockerRepository"])
)

connector_container = await apply_python_development_overrides(context, connector_container)

cdk_version = await get_cdk_version_from_python_connector(connector_container)
if cdk_version:
context.logger.info(f"Connector has a cdk dependency, using cdk version {cdk_version}")
connector_container = connector_container.with_label("io.airbyte.cdk_version", cdk_version)
context.cdk_version = cdk_version
if not await connector_container.label("io.airbyte.version") == context.metadata["dockerImageTag"]:
raise DaggerError(
"Abusive caching might be happening. The connector container should have been built with the correct version as defined in metadata.yaml"
)
return await finalize_build(context, connector_container)


async def finalize_build(context: ConnectorContext, connector_container: Container) -> Container:
"""Finalize build by adding dagger engine version label and running finalize_build.sh or finalize_build.py if present in the connector directory."""
connector_container = connector_container.with_label("io.dagger.engine_version", dagger_engine_version)
@@ -989,60 +973,6 @@ async def finalize_build(context: ConnectorContext, connector_contain
return connector_container.with_entrypoint(original_entrypoint)


async def with_airbyte_python_connector_full_dagger(context: ConnectorContext, build_platform: Platform) -> Container:
setup_dependencies_to_mount = await find_local_python_dependencies(
context, str(context.connector.code_directory), search_dependencies_in_setup_py=True, search_dependencies_in_requirements_txt=False
)

pip_cache: CacheVolume = context.dagger_client.cache_volume("pip_cache")
base = context.dagger_client.container(platform=build_platform).from_("python:3.9-slim")
snake_case_name = context.connector.technical_name.replace("-", "_")
entrypoint = ["python", "/airbyte/integration_code/main.py"]
builder = (
base.with_workdir("/airbyte/integration_code")
.with_env_variable("DAGGER_BUILD", "1")
.with_mounted_cache("/root/.cache/pip", pip_cache)
.with_exec(
sh_dash_c(
[
"apt-get update",
"apt-get install -y tzdata",
"pip install --upgrade pip",
]
)
)
.with_file("setup.py", (await context.get_connector_dir(include="setup.py")).file("setup.py"))
)

for dependency_path in setup_dependencies_to_mount:
in_container_dependency_path = f"/local_dependencies/{Path(dependency_path).name}"
builder = builder.with_mounted_directory(in_container_dependency_path, context.get_repo_dir(dependency_path))

builder = builder.with_exec(["pip", "install", "--prefix=/install", "."])

connector_container = (
base.with_workdir("/airbyte/integration_code")
.with_exec(
sh_dash_c(
[
"apt-get update",
"apt-get install -y bash",
]
)
)
.with_directory("/usr/local", builder.directory("/install"))
.with_file("/usr/localtime", builder.file("/usr/share/zoneinfo/Etc/UTC"))
.with_new_file("/etc/timezone", contents="Etc/UTC")
.with_file("main.py", (await context.get_connector_dir(include="main.py")).file("main.py"))
.with_directory(snake_case_name, (await context.get_connector_dir(include=snake_case_name)).directory(snake_case_name))
.with_env_variable("AIRBYTE_ENTRYPOINT", " ".join(entrypoint))
.with_entrypoint(entrypoint)
.with_label("io.airbyte.version", context.metadata["dockerImageTag"])
.with_label("io.airbyte.name", context.metadata["dockerRepository"])
)
return await finalize_build(context, connector_container)


def with_crane(
context: PipelineContext,
) -> Container:
Changes to the python connector build step in the pipelines package (file path not rendered)
@@ -2,8 +2,9 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#


from dagger import Container, Platform
from pipelines.actions.environments import with_airbyte_python_connector
from pipelines.actions.environments import apply_python_development_overrides, with_python_connector_installed
from pipelines.bases import StepResult
from pipelines.builds.common import BuildConnectorImagesBase
from pipelines.contexts import ConnectorContext
@@ -15,8 +16,86 @@ class BuildConnectorImages(BuildConnectorImagesBase):
A spec command is run on the container to validate it was built successfully.
"""

async def _build_connector(self, platform: Platform) -> Container:
return await with_airbyte_python_connector(self.context, platform)
DEFAULT_ENTRYPOINT = ["python", "/airbyte/integration_code/main.py"]
PATH_TO_INTEGRATION_CODE = "/airbyte/integration_code"

async def _build_connector(self, platform: Platform):
if (
"connectorBuildOptions" in self.context.connector.metadata
and "baseImage" in self.context.connector.metadata["connectorBuildOptions"]
):
return await self._build_from_base_image(platform)
else:
return await self._build_from_dockerfile(platform)

def _get_base_container(self, platform: Platform) -> Container:
base_image_name = self.context.connector.metadata["connectorBuildOptions"]["baseImage"]
self.logger.info(f"Building connector from base image {base_image_name}")
return self.dagger_client.container(platform=platform).from_(base_image_name)

async def _create_builder_container(self, base_container: Container) -> Container:
"""Pre install the connector dependencies in a builder container.
If a python connector depends on another local python connector, we need to mount its source in the container.
This occurs for the source-file-secure connector, for example, which depends on source-file.

Args:
base_container (Container): The base container to use to build the connector.

Returns:
Container: The builder container, with installed dependencies.
"""
ONLY_PYTHON_BUILD_FILES = ["setup.py", "requirements.txt", "pyproject.toml", "poetry.lock"]
Review comment (Contributor):
I think the list looks good generally for the cases we support, but is there any use case for overriding these?
Also, do the dependencies on other local connectors still work with these whitelisted files? (I assume so, but want to confirm.)

Reply (Contributor):
I can't see a use case for overriding these.
Basically, this list defines all the ways to declare python dependencies and makes sure those files are mounted into the container.
If Python adds yet another way, we would extend this list rather than override it.
Also, all the local dependency logic is still there; it lives in with_python_connector_installed.

builder = await with_python_connector_installed(
self.context,
base_container,
str(self.context.connector.code_directory),
include=ONLY_PYTHON_BUILD_FILES,
)

return builder

async def _build_from_base_image(self, platform: Platform) -> Container:
"""Build the connector container using the base image defined in the metadata, in the connectorBuildOptions.baseImage field.

Returns:
Container: The connector container built from the base image.
"""
self.logger.info("Building connector from base image in metadata")
base = self._get_base_container(platform)
builder = await self._create_builder_container(base)

# The snake case name of the connector corresponds to the python package name of the connector
# We want to mount it to the container under PATH_TO_INTEGRATION_CODE/connector_snake_case_name
connector_snake_case_name = self.context.connector.technical_name.replace("-", "_")
Review comment (Contributor):
📚 Let's explain why we use snake case here.

Reply (Contributor, author):
Done.


connector_container = (
# copy python dependencies from builder to connector container
base.with_directory("/usr/local", builder.directory("/usr/local"))
.with_workdir(self.PATH_TO_INTEGRATION_CODE)
.with_file("main.py", (await self.context.get_connector_dir(include="main.py")).file("main.py"))
.with_directory(
connector_snake_case_name,
(await self.context.get_connector_dir(include=connector_snake_case_name)).directory(connector_snake_case_name),
)
.with_env_variable("AIRBYTE_ENTRYPOINT", " ".join(self.DEFAULT_ENTRYPOINT))
.with_entrypoint(self.DEFAULT_ENTRYPOINT)
.with_label("io.airbyte.version", self.context.connector.metadata["dockerImageTag"])
.with_label("io.airbyte.name", self.context.connector.metadata["dockerRepository"])
)
return connector_container
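Per the class docstring, the built image is validated by running a spec command against it. A minimal sketch of such a check (a hypothetical helper; the command spells out the default entrypoint explicitly rather than relying on entrypoint semantics):

```python
import json

from dagger import Container


async def check_spec(connector_container: Container) -> None:
    # Hypothetical helper: invoke the connector's spec command inside the built
    # container and verify that an AirbyteMessage of type SPEC is emitted.
    spec_stdout = await connector_container.with_exec(
        ["python", "/airbyte/integration_code/main.py", "spec"]
    ).stdout()
    message = json.loads(spec_stdout.splitlines()[0])
    assert message["type"] == "SPEC"
```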

async def _build_from_dockerfile(self, platform: Platform) -> Container:
"""Build the connector container using its Dockerfile.

Returns:
Container: The connector container built from its Dockerfile.
"""
self.logger.warn(
"This connector is built from its Dockerfile. This is now deprecated. Please set connectorBuildOptions.baseImage metadata field to use or new build process."
)
container = self.dagger_client.container(platform=platform).build(await self.context.get_connector_dir())
container = await apply_python_development_overrides(self.context, container)
return container
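For connectors migrating off the deprecated Dockerfile path, the opt-in is the connectorBuildOptions.baseImage field shown in the metadata.yaml change earlier in this PR. A hedged sketch (hypothetical helper name; assumes PyYAML and the usual metadata.yaml location inside the connector directory) of detecting that opt-in:

```python
from pathlib import Path

import yaml  # assumption: PyYAML is available in the tooling environment


def uses_base_image_build(connector_dir: str) -> bool:
    """Hypothetical helper: True if the connector's metadata.yaml declares
    connectorBuildOptions.baseImage, i.e. opts into the new build process."""
    raw = yaml.safe_load((Path(connector_dir) / "metadata.yaml").read_text())
    metadata = raw.get("data", raw)  # metadata.yaml nests its fields under a top-level "data" key
    return "baseImage" in metadata.get("connectorBuildOptions", {})


# Example with a hypothetical connector directory:
# uses_base_image_build("airbyte-integrations/connectors/source-faker")
```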


async def run_connector_build(context: ConnectorContext) -> StepResult: