Skip to content

Commit

Permalink
Source DuckDB: ✨ Add MotherDuck support 🦆🦆 (#29428)
Browse files Browse the repository at this point in the history
Co-authored-by: Elena Felder <[email protected]>
  • Loading branch information
aaronsteers and elefeint authored Sep 19, 2023
1 parent 5c56ac1 commit 68380cb
Show file tree
Hide file tree
Showing 23 changed files with 1,492 additions and 149 deletions.
76 changes: 76 additions & 0 deletions .devcontainer/destination-duckdb/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/python
{
"name": "DuckDB Destination Connector DevContainer (Python)",

// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"image": "mcr.microsoft.com/devcontainers/python:0-3.9",

// Features to add to the dev container. More info: https://containers.dev/features.
"features": {
"ghcr.io/devcontainers-contrib/features/poetry:2": {},
"ghcr.io/devcontainers/features/docker-in-docker": {}
},
"overrideFeatureInstallOrder": [
// Deterministic order maximizes cache reuse
"ghcr.io/devcontainers-contrib/features/poetry",
"ghcr.io/devcontainers/features/docker-in-docker"
],

"workspaceFolder": "/workspaces/airbyte/airbyte-integrations/connectors/destination-duckdb",

// Configure tool-specific properties.
"customizations": {
"vscode": {
"extensions": [
// Python extensions:
"charliermarsh.ruff",
"matangover.mypy",
"ms-python.black",
"ms-python.python",
"ms-python.vscode-pylance",

// Toml support
"tamasfe.even-better-toml",

// Yaml and JSON Schema support:
"redhat.vscode-yaml",

// Contributing:
"GitHub.vscode-pull-request-github"
],
"settings": {
"extensions.ignoreRecommendations": true,
"git.openRepositoryInParentFolders": "always",
"python.defaultInterpreterPath": ".venv/bin/python",
"python.interpreter.infoVisibility": "always",
"python.terminal.activateEnvironment": true,
"python.testing.pytestEnabled": true,
"python.testing.cwd": "/workspaces/airbyte/airbyte-integrations/connectors/destination-duckdb",
"python.testing.pytestArgs": [
"--rootdir=/workspaces/airbyte/airbyte-integrations/connectors/destination-duckdb",
"."
]
}
}
},
"containerEnv": {
"POETRY_VIRTUALENVS_IN_PROJECT": "true"
},

// Mark the root directory as 'safe' for git.
"initializeCommand": "git config --add safe.directory /workspaces/airbyte",

// Use 'postCreateCommand' to run commands after the container is created.
// Post-create tasks:
// 1. Create a symlink directory.
// 2. Create symlinks for the devcontainer.json and docs markdown file.
// 3. Install the Python/Poetry dependencies.
"postCreateCommand": "mkdir -p ./.symlinks && echo '*' > ./.symlinks/.gitignore && ln -sf /workspaces/airbyte/.devcontainer/destination-duckdb/devcontainer.json ./.symlinks/devcontainer.json && ln -sf /workspaces/airbyte/docs/integrations/destinations/duckdb.md ./.symlinks/duckdb-docs.md && poetry install"

// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],

// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
}
2 changes: 1 addition & 1 deletion airbyte-ci/connectors/connector_ops/connector_ops/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ def metadata(self) -> Optional[dict]:
def language(self) -> ConnectorLanguage:
if Path(self.code_directory / self.technical_name.replace("-", "_") / "manifest.yaml").is_file():
return ConnectorLanguage.LOW_CODE
if Path(self.code_directory / "setup.py").is_file():
if Path(self.code_directory / "setup.py").is_file() or Path(self.code_directory / "pyproject.toml").is_file():
return ConnectorLanguage.PYTHON
try:
with open(self.code_directory / "Dockerfile") as dockerfile:
Expand Down
2 changes: 1 addition & 1 deletion airbyte-ci/connectors/connector_ops/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "connector_ops"
version = "0.2.2"
version = "0.2.3"
description = "Packaged maintained by the connector operations team to perform CI for connectors"
authors = ["Airbyte <[email protected]>"]

Expand Down
1 change: 1 addition & 0 deletions airbyte-ci/connectors/pipelines/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,7 @@ This command runs the Python tests for a airbyte-ci poetry package.
## Changelog
| Version | PR | Description |
|---------| --------------------------------------------------------- |-----------------------------------------------------------------------------------------------------------|
| 1.4.0 | [#30330](https://github.com/airbytehq/airbyte/pull/30330) | Add support for pyproject.toml as the preferred entry point for a connector package |
| 1.3.0 | [#30461](https://github.com/airbytehq/airbyte/pull/30461) | Add `--use-local-cdk` flag to all connectors commands |
| 1.2.3 | [#30477](https://github.com/airbytehq/airbyte/pull/30477) | Fix a test regression introduced the previous version. |
| 1.2.2 | [#30438](https://github.com/airbytehq/airbyte/pull/30438) | Add workaround to always stream logs properly with --is-local. |
Expand Down
60 changes: 45 additions & 15 deletions airbyte-ci/connectors/pipelines/pipelines/actions/environments.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,42 @@ async def find_local_dependencies_in_pyproject_toml(
return local_dependency_paths


def _install_python_dependencies_from_setup_py(
    container: Container,
    additional_dependency_groups: Optional[List] = None,
) -> Container:
    """Pip-install the package located at ``.``, plus any requested extras.

    Args:
        container: Container in which the install commands are executed.
        additional_dependency_groups: Optional extras names (e.g. ``["dev", "tests"]``).
            When given, they are installed by a second pip invocation.

    Returns:
        Container: The container with the pip install command(s) appended.
    """
    pip_install_prefix = ["python", "-m", "pip", "install"]
    with_package = container.with_exec(pip_install_prefix + ["."])

    # Nothing more to do when no extras were requested.
    if not additional_dependency_groups:
        return with_package

    # Builds a requirement specifier such as ".[dev,tests]".
    extras = ",".join(additional_dependency_groups)
    return with_package.with_exec(pip_install_prefix + [f".[{extras}]"])


def _install_python_dependencies_from_requirements_txt(container: Container) -> Container:
    """Append a ``pip install -r requirements.txt`` step to the container.

    Args:
        container: Container in which the install command is executed.

    Returns:
        Container: The container with the pip install command appended.
    """
    return container.with_exec(["python", "-m", "pip", "install", "-r", "requirements.txt"])


def _install_python_dependencies_from_poetry(
    container: Container,
    additional_dependency_groups: Optional[List] = None,
) -> Container:
    """Install Poetry in the container and use it to install the project's dependencies.

    Three commands are appended in order: pip-install Poetry, turn off Poetry's
    virtualenv creation, then run ``poetry install --no-root``.

    Args:
        container: Container in which the install commands are executed.
        additional_dependency_groups: Optional Poetry dependency group names; each one
            is passed to ``poetry install`` as ``--with <group>``.

    Returns:
        Container: The container with the three commands appended.
    """
    # One "--with <group>" pair per requested dependency group, in the given order.
    group_flags = []
    for group_name in additional_dependency_groups or []:
        group_flags.extend(["--with", group_name])

    with_poetry = container.with_exec(["python", "-m", "pip", "install", "poetry"])
    without_venv = with_poetry.with_exec(["poetry", "config", "virtualenvs.create", "false"])
    return without_venv.with_exec(["poetry", "install", "--no-root"] + group_flags)


async def with_installed_python_package(
context: PipelineContext,
python_environment: Container,
Expand All @@ -324,29 +360,23 @@ async def with_installed_python_package(
Returns:
Container: A python environment container with the python package installed.
"""
install_requirements_cmd = ["python", "-m", "pip", "install", "-r", "requirements.txt"]
install_connector_package_cmd = ["python", "-m", "pip", "install", "."]

container = with_python_package(context, python_environment, package_source_code_path, exclude=exclude)

local_dependencies = await find_local_python_dependencies(context, package_source_code_path)

for dependency_directory in local_dependencies:
container = container.with_mounted_directory("/" + dependency_directory, context.get_repo_dir(dependency_directory))

has_setup_py, has_requirements_txt = await check_path_in_workdir(container, "setup.py"), await check_path_in_workdir(
container, "requirements.txt"
)
has_setup_py = await check_path_in_workdir(container, "setup.py")
has_requirements_txt = await check_path_in_workdir(container, "requirements.txt")
has_pyproject_toml = await check_path_in_workdir(container, "pyproject.toml")

if has_setup_py:
container = container.with_exec(install_connector_package_cmd)
if has_requirements_txt:
container = container.with_exec(install_requirements_cmd)

if additional_dependency_groups:
container = container.with_exec(
install_connector_package_cmd[:-1] + [install_connector_package_cmd[-1] + f"[{','.join(additional_dependency_groups)}]"]
)
if has_pyproject_toml:
container = _install_python_dependencies_from_poetry(container)
elif has_setup_py:
container = _install_python_dependencies_from_setup_py(container, additional_dependency_groups)
elif has_requirements_txt:
container = _install_python_dependencies_from_requirements_txt(container)

return container

Expand Down
2 changes: 1 addition & 1 deletion airbyte-ci/connectors/pipelines/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "pipelines"
version = "1.3.0"
version = "1.4.0"
description = "Packaged maintained by the connector operations team to perform CI for connectors' pipelines"
authors = ["Airbyte <[email protected]>"]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,11 @@


@pytest.fixture
def python_connector() -> Connector:
return Connector("source-openweather")
def python_connectors() -> list[Connector]:
    """Provide one sample connector for each Python packaging layout under test."""
    return [
        Connector("source-openweather"),  # setup.py based
        Connector("destination-duckdb"),  # pyproject.toml based
    ]


@pytest.fixture
Expand All @@ -29,11 +32,12 @@ def context(dagger_client):
return context


async def test_with_installed_python_package(context, python_connector):
python_environment = context.dagger_client.container().from_("python:3.10")
installed_connector_package = await environments.with_installed_python_package(
context,
python_environment,
str(python_connector.code_directory),
)
await installed_connector_package.with_exec(["python", "main.py", "spec"])
async def test_with_installed_python_package(context, python_connectors):
    """Install each sample connector into a Python image and run its ``spec`` command."""
    for connector in python_connectors:
        # Each connector gets its own container built from the base image.
        base_image = context.dagger_client.container().from_("python:3.10")
        container_with_package = await environments.with_installed_python_package(
            context,
            base_image,
            str(connector.code_directory),
        )
        await container_with_package.with_exec(["python", "main.py", "spec"])
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@
!main.py
!destination_duckdb
!setup.py
!pyproject.toml
!poetry.lock
!README.md
2 changes: 2 additions & 0 deletions airbyte-integrations/connectors/destination-duckdb/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Ignore symlinks created within the dev container
.symlinks
38 changes: 15 additions & 23 deletions airbyte-integrations/connectors/destination-duckdb/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,42 +1,34 @@
FROM python:3.9.11 as base
FROM python:3.9.11
# FROM python:3.9.11-alpine3.15 as base
# switched from alpine as there were tons of errors (in case you want to switch back to alpine)
# - https://stackoverflow.com/a/57485724/5246670
# - numpy error: https://stackoverflow.com/a/22411624/5246670
# - libstdc++ https://github.com/amancevice/docker-pandas/issues/12#issuecomment-717215043
# - musl-dev linux-headers g++ because of: https://stackoverflow.com/a/40407099/5246670

# build and load all requirements
FROM base as builder
WORKDIR /airbyte/integration_code

# upgrade pip to the latest version
# Upgrade system packages and install Poetry
RUN apt-get update && apt-get -y upgrade \
&& pip install --upgrade pip
&& pip install --upgrade pip \
&& pip install poetry

COPY setup.py ./
# install necessary packages to a temporary folder
RUN pip install --prefix=/install .
# build a clean environment
FROM base
# RUN conda install -c conda-forge python-duckdb
WORKDIR /airbyte/integration_code

# copy all loaded and built libraries to a pure basic image
COPY --from=builder /install /usr/local
# add default timezone settings
COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime
RUN echo "Etc/UTC" > /etc/timezone
# The poetry.lock* glob lets this COPY succeed even when no lock file exists yet
COPY pyproject.toml poetry.lock* ./
RUN poetry config virtualenvs.create false \
&& poetry install --no-root --no-dev

#adding duckdb manually (outside of setup.py - lots of errors)
RUN pip install duckdb

# copy payload code only
# Copy code
COPY main.py ./
COPY destination_duckdb ./destination_duckdb

# Timezone setup
COPY --from=python:3.9.11 /usr/share/zoneinfo/Etc/UTC /etc/localtime
RUN echo "Etc/UTC" > /etc/timezone

# Entry point
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.1.0
LABEL io.airbyte.version=0.2.0
LABEL io.airbyte.name=airbyte/destination-duckdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
connector_image: airbyte/destination-duckdb:dev
acceptance_tests:
spec:
tests:
- spec_path: integration_tests/spec.json
config_path: "integration_tests/config.json"
connection:
tests:
- config_path: "integration_tests/config.json"
status: "succeed"

This file was deleted.

Loading

0 comments on commit 68380cb

Please sign in to comment.