From 2069bbd356b614e8a5bf5be4e7a9cb1be38733b6 Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Thu, 18 Jan 2024 12:15:09 +0100 Subject: [PATCH] review comments --- .../actions/run-dagger-pipeline/action.yml | 4 + .github/workflows/publish_connectors.yml | 3 +- .../airbyte_ci/connectors/publish/pipeline.py | 52 +++-- .../pipelines/airbyte_ci/poetry/commands.py | 69 +----- .../pipelines/airbyte_ci/poetry/pipeline.py | 209 ------------------ .../airbyte_ci/poetry/publish/__init__.py | 3 + .../airbyte_ci/poetry/publish/commands.py | 81 +++++++ .../airbyte_ci/poetry/publish/context.py | 96 ++++++++ .../airbyte_ci/poetry/publish/pipeline.py | 153 +++++++++++++ .../pipelines/airbyte_ci/poetry/utils.py | 3 +- .../connectors/pipelines/pipelines/consts.py | 2 + .../tests/test_poetry/test_poetry_publish.py | 92 ++++++++ .../pipelines/tests/test_publish.py | 68 ++++++ 13 files changed, 540 insertions(+), 295 deletions(-) delete mode 100644 airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/pipeline.py create mode 100644 airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/publish/__init__.py create mode 100644 airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/publish/commands.py create mode 100644 airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/publish/context.py create mode 100644 airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/publish/pipeline.py create mode 100644 airbyte-ci/connectors/pipelines/tests/test_poetry/test_poetry_publish.py diff --git a/.github/actions/run-dagger-pipeline/action.yml b/.github/actions/run-dagger-pipeline/action.yml index 8d28ea9e788c..0fcebdfe833b 100644 --- a/.github/actions/run-dagger-pipeline/action.yml +++ b/.github/actions/run-dagger-pipeline/action.yml @@ -83,6 +83,9 @@ inputs: description: "URL to airbyte-ci binary" required: false default: https://connectors.airbyte.com/airbyte-ci/releases/ubuntu/latest/airbyte-ci + pypi_token: + description: "PyPI API token to publish to PyPI" + required: 
false runs: using: "composite" @@ -182,3 +185,4 @@ runs: CI: "True" TAILSCALE_AUTH_KEY: ${{ inputs.tailscale_auth_key }} DOCKER_REGISTRY_MIRROR_URL: ${{ inputs.docker_registry_mirror_url }} + PYPI_TOKEN: ${{ inputs.pypi_token }} diff --git a/.github/workflows/publish_connectors.yml b/.github/workflows/publish_connectors.yml index 21bce68f1e40..db44bfd767ac 100644 --- a/.github/workflows/publish_connectors.yml +++ b/.github/workflows/publish_connectors.yml @@ -84,8 +84,7 @@ jobs: s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} tailscale_auth_key: ${{ secrets.TAILSCALE_AUTH_KEY }} subcommand: "connectors ${{ github.event.inputs.connectors-options }} publish ${{ github.event.inputs.publish-options }}" - env: - PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} + pypi_token: ${{ secrets.PYPI_TOKEN }} set-instatus-incident-on-failure: name: Create Instatus Incident on Failure diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/pipeline.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/pipeline.py index 2772eff210fa..492ac01882bf 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/pipeline.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/pipeline.py @@ -14,7 +14,7 @@ from pipelines.airbyte_ci.connectors.publish.context import PublishConnectorContext from pipelines.airbyte_ci.connectors.reports import ConnectorReport from pipelines.airbyte_ci.metadata.pipeline import MetadataUpload, MetadataValidation -from pipelines.airbyte_ci.poetry.pipeline import PublishToPyPI, PyPIPublishContext +from pipelines.airbyte_ci.poetry.publish.pipeline import PublishToPyPI, PyPIPublishContext from pipelines.airbyte_ci.poetry.utils import is_package_published from pipelines.dagger.actions.remote_storage import upload_to_gcs from pipelines.dagger.actions.system import docker @@ -54,12 +54,12 @@ async def _run(self) -> StepResult: return StepResult(self, 
status=StepStatus.SUCCESS, stdout=f"No manifest found for {self.context.docker_image}.") -class CheckPypiPackageExists(Step): +class CheckPypiPackageDoesNotExist(Step): context: PyPIPublishContext title = "Check if the connector is published on pypi" async def _run(self) -> StepResult: - is_published = is_package_published(self.context.package_name, self.context.version, self.context.test_pypi) + is_published = is_package_published(self.context.package_name, self.context.version, self.context.registry) if is_published: return StepResult( self, status=StepStatus.SKIPPED, stderr=f"{self.context.package_name} already exists in version {self.context.version}." @@ -291,21 +291,10 @@ def create_connector_report(results: List[StepResult]) -> ConnectorReport: metadata_upload_results = await metadata_upload_step.run() results.append(metadata_upload_results) - # Try to convert the context to a PyPIPublishContext. If it returns None, it means we don't need to publish to pypi. - pypi_context = await PyPIPublishContext.from_connector_context(context) - if pypi_context: - check_pypi_package_exists_results = await CheckPypiPackageExists(pypi_context).run() - results.append(check_pypi_package_exists_results) - if check_pypi_package_exists_results.status is StepStatus.SKIPPED: - context.logger.info("The connector version is already published on pypi.") - elif check_pypi_package_exists_results.status is StepStatus.SUCCESS: - context.logger.info("The connector version is not published on pypi. 
Let's build and publish it.") - publish_to_pypi_results = await PublishToPyPI(pypi_context).run() - results.append(publish_to_pypi_results) - if publish_to_pypi_results.status is StepStatus.FAILURE: - return create_connector_report(results) - elif check_pypi_package_exists_results.status is StepStatus.FAILURE: - return create_connector_report(results) + pypi_steps, terminate_early = await _run_pypi_publish_pipeline(context) + results.extend(pypi_steps) + if terminate_early: + return create_connector_report(results) # Exit early if the connector image already exists or has failed to build if check_connector_image_results.status is not StepStatus.SUCCESS: @@ -346,6 +335,33 @@ def create_connector_report(results: List[StepResult]) -> ConnectorReport: return connector_report +async def _run_pypi_publish_pipeline(context: PublishConnectorContext) -> Tuple[List[StepResult], bool]: + """ + Run the pypi publish pipeline for a single connector. + Return the results of the steps and a boolean indicating whether there was an error and the pipeline should be stopped. + """ + results = [] + # Try to convert the context to a PyPIPublishContext. If it returns None, it means we don't need to publish to pypi. + pypi_context = await PyPIPublishContext.from_publish_connector_context(context) + if not pypi_context: + return results, False + + check_pypi_package_exists_results = await CheckPypiPackageDoesNotExist(pypi_context).run() + results.append(check_pypi_package_exists_results) + if check_pypi_package_exists_results.status is StepStatus.SKIPPED: + context.logger.info("The connector version is already published on pypi.") + elif check_pypi_package_exists_results.status is StepStatus.SUCCESS: + context.logger.info("The connector version is not published on pypi. 
Let's build and publish it.") + publish_to_pypi_results = await PublishToPyPI(pypi_context).run() + results.append(publish_to_pypi_results) + if publish_to_pypi_results.status is StepStatus.FAILURE: + return results, True + elif check_pypi_package_exists_results.status is StepStatus.FAILURE: + return results, True + + return results, False + + def reorder_contexts(contexts: List[PublishConnectorContext]) -> List[PublishConnectorContext]: """Reorder contexts so that the ones that are for strict-encrypt/secure connectors come first. The metadata upload on publish checks if the the connectors referenced in the metadata file are already published to DockerHub. diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/commands.py index 796ead980110..92e70d6651e0 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/commands.py @@ -16,16 +16,19 @@ from pipelines.airbyte_ci.format.format_command import FormatCommand from pipelines.cli.click_decorators import click_ignore_unused_kwargs, click_merge_args_into_context_obj from pipelines.cli.dagger_pipeline_command import DaggerPipelineCommand +from pipelines.cli.lazy_group import LazyGroup from pipelines.helpers.cli import LogOptions, invoke_commands_concurrently, invoke_commands_sequentially, log_command_results from pipelines.models.contexts.click_pipeline_context import ClickPipelineContext, pass_pipeline_context from pipelines.models.steps import StepStatus -from .pipeline import PublishToPyPI, PyPIPublishContext - @click.group( name="poetry", help="Commands related to running poetry commands.", + cls=LazyGroup, + lazy_subcommands={ + "publish": "pipelines.airbyte_ci.poetry.publish.commands.publish", + }, ) @click.option( "--package-path", @@ -33,70 +36,8 @@ type=click.STRING, required=True, ) -@click.option("--docker-image", help="The 
docker image to run the command in.", type=click.STRING, default="mwalbeck/python-poetry") @click_merge_args_into_context_obj @pass_pipeline_context @click_ignore_unused_kwargs async def poetry(pipeline_context: ClickPipelineContext) -> None: pass - - -@poetry.command(cls=DaggerPipelineCommand, name="publish", help="Publish a Python package to PyPI.") -@click.option( - "--pypi-token", - help="Access token", - type=click.STRING, - required=True, - envvar="PYPI_TOKEN", -) -@click.option( - "--test-pypi", - help="Whether to publish to test.pypi.org instead of pypi.org.", - type=click.BOOL, - is_flag=True, - default=False, -) -@click.option( - "--publish-name", - help="The name of the package to publish. If not set, the name will be inferred from the pyproject.toml file of the package.", - type=click.STRING, -) -@click.option( - "--publish-version", - help="The version of the package to publish. If not set, the version will be inferred from the pyproject.toml file of the package.", - type=click.STRING, -) -@pass_pipeline_context -@click.pass_context -async def publish( - ctx: click.Context, - click_pipeline_context: ClickPipelineContext, - pypi_token: str, - test_pypi: bool, - publish_name: Optional[str], - publish_version: Optional[str], -) -> None: - context = PyPIPublishContext( - is_local=ctx.obj["is_local"], - git_branch=ctx.obj["git_branch"], - git_revision=ctx.obj["git_revision"], - ci_report_bucket=ctx.obj["ci_report_bucket_name"], - report_output_prefix=ctx.obj["report_output_prefix"], - gha_workflow_run_url=ctx.obj.get("gha_workflow_run_url"), - dagger_logs_url=ctx.obj.get("dagger_logs_url"), - pipeline_start_timestamp=ctx.obj.get("pipeline_start_timestamp"), - ci_context=ctx.obj.get("ci_context"), - ci_gcs_credentials=ctx.obj["ci_gcs_credentials"], - pypi_token=pypi_token, - test_pypi=test_pypi, - package_path=ctx.obj["package_path"], - build_docker_image=ctx.obj["docker_image"], - package_name=publish_name, - version=publish_version, - ) - dagger_client = 
await click_pipeline_context.get_dagger_client(pipeline_name=f"Publish {ctx.obj['package_path']} to PyPI") - context.dagger_client = dagger_client - - await PublishToPyPI(context).run() - - return True diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/pipeline.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/pipeline.py deleted file mode 100644 index 40e6c6a6d29b..000000000000 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/pipeline.py +++ /dev/null @@ -1,209 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - -import configparser -import io -import os -from textwrap import dedent -from typing import Optional - -import tomli -import tomli_w -import yaml -from pipelines.airbyte_ci.connectors.context import ConnectorContext, PipelineContext -from pipelines.models.contexts.pipeline_context import PipelineContext -from pipelines.models.steps import Step, StepResult - - -class PyPIPublishContext(PipelineContext): - def __init__( - self, - pypi_token: str, - test_pypi: bool, - package_path: str, - build_docker_image: str, - ci_report_bucket: str, - report_output_prefix: str, - is_local: bool, - git_branch: bool, - git_revision: bool, - gha_workflow_run_url: Optional[str] = None, - dagger_logs_url: Optional[str] = None, - pipeline_start_timestamp: Optional[int] = None, - ci_context: Optional[str] = None, - ci_gcs_credentials: str = None, - package_name: Optional[str] = None, - version: Optional[str] = None, - ): - self.pypi_token = pypi_token - self.test_pypi = test_pypi - self.package_path = package_path - self.package_name = package_name - self.version = version - self.build_docker_image = build_docker_image - - pipeline_name = f"Publish PyPI {package_path}" - - super().__init__( - pipeline_name=pipeline_name, - report_output_prefix=report_output_prefix, - ci_report_bucket=ci_report_bucket, - is_local=is_local, - git_branch=git_branch, - git_revision=git_revision, - 
gha_workflow_run_url=gha_workflow_run_url, - dagger_logs_url=dagger_logs_url, - pipeline_start_timestamp=pipeline_start_timestamp, - ci_context=ci_context, - ci_gcs_credentials=ci_gcs_credentials, - ) - - @staticmethod - async def from_connector_context(connector_context: ConnectorContext) -> Optional["PyPIPublishContext"]: - """ - Create a PyPIPublishContext from a ConnectorContext. - - The metadata of the connector is read from the current workdir to capture changes that are not yet published. - If pypi is not enabled, this will return None. - """ - - current_metadata = yaml.safe_load( - await connector_context.get_repo_file(str(connector_context.connector.metadata_file_path)).contents() - )["data"] - print(current_metadata) - if ( - not "remoteRegistries" in current_metadata - or not "pypi" in current_metadata["remoteRegistries"] - or not current_metadata["remoteRegistries"]["pypi"]["enabled"] - ): - return None - - if "connectorBuildOptions" in current_metadata and "baseImage" in current_metadata["connectorBuildOptions"]: - build_docker_image = current_metadata["connectorBuildOptions"]["baseImage"] - else: - build_docker_image = "mwalbeck/python-poetry" - pypi_context = PyPIPublishContext( - pypi_token=os.environ["PYPI_TOKEN"], - test_pypi=True, # TODO: Go live - package_path=str(connector_context.connector.code_directory), - package_name=current_metadata["remoteRegistries"]["pypi"]["packageName"], - version=current_metadata["dockerImageTag"], - build_docker_image=build_docker_image, - ci_report_bucket=connector_context.ci_report_bucket, - report_output_prefix=connector_context.report_output_prefix, - is_local=connector_context.is_local, - git_branch=connector_context.git_branch, - git_revision=connector_context.git_revision, - gha_workflow_run_url=connector_context.gha_workflow_run_url, - dagger_logs_url=connector_context.dagger_logs_url, - pipeline_start_timestamp=connector_context.pipeline_start_timestamp, - ci_context=connector_context.ci_context, - 
ci_gcs_credentials=connector_context.ci_gcs_credentials, - ) - pypi_context.dagger_client = connector_context.dagger_client - return pypi_context - - -class PublishToPyPI(Step): - context: PyPIPublishContext - title = "Publish package to PyPI" - - async def _run(self) -> StepResult: - dir_to_publish = await self.context.get_repo_dir(self.context.package_path) - - files = await dir_to_publish.entries() - is_poetry_package = "pyproject.toml" in files - is_pip_package = "setup.py" in files - - # Try to infer package name and version from the pyproject.toml file. If it is not present, we need to have the package name and version set - # Setup.py packages need to set package name and version as parameter - if not self.context.package_name or not self.context.version: - if not is_poetry_package: - return self.skip( - "Connector does not have a pyproject.toml file and version and package name is not set otherwise, skipping." - ) - - pyproject_toml = dir_to_publish.file("pyproject.toml") - pyproject_toml_content = await pyproject_toml.contents() - contents = tomli.loads(pyproject_toml_content) - if ( - "tool" not in contents - or "poetry" not in contents["tool"] - or "name" not in contents["tool"]["poetry"] - or "version" not in contents["tool"]["poetry"] - ): - return self.skip( - "Connector does not have a pyproject.toml file which specifies package name and version and they are not set otherwise, skipping." - ) - - self.context.package_name = contents["tool"]["poetry"]["name"] - self.context.version = contents["tool"]["poetry"]["version"] - - print( - f"Uploading package {self.context.package_name} version {self.context.version} to {'testpypi' if self.context.test_pypi else 'pypi'}..." 
- ) - - if is_pip_package: - # legacy publish logic - pypi_username = self.context.dagger_client.set_secret("pypi_username", "__token__") - pypi_password = self.context.dagger_client.set_secret("pypi_password", f"pypi-{self.context.pypi_token}") - metadata = { - "name": self.context.package_name, - "version": self.context.version, - "author": "Airbyte", - "author_email": "contact@airbyte.io", - } - if "README.md" in files: - metadata["long_description"] = await dir_to_publish.file("README.md").contents() - metadata["long_description_content_type"] = "text/markdown" - - config = configparser.ConfigParser() - config["metadata"] = metadata - - setup_cfg_io = io.StringIO() - config.write(setup_cfg_io) - setup_cfg = setup_cfg_io.getvalue() - - twine_upload = ( - self.context.dagger_client.container() - .from_(self.context.build_docker_image) - .with_exec(["apt-get", "update"]) - .with_exec(["apt-get", "install", "-y", "twine"]) - .with_directory("package", dir_to_publish) - .with_workdir("package") - # clear out setup.py metadata so setup.cfg is used - .with_exec(["sed", "-i", "/name=/d; /author=/d; /author_email=/d; /version=/d", "setup.py"]) - .with_new_file("setup.cfg", contents=setup_cfg) - .with_exec(["pip", "install", "--upgrade", "setuptools", "wheel"]) - .with_exec(["python", "setup.py", "sdist", "bdist_wheel"]) - .with_secret_variable("TWINE_USERNAME", pypi_username) - .with_secret_variable("TWINE_PASSWORD", pypi_password) - .with_exec(["twine", "upload", "--verbose", "--repository", "testpypi" if self.context.test_pypi else "pypi", "dist/*"]) - ) - - return await self.get_step_result(twine_upload) - else: - # poetry publish logic - pypi_token = self.context.dagger_client.set_secret("pypi_token", f"pypi-{self.context.pypi_token}") - pyproject_toml = dir_to_publish.file("pyproject.toml") - pyproject_toml_content = await pyproject_toml.contents() - contents = tomli.loads(pyproject_toml_content) - # make sure package name and version are set to the configured one 
- contents["tool"]["poetry"]["name"] = self.context.package_name - contents["tool"]["poetry"]["version"] = self.context.version - poetry_publish = ( - self.context.dagger_client.container() - .from_(self.context.build_docker_image) - .with_secret_variable("PYPI_TOKEN", pypi_token) - .with_directory("package", dir_to_publish) - .with_workdir("package") - .with_new_file("pyproject.toml", contents=tomli_w.dumps(contents)) - .with_exec(["poetry", "config", "repositories.testpypi", "https://test.pypi.org/legacy/"]) - .with_exec( - ["sh", "-c", f"poetry config {'pypi-token.testpypi' if self.context.test_pypi else 'pypi-token.pypi'} $PYPI_TOKEN"] - ) - .with_exec(["poetry", "publish", "--build", "--repository", "testpypi" if self.context.test_pypi else "pypi"]) - ) - - return await self.get_step_result(poetry_publish) diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/publish/__init__.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/publish/__init__.py new file mode 100644 index 000000000000..c941b3045795 --- /dev/null +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/publish/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/publish/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/publish/commands.py new file mode 100644 index 000000000000..5953830192df --- /dev/null +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/publish/commands.py @@ -0,0 +1,81 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +""" +Module exposing the format commands. 
+""" +from __future__ import annotations + +from typing import Optional + +import asyncclick as click +from connector_ops.utils import CONNECTOR_PATH_PREFIX +from pipelines.cli.dagger_pipeline_command import DaggerPipelineCommand +from pipelines.models.contexts.click_pipeline_context import ClickPipelineContext, pass_pipeline_context + +from .context import PyPIPublishContext +from .pipeline import PublishToPyPI + + +@click.command(cls=DaggerPipelineCommand, name="publish", help="Publish a Python package to PyPI.") +@click.option( + "--pypi-token", + help="Access token", + type=click.STRING, + required=True, + envvar="PYPI_TOKEN", +) +@click.option( + "--registry-url", + help="Which registry to publish to. If not set, the default pypi is used. For test pypi, use https://test.pypi.org/legacy/", + type=click.STRING, + default="https://pypi.org/simple", +) +@click.option( + "--publish-name", + help="The name of the package to publish. If not set, the name will be inferred from the pyproject.toml file of the package.", + type=click.STRING, +) +@click.option( + "--publish-version", + help="The version of the package to publish. 
If not set, the version will be inferred from the pyproject.toml file of the package.", + type=click.STRING, +) +@pass_pipeline_context +@click.pass_context +async def publish( + ctx: click.Context, + click_pipeline_context: ClickPipelineContext, + pypi_token: str, + registry_url: str, + publish_name: Optional[str], + publish_version: Optional[str], +) -> None: + context = PyPIPublishContext( + is_local=ctx.obj["is_local"], + git_branch=ctx.obj["git_branch"], + git_revision=ctx.obj["git_revision"], + ci_report_bucket=ctx.obj["ci_report_bucket_name"], + report_output_prefix=ctx.obj["report_output_prefix"], + gha_workflow_run_url=ctx.obj.get("gha_workflow_run_url"), + dagger_logs_url=ctx.obj.get("dagger_logs_url"), + pipeline_start_timestamp=ctx.obj.get("pipeline_start_timestamp"), + ci_context=ctx.obj.get("ci_context"), + ci_gcs_credentials=ctx.obj["ci_gcs_credentials"], + pypi_token=pypi_token, + registry=registry_url, + package_path=ctx.obj["package_path"], + package_name=publish_name, + version=publish_version, + ) + + if context.package_path.startswith(CONNECTOR_PATH_PREFIX): + context.logger.warning("It looks like you are trying to publish a connector. Please use the `connectors` command group instead.") + + dagger_client = await click_pipeline_context.get_dagger_client(pipeline_name=f"Publish {ctx.obj['package_path']} to PyPI") + context.dagger_client = dagger_client + + await PublishToPyPI(context).run() + + return True diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/publish/context.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/publish/context.py new file mode 100644 index 000000000000..676ca760daf9 --- /dev/null +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/publish/context.py @@ -0,0 +1,96 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + +import os +from datetime import date +from typing import Optional + +from pipelines.airbyte_ci.connectors.context import PipelineContext +from pipelines.airbyte_ci.connectors.publish.context import PublishConnectorContext +from pipelines.models.contexts.pipeline_context import PipelineContext + + +class PyPIPublishContext(PipelineContext): + def __init__( + self, + pypi_token: str, + package_path: str, + ci_report_bucket: str, + report_output_prefix: str, + is_local: bool, + git_branch: bool, + git_revision: bool, + registry: Optional[str] = None, + gha_workflow_run_url: Optional[str] = None, + dagger_logs_url: Optional[str] = None, + pipeline_start_timestamp: Optional[int] = None, + ci_context: Optional[str] = None, + ci_gcs_credentials: str = None, + package_name: Optional[str] = None, + version: Optional[str] = None, + ): + self.pypi_token = pypi_token + self.registry = registry or "https://pypi.org/simple" + self.package_path = package_path + self.package_name = package_name + self.version = version + + pipeline_name = f"Publish PyPI {package_path}" + + super().__init__( + pipeline_name=pipeline_name, + report_output_prefix=report_output_prefix, + ci_report_bucket=ci_report_bucket, + is_local=is_local, + git_branch=git_branch, + git_revision=git_revision, + gha_workflow_run_url=gha_workflow_run_url, + dagger_logs_url=dagger_logs_url, + pipeline_start_timestamp=pipeline_start_timestamp, + ci_context=ci_context, + ci_gcs_credentials=ci_gcs_credentials, + ) + + @staticmethod + async def from_publish_connector_context(connector_context: PublishConnectorContext) -> Optional["PyPIPublishContext"]: + """ + Create a PyPIPublishContext from a ConnectorContext. + + The metadata of the connector is read from the current workdir to capture changes that are not yet published. + If pypi is not enabled, this will return None. 
+        """ + +        current_metadata = connector_context.connector.metadata +        if ( +            not "remoteRegistries" in current_metadata +            or not "pypi" in current_metadata["remoteRegistries"] +            or not current_metadata["remoteRegistries"]["pypi"]["enabled"] +        ): +            return None + +        version = current_metadata["dockerImageTag"] +        if connector_context.pre_release: +            # use current date as pre-release version +            rc_tag = date.today().strftime("%Y-%m-%d-%H-%M") +            version = f"{version}rc{rc_tag}" + +        pypi_context = PyPIPublishContext( +            pypi_token=os.environ["PYPI_TOKEN"], +            registry="https://test.pypi.org/",  # TODO: go live +            package_path=str(connector_context.connector.code_directory), +            package_name=current_metadata["remoteRegistries"]["pypi"]["packageName"], +            version=version, +            ci_report_bucket=connector_context.ci_report_bucket, +            report_output_prefix=connector_context.report_output_prefix, +            is_local=connector_context.is_local, +            git_branch=connector_context.git_branch, +            git_revision=connector_context.git_revision, +            gha_workflow_run_url=connector_context.gha_workflow_run_url, +            dagger_logs_url=connector_context.dagger_logs_url, +            pipeline_start_timestamp=connector_context.pipeline_start_timestamp, +            ci_context=connector_context.ci_context, +            ci_gcs_credentials=connector_context.ci_gcs_credentials, +        ) +        pypi_context.dagger_client = connector_context.dagger_client +        return pypi_context diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/publish/pipeline.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/publish/pipeline.py new file mode 100644 index 000000000000..ed7651d7cf02 --- /dev/null +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/publish/pipeline.py @@ -0,0 +1,153 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + +import configparser +import io +import uuid +from enum import Enum, auto +from typing import Optional, Tuple + +import tomli +import tomli_w +from dagger import Directory +from pipelines.airbyte_ci.poetry.publish.context import PyPIPublishContext +from pipelines.consts import PYPROJECT_TOML_FILE_PATH, SETUP_PY_FILE_PATH +from pipelines.dagger.actions.python.poetry import with_poetry +from pipelines.helpers.utils import sh_dash_c +from pipelines.models.steps import Step, StepResult + + +class PackageType(Enum): + POETRY = auto() + PIP = auto() + NONE = auto() + + +class PublishToPyPI(Step): + context: PyPIPublishContext + title = "Publish package to PyPI" + + def _get_base_container(self): + return with_poetry(self.context) + + async def _get_package_metadata_from_pyproject_toml(self, dir_to_publish: Directory) -> Optional[Tuple[str, str]]: + pyproject_toml = dir_to_publish.file(PYPROJECT_TOML_FILE_PATH) + pyproject_toml_content = await pyproject_toml.contents() + contents = tomli.loads(pyproject_toml_content) + if ( + "tool" not in contents + or "poetry" not in contents["tool"] + or "name" not in contents["tool"]["poetry"] + or "version" not in contents["tool"]["poetry"] + ): + return None + + return (contents["tool"]["poetry"]["name"], contents["tool"]["poetry"]["version"]) + + async def _get_package_type(self, dir_to_publish: Directory) -> PackageType: + files = await dir_to_publish.entries() + has_pyproject_toml = PYPROJECT_TOML_FILE_PATH in files + has_setup_py = SETUP_PY_FILE_PATH in files + if has_pyproject_toml: + return PackageType.POETRY + elif has_setup_py: + return PackageType.PIP + else: + return PackageType.NONE + + async def _run(self) -> StepResult: + dir_to_publish = await self.context.get_repo_dir(self.context.package_path) + package_type = await self._get_package_type(dir_to_publish) + + if package_type == PackageType.NONE: + return self.skip("Connector does not have a pyproject.toml file or setup.py file, skipping.") + + # Try to infer 
package name and version from the pyproject.toml file. If it is not present, we need to have the package name and version set + # Setup.py packages need to set package name and version as parameter + if not self.context.package_name or not self.context.version: + if not package_type == PackageType.POETRY: + return self.skip( + "Connector does not have a pyproject.toml file and version and package name is not set otherwise, skipping." + ) + + package_metadata = await self._get_package_metadata_from_pyproject_toml(dir_to_publish) + + if not package_metadata: + return self.skip( + "Connector does not have a pyproject.toml file which specifies package name and version and they are not set otherwise, skipping." + ) + + self.context.package_name = package_metadata[0] + self.context.version = package_metadata[1] + + self.logger.info(f"Uploading package {self.context.package_name} version {self.context.version} to {self.context.registry}...") + + if package_type == PackageType.PIP: + return await self._pip_publish(dir_to_publish) + else: + return await self._poetry_publish(dir_to_publish) + + async def _poetry_publish(self, dir_to_publish: Directory) -> StepResult: + pypi_token = self.context.dagger_client.set_secret("pypi_token", f"pypi-{self.context.pypi_token}") + pyproject_toml = dir_to_publish.file(PYPROJECT_TOML_FILE_PATH) + pyproject_toml_content = await pyproject_toml.contents() + contents = tomli.loads(pyproject_toml_content) + # make sure package name and version are set to the configured one + contents["tool"]["poetry"]["name"] = self.context.package_name + contents["tool"]["poetry"]["version"] = self.context.version + # enforce consistent author + contents["tool"]["poetry"]["authors"] = ["Airbyte "] + poetry_publish = ( + self._get_base_container() + .with_secret_variable("PYPI_TOKEN", pypi_token) + .with_directory("package", dir_to_publish) + .with_workdir("package") + .with_new_file(PYPROJECT_TOML_FILE_PATH, contents=tomli_w.dumps(contents)) + 
.with_exec(["poetry", "config", "repositories.mypypi", self.context.registry]) + .with_exec(sh_dash_c([f"poetry config pypi-token.mypypi $PYPI_TOKEN"])) + .with_env_variable("CACHEBUSTER", str(uuid.uuid4())) + .with_exec(sh_dash_c(["poetry publish --build --repository mypypi -vvv --no-interaction"])) + ) + + return await self.get_step_result(poetry_publish) + + async def _pip_publish(self, dir_to_publish: Directory) -> StepResult: + files = await dir_to_publish.entries() + pypi_username = self.context.dagger_client.set_secret("pypi_username", "__token__") + pypi_password = self.context.dagger_client.set_secret("pypi_password", f"pypi-{self.context.pypi_token}") + metadata = { + "name": self.context.package_name, + "version": self.context.version, + # Enforce consistent author + "author": "Airbyte", + "author_email": "contact@airbyte.io", + } + if "README.md" in files: + metadata["long_description"] = await dir_to_publish.file("README.md").contents() + metadata["long_description_content_type"] = "text/markdown" + + config = configparser.ConfigParser() + config["metadata"] = metadata + + setup_cfg_io = io.StringIO() + config.write(setup_cfg_io) + setup_cfg = setup_cfg_io.getvalue() + + twine_upload = ( + self._get_base_container() + .with_exec(sh_dash_c(["apt-get update", "apt-get install -y twine"])) + .with_directory("package", dir_to_publish) + .with_workdir("package") + # clear out setup.py metadata so setup.cfg is used + .with_exec(["sed", "-i", "/name=/d; /author=/d; /author_email=/d; /version=/d", SETUP_PY_FILE_PATH]) + .with_new_file("setup.cfg", contents=setup_cfg) + .with_exec(["pip", "install", "--upgrade", "setuptools", "wheel"]) + .with_exec(["python", SETUP_PY_FILE_PATH, "sdist", "bdist_wheel"]) + .with_secret_variable("TWINE_USERNAME", pypi_username) + .with_secret_variable("TWINE_PASSWORD", pypi_password) + .with_env_variable("CACHEBUSTER", str(uuid.uuid4())) + .with_exec(["twine", "upload", "--verbose", "--repository-url", self.context.registry, 
"dist/*"]) + ) + + return await self.get_step_result(twine_upload) diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/utils.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/utils.py index b0d6d2f07650..966c596eb428 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/utils.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/poetry/utils.py @@ -3,7 +3,7 @@ import requests -def is_package_published(package_name, version, test_pypi=False): +def is_package_published(package_name: str, version: str, base_url: str): """ Check if a package with a specific version is published on PyPI or Test PyPI. @@ -12,7 +12,6 @@ def is_package_published(package_name, version, test_pypi=False): :param test_pypi: Set to True to check on Test PyPI, False for regular PyPI. :return: True if the package is found with the specified version, False otherwise. """ - base_url = "https://test.pypi.org/pypi" if test_pypi else "https://pypi.org/pypi" url = f"{base_url}/{package_name}/{version}/json" response = requests.get(url) diff --git a/airbyte-ci/connectors/pipelines/pipelines/consts.py b/airbyte-ci/connectors/pipelines/pipelines/consts.py index 851578cc7a0c..57a413a5d18b 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/consts.py +++ b/airbyte-ci/connectors/pipelines/pipelines/consts.py @@ -60,6 +60,8 @@ POETRY_CACHE_PATH = "/root/.cache/pypoetry" STORAGE_DRIVER = "fuse-overlayfs" TAILSCALE_AUTH_KEY = os.getenv("TAILSCALE_AUTH_KEY") +PYPROJECT_TOML_FILE_PATH = "pyproject.toml" +SETUP_PY_FILE_PATH = "setup.py" class CIContext(str, Enum): diff --git a/airbyte-ci/connectors/pipelines/tests/test_poetry/test_poetry_publish.py b/airbyte-ci/connectors/pipelines/tests/test_poetry/test_poetry_publish.py new file mode 100644 index 000000000000..e802e7132624 --- /dev/null +++ b/airbyte-ci/connectors/pipelines/tests/test_poetry/test_poetry_publish.py @@ -0,0 +1,92 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+#
+
+from unittest.mock import MagicMock
+
+import pytest
+import requests
+from dagger import Client, Platform
+from pipelines.airbyte_ci.connectors.publish import pipeline as publish_pipeline
+from pipelines.airbyte_ci.poetry.publish.context import PyPIPublishContext
+from pipelines.dagger.actions.python.poetry import with_poetry
+from pipelines.models.steps import StepStatus
+
+pytestmark = [
+    pytest.mark.anyio,
+]
+
+
+@pytest.fixture
+def context(dagger_client: Client):
+    """A PyPIPublishContext wired to the test dagger client and the local registry URL."""
+    context = PyPIPublishContext(
+        package_path="test",
+        version="0.2.0",
+        pypi_token="test",
+        package_name="test",
+        registry="http://local_registry:8080/",
+        is_local=True,
+        git_branch="test",
+        git_revision="test",
+        report_output_prefix="test",
+        ci_report_bucket="test",
+    )
+    context.dagger_client = dagger_client
+    return context
+
+
+@pytest.mark.parametrize(
+    "package_path, package_name, expected_asset",
+    [
+        pytest.param(
+            "airbyte-integrations/connectors/source-apify-dataset",
+            "airbyte-source-apify-dataset",
+            "airbyte_source_apify_dataset-0.2.0-py3-none-any.whl",
+            id="setup.py project",
+        ),
+        pytest.param(
+            "airbyte-lib",
+            "airbyte-lib",
+            "airbyte_lib-0.2.0-py3-none-any.whl",
+            id="poetry project",
+        ),
+    ],
+)
+async def test_run_poetry_publish(context: PyPIPublishContext, package_path: str, package_name: str, expected_asset: str):
+    """End-to-end publish against a local pypiserver, for both setup.py and poetry projects."""
+    context.package_name = package_name
+    context.package_path = package_path
+    pypi_registry = (
+        # need to use linux/amd64 because the pypiserver image is only available for that platform
+        context.dagger_client.container(platform=Platform("linux/amd64"))
+        .from_("pypiserver/pypiserver:v2.0.1")
+        .with_exec(["run", "-P", ".", "-a", "."])
+        .with_exposed_port(8080)
+        .as_service()
+    )
+
+    base_container = with_poetry(context).with_service_binding("local_registry", pypi_registry)
+    step = publish_pipeline.PublishToPyPI(context)
+    step._get_base_container = MagicMock(return_value=base_container)
+    step_result = await step.run()
+    assert step_result.status == StepStatus.SUCCESS
+
+    # Query the registry to check that the package was published
+    tunnel = await context.dagger_client.host().tunnel(pypi_registry).start()
+    endpoint = await tunnel.endpoint(scheme="http")
+    list_url = f"{endpoint}/simple/"
+    list_response = requests.get(list_url)
+    assert list_response.status_code == 200
+    assert package_name in list_response.text
+    url = f"{endpoint}/simple/{package_name}"
+    response = requests.get(url)
+    assert response.status_code == 200
+    assert expected_asset in response.text
diff --git a/airbyte-ci/connectors/pipelines/tests/test_publish.py b/airbyte-ci/connectors/pipelines/tests/test_publish.py
index b7fe7d764d5f..a2fbc404911c 100644
--- a/airbyte-ci/connectors/pipelines/tests/test_publish.py
+++ b/airbyte-ci/connectors/pipelines/tests/test_publish.py
@@ -2,8 +2,10 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
# import json +import os import random from typing import List +from unittest.mock import patch import anyio import pytest @@ -153,6 +155,7 @@ def test_parse_spec_output_no_spec(self, publish_context): (publish_pipeline, "PushConnectorImageToRegistry"), (publish_pipeline, "PullConnectorImageFromRegistry"), (publish_pipeline.steps, "run_connector_build"), + (publish_pipeline, "CheckPypiPackageDoesNotExist"), ] @@ -333,3 +336,68 @@ async def test_run_connector_publish_pipeline_when_image_does_not_exist( publish_pipeline.PullConnectorImageFromRegistry.return_value.run.assert_not_called() publish_pipeline.UploadSpecToCache.return_value.run.assert_not_called() publish_pipeline.MetadataUpload.return_value.run.assert_not_called() + + +@pytest.mark.parametrize( + "pypi_enabled, pypi_package_does_not_exist_status, publish_step_status, expect_publish_to_pypi_called, expect_build_connector_called", + [ + pytest.param(True, StepStatus.SUCCESS, StepStatus.SUCCESS, True, True, id="happy_path"), + pytest.param(False, StepStatus.SUCCESS, StepStatus.SUCCESS, False, True, id="pypi_disabled, skip all pypi steps"), + pytest.param(True, StepStatus.SKIPPED, StepStatus.SUCCESS, False, True, id="pypi_package_exists, skip publish_to_pypi"), + pytest.param(True, StepStatus.SUCCESS, StepStatus.FAILURE, True, False, id="publish_step_fails, abort"), + pytest.param(True, StepStatus.FAILURE, StepStatus.FAILURE, False, False, id="pypi_package_does_not_exist_fails, abort"), + ], +) +async def test_run_connector_pypi_publish_pipeline( + mocker, + pypi_enabled, + pypi_package_does_not_exist_status, + publish_step_status, + expect_publish_to_pypi_called, + expect_build_connector_called, +): + + for module, to_mock in STEPS_TO_PATCH: + mocker.patch.object(module, to_mock, return_value=mocker.AsyncMock()) + + mocked_publish_to_pypi = mocker.patch("pipelines.airbyte_ci.connectors.publish.pipeline.PublishToPyPI", return_value=mocker.AsyncMock()) + + for step in [ + publish_pipeline.MetadataValidation, + 
publish_pipeline.CheckConnectorImageDoesNotExist, + publish_pipeline.UploadSpecToCache, + publish_pipeline.MetadataUpload, + publish_pipeline.PushConnectorImageToRegistry, + publish_pipeline.PullConnectorImageFromRegistry, + ]: + step.return_value.run.return_value = mocker.Mock(name=f"{step.title}_result", status=StepStatus.SUCCESS) + + mocked_publish_to_pypi.return_value.run.return_value = mocker.Mock(name="publish_to_pypi_result", status=publish_step_status) + + publish_pipeline.CheckPypiPackageDoesNotExist.return_value.run.return_value = mocker.Mock( + name="pypi_package_does_not_exist_result", status=pypi_package_does_not_exist_status + ) + + context = mocker.MagicMock( + ci_gcs_credentials="", + connector=mocker.MagicMock( + code_directory="path/to/connector", + metadata={"dockerImageTag": "1.2.3", "remoteRegistries": {"pypi": {"enabled": pypi_enabled, "packageName": "test"}}}, + ), + ) + semaphore = anyio.Semaphore(1) + with patch.dict(os.environ, {"PYPI_TOKEN": "test"}): + await publish_pipeline.run_connector_publish_pipeline(context, semaphore) + if expect_publish_to_pypi_called: + mocked_publish_to_pypi.return_value.run.assert_called_once() + # assert that the first argument passed to mocked_publish_to_pypi contains the things from the context + assert mocked_publish_to_pypi.call_args.args[0].pypi_token == "test" + assert mocked_publish_to_pypi.call_args.args[0].package_name == "test" + assert mocked_publish_to_pypi.call_args.args[0].version == "1.2.3" + assert mocked_publish_to_pypi.call_args.args[0].registry == "https://test.pypi.org/" + assert mocked_publish_to_pypi.call_args.args[0].package_path == "path/to/connector" + else: + mocked_publish_to_pypi.return_value.run.assert_not_called() + + if expect_build_connector_called: + publish_pipeline.steps.run_connector_build.assert_called_once()