From 5e3b74630f7c9a97bac6895cce171e27d49c8e75 Mon Sep 17 00:00:00 2001 From: Evan Tahler Date: Mon, 19 Feb 2024 10:13:04 -0800 Subject: [PATCH] Update `metadata-service` to latest version + docs (#35419) --- .../metadata_service/lib/pyproject.toml | 2 +- .../metadata_service/orchestrator/README.md | 92 ++++++++++++++----- .../metadata_service/orchestrator/poetry.lock | 25 ++++- 3 files changed, 91 insertions(+), 28 deletions(-) diff --git a/airbyte-ci/connectors/metadata_service/lib/pyproject.toml b/airbyte-ci/connectors/metadata_service/lib/pyproject.toml index 079ca51f3f54..b4f0389e433b 100644 --- a/airbyte-ci/connectors/metadata_service/lib/pyproject.toml +++ b/airbyte-ci/connectors/metadata_service/lib/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "metadata-service" -version = "0.3.3" +version = "0.3.4" description = "" authors = ["Ben Church "] readme = "README.md" diff --git a/airbyte-ci/connectors/metadata_service/orchestrator/README.md b/airbyte-ci/connectors/metadata_service/orchestrator/README.md index bea709c3007f..ed1c3b44ade9 100644 --- a/airbyte-ci/connectors/metadata_service/orchestrator/README.md +++ b/airbyte-ci/connectors/metadata_service/orchestrator/README.md @@ -1,6 +1,6 @@ # Connector Orchestrator -This is the Orchestrator for Airbyte metadata built on Dagster. +This is the Orchestrator for Airbyte metadata built on Dagster. # Setup @@ -8,7 +8,8 @@ This is the Orchestrator for Airbyte metadata built on Dagster. #### Poetry -Before you can start working on this project, you will need to have Poetry installed on your system. Please follow the instructions below to install Poetry: +Before you can start working on this project, you will need to have Poetry installed on your system. +Please follow the instructions below to install Poetry: 1. Open your terminal or command prompt. 2. 
Install Poetry using the recommended installation method: @@ -23,125 +24,165 @@ Alternatively, you can use `pip` to install Poetry: pip install --user poetry ``` -3. After the installation is complete, close and reopen your terminal to ensure the newly installed `poetry` command is available in your system's PATH. +3. After the installation is complete, close and reopen your terminal to ensure the newly installed + `poetry` command is available in your system's PATH. -For more detailed instructions and alternative installation methods, please refer to the official Poetry documentation: https://python-poetry.org/docs/#installation +For more detailed instructions and alternative installation methods, please refer to the official +Poetry documentation: https://python-poetry.org/docs/#installation ### Using Poetry in the Project -Once Poetry is installed, you can use it to manage the project's dependencies and virtual environment. To get started, navigate to the project's root directory in your terminal and follow these steps: - +Once Poetry is installed, you can use it to manage the project's dependencies and virtual +environment. To get started, navigate to the project's root directory in your terminal and follow +these steps: ## Installation + ```bash poetry install cp .env.template .env ``` ## Create a GCP Service Account and Dev Bucket + Developing against the orchestrator requires a development bucket in GCP. The orchestrator will use this bucket to: + - store important output files. (e.g. Reports) - watch for changes to the `registry` directory in the bucket. However all tmp files will be stored in a local directory. To create a development bucket: + 1. Create a GCP Service Account with the following permissions: - - Storage Admin - - Storage Object Admin - - Storage Object Creator - - Storage Object Viewer + - Storage Admin + - Storage Object Admin + - Storage Object Creator + - Storage Object Viewer 2. Create a PUBLIC GCS bucket 3. 
Add the service account as a member of the bucket with the following permissions: - - Storage Admin - - Storage Object Admin - - Storage Object Creator - - Storage Object Viewer + + - Storage Admin + - Storage Object Admin + - Storage Object Creator + - Storage Object Viewer 4. Add the following environment variables to your `.env` file: - - `METADATA_BUCKET` - - `GCS_CREDENTIALS` + - `METADATA_BUCKET` + - `GCS_CREDENTIALS` Note that the `GCS_CREDENTIALS` should be the raw json string of the service account credentials. Here is an example of how to import the service account credentials into your environment: + ```bash export GCS_CREDENTIALS=`cat /path/to/credentials.json` ``` ## The Orchestrator -The orchestrator (built using Dagster) is responsible for orchestrating various the metadata processes. +The orchestrator (built using Dagster) is responsible for orchestrating the various metadata +processes. + +Dagster has a number of concepts that are important to understand before working on the +orchestrator. -Dagster has a number of concepts that are important to understand before working on the orchestrator. 1. Assets 2. Resources 3. Schedules 4. Sensors 5. Ops -Refer to the [Dagster documentation](https://docs.dagster.io/concepts) for more information on these concepts. +Refer to the [Dagster documentation](https://docs.dagster.io/concepts) for more information on these +concepts. ### Starting the Dagster Daemons + Start the orchestrator with the following command: + ```bash poetry run dagster dev ``` Then you can access the Dagster UI at http://localhost:3000 -Note its important to use `dagster dev` instead of `dagit` because `dagster dev` start additional services that are required for the orchestrator to run. Namely the sensor service. +Note it's important to use `dagster dev` instead of `dagit` because `dagster dev` starts additional +services that are required for the orchestrator to run. Namely the sensor service. 
### Materializing Assets with the UI -When you navigate to the orchestrator in the UI, you will see a list of assets that are available to be materialized. + +When you navigate to the orchestrator in the UI, you will see a list of assets that are available to +be materialized. From here you have the following options + 1. Materialize all assets 2. Select a subset of assets to materialize 3. Enable a sensor to automatically materialize assets ### Materializing Assets without the UI -In some cases you may want to run the orchestrator without the UI. To learn more about Dagster's CLI commands, see the [Dagster CLI documentation](https://docs.dagster.io/_apidocs/cli). +In some cases you may want to run the orchestrator without the UI. To learn more about Dagster's CLI +commands, see the [Dagster CLI documentation](https://docs.dagster.io/_apidocs/cli). ## Running Tests + ```bash poetry run pytest ``` +## Deploying to Dagster Automatically + +GitHub Actions is used to automatically deploy the orchestrator to Dagster Cloud +([GitHub Action](https://github.com/airbytehq/airbyte/blob/master/.github/workflows/metadata_service_deploy_orchestrator_dagger.yml)). + +1. Update the version of your code (`../lib`) and update the version of the package in + `pyproject.toml` +1. In this project (`../orchestrator`), run `poetry lock --no-update` to bump the version of the + requirements you may have changed in + `airbyte-ci/connectors/metadata_service/orchestrator/poetry.lock` +1. Push your changes to the `master` branch and the orchestrator will be automatically deployed to + Dagster Cloud. + ## Deploying to Dagster Cloud manually -Note: This is a temporary solution until we have a CI/CD pipeline setup. -Getting the CICD setup is currently blocked until we hear back from Dagster on a better way to use relative imports in a Dagster Cloud Deployment. +This should only be needed if the above (automatic deployment) fails. 
### Installing the dagster-cloud cli + ```bash pip install dagster-cloud dagster-cloud config ``` ### Deploying the orchestrator + ```bash cd orchestrator DAGSTER_CLOUD_API_TOKEN= airbyte-ci metadata deploy orchestrator ``` # Using the Orchestrator to create a Connector Registry for Development + The orchestrator can be used to create a connector registry for development purposes. ## Setup + First you will need to setup the orchestrator as described above. Then you will want to do the following ### 1. Mirror the production bucket -Use the Google Cloud Console to mirror the production bucket (prod-airbyte-cloud-connector-metadata-service) to your development bucket. + +Use the Google Cloud Console to mirror the production bucket +(prod-airbyte-cloud-connector-metadata-service) to your development bucket. [Docs](https://cloud.google.com/storage-transfer/docs/cloud-storage-to-cloud-storage) ### 2. Upload any local metadata files you want to test changes with + ```bash # assuming your terminal is in the same location as this readme cd ../lib @@ -150,6 +191,7 @@ poetry run metadata_service upload ``` ### 3. 
Generate the registry + ```bash poetry run dagster dev open http://localhost:3000 diff --git a/airbyte-ci/connectors/metadata_service/orchestrator/poetry.lock b/airbyte-ci/connectors/metadata_service/orchestrator/poetry.lock index d350d91b4064..8b4130bb6cc1 100644 --- a/airbyte-ci/connectors/metadata_service/orchestrator/poetry.lock +++ b/airbyte-ci/connectors/metadata_service/orchestrator/poetry.lock @@ -1914,6 +1914,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = 
"sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -1970,7 +1980,7 @@ files = [ [[package]] name = "metadata-service" -version = "0.3.3" +version = "0.3.4" description = "" optional = false python-versions = "^3.9" @@ -3013,6 +3023,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = 
"PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -3020,8 +3031,16 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = 
"PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -3038,6 +3057,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -3045,6 +3065,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = 
"PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -4250,4 +4271,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9, <3.13" -content-hash = "bb5bbfdca5cf2dd2c8040275e5ae8ff9ec78719f2aad3bdddb0f652b9f2bd893" +content-hash = "122eb321956cdeea7fd94d3d6ef5fd0d2b8153d2d5fb136a9644fc83a1f56419"