diff --git a/.github/workflows/README.md b/.github/workflows/README.md
index 40146e0d1f..5300ef9aa1 100644
--- a/.github/workflows/README.md
+++ b/.github/workflows/README.md
@@ -49,6 +49,15 @@ This workflow builds a static website from the Svelte app and deploys it to Netl
 
 This workflow compiles dbt models and docs, syncs Metabase, uploads dbt artifacts, builds a visual comment about model changes.
 
+## deploy-dbt-docs.yml
+
+This workflow generates the latest dbt artifacts, including the questions (cards) and dashboards extracted from Metabase as dbt-metabase exposures, and updates the dbt documentation.
+
+Developers and analysts can view all dbt documentation and easily verify whether particular models are in use by searching inside the `warehouse/models/metabase` folder or on the [dbt docs site](https://dbt-docs.calitp.org/#!/overview).
+
+It is scheduled to run every Monday at 8am UTC (midnight PST / 1am PDT) and also runs whenever model changes are merged into the `main` branch.
+
+
 ## deploy-kubernetes.yml
 
 This workflow deploys changes to the production Kubernetes cluster when they get merged into the `main` branch.
diff --git a/.github/workflows/deploy-dbt-docs.yml b/.github/workflows/deploy-dbt-docs.yml
new file mode 100644
index 0000000000..f7440fe03d
--- /dev/null
+++ b/.github/workflows/deploy-dbt-docs.yml
@@ -0,0 +1,462 @@
+# Builds and publishes the dbt docs: exports Metabase exposures, compiles dbt
+# with them, syncs changed models to Metabase, and uploads artifacts to GCS.
+name: Deploy dbt docs
+
+on:
+  # The weekly schedule is currently disabled.
+  # schedule:
+  #   - cron: "0 8 * * 1" # 8am UTC (midnight PST / 1am PDT) every Monday
+  push:
+    branches:
+      - main
+    paths:
+      - .github/workflows/deploy-dbt-docs.yml
+      - 'warehouse/**'
+  pull_request:
+    paths:
+      - .github/workflows/deploy-dbt-docs.yml
+      - 'warehouse/**'
+
+# One active run per workflow and ref; superseded runs are cancelled off main.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Runs on main use the production values; every other ref uses staging.
+env:
+  PYTHON_VERSION: '3.11'
+  POETRY_VERSION: '2.0.1'
+  SERVICE_ACCOUNT: ${{ github.ref == 'refs/heads/main' && 'github-actions-service-account@cal-itp-data-infra.iam.gserviceaccount.com' || 'github-actions-service-account@cal-itp-data-infra-staging.iam.gserviceaccount.com' }}
+  WORKLOAD_IDENTITY_PROVIDER: ${{ github.ref == 'refs/heads/main' && 'projects/1005246706141/locations/global/workloadIdentityPools/github-actions/providers/data-infra' || 'projects/473674835135/locations/global/workloadIdentityPools/github-actions/providers/data-infra' }}
+  PROJECT_ID: ${{ github.ref == 'refs/heads/main' && 'cal-itp-data-infra' || 'cal-itp-data-infra-staging' }}
+  DBT_TARGET: ${{ github.ref == 'refs/heads/main' && 'prod' || 'staging' }}
+  DBT_ARTIFACTS_BUCKET: ${{ github.ref == 'refs/heads/main' && 'calitp-dbt-artifacts' || 'calitp-staging-dbt-artifacts' }}
+  DBT_DOCS_BUCKET: ${{ github.ref == 'refs/heads/main' && 'calitp-dbt-docs' || 'calitp-staging-dbt-docs' }}
+  DBT_SOURCE_DATABASE: ${{ github.ref == 'refs/heads/main' && 'cal-itp-data-infra' || 'cal-itp-data-infra-staging' }}
+  METABASE_DESTINATION_DATABASE: ${{ github.ref == 'refs/heads/main' && 'Data Marts (formerly Warehouse Views)' || '(Internal) Staging Warehouse Views' }}
+
+jobs:
+  # Reports whether any dbt model SQL file changed; gates the Metabase sync.
+  models-changed:
+    name: Detect dbt model changes
+    runs-on: ubuntu-latest
+
+    outputs:
+      any_changed: ${{ steps.changed-files-warehouse.outputs.any_changed }}
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - uses: tj-actions/changed-files@v46
+        id: changed-files-warehouse
+        with:
+          files: 'warehouse/models/**/*.sql'
+
+  # Compiles dbt, then exports Metabase questions and dashboards as exposures.
+  export_exposures:
+    name: Export exposures
+    runs-on: ubuntu-latest
+
+    permissions:
+      contents: read
+      id-token: write
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Authenticate Google Service Account
+        uses: google-github-actions/auth@v2
+        with:
+          create_credentials_file: 'true'
+          project_id: ${{ env.PROJECT_ID }}
+          service_account: ${{ env.SERVICE_ACCOUNT }}
+          workload_identity_provider: ${{ env.WORKLOAD_IDENTITY_PROVIDER }}
+
+      - name: Setup GCloud utilities
+        uses: google-github-actions/setup-gcloud@v2
+
+      - name: Setup Graphviz
+        uses: ts-graphviz/setup-graphviz@v2
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Cache Poetry
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pypoetry
+          key: poetry-cache-${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-poetry-${{ env.POETRY_VERSION }}
+
+      - name: Setup Poetry
+        uses: abatilo/actions-poetry@v3
+        with:
+          poetry-version: ${{ env.POETRY_VERSION }}
+
+      - name: Cache Python packages
+        uses: actions/cache@v3
+        with:
+          path: ~/.local
+          key: python-cache-${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-lock-${{ hashFiles('warehouse/poetry.lock') }}-${{ hashFiles('.github/workflows/*.yml') }}
+
+      - name: Install dependencies
+        working-directory: warehouse
+        run: poetry install
+
+      - name: Cache dbt packages
+        uses: actions/cache@v3
+        with:
+          path: warehouse/dbt_packages
+          key: python-cache-${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-lock-${{ hashFiles('warehouse/poetry.lock') }}-dbt-packages-${{ hashFiles('warehouse/packages.yml') }}
+
+      - name: Install dbt dependencies
+        working-directory: warehouse
+        run: poetry run dbt deps
+
+      - name: Print dbt environment
+        working-directory: warehouse
+        run: poetry run dbt debug --target ${{ env.DBT_TARGET }}
+
+      - name: Compile dbt
+        working-directory: warehouse
+        run: poetry run dbt compile --target ${{ env.DBT_TARGET }} --full-refresh
+
+      - name: List target dir
+        working-directory: warehouse/target
+        run: ls
+
+      - name: Create Metabase folder
+        working-directory: warehouse/models
+        # -p: do not fail when the folder already exists in the checkout.
+        run: mkdir -p metabase
+
+      - name: Extract Metabase exposures
+        working-directory: warehouse
+        env:
+          # Passed through the environment so the secret is not interpolated
+          # into the text of the generated shell script.
+          METABASE_API_KEY: ${{ secrets.METABASE_API_KEY }}
+        run: >-
+          poetry run dbt-metabase exposures -v
+          --output-grouping="type"
+          --output-path=./models/metabase/
+          --manifest-path=./target/manifest.json
+          --metabase-url="https://dashboards.calitp.org"
+          --metabase-api-key="$METABASE_API_KEY"
+
+      - name: Archive metabase exposures
+        uses: actions/upload-artifact@v4
+        with:
+          name: exposures
+          path: warehouse/models/metabase/*
+
+  # Recompiles dbt with the exported exposures and generates the docs site.
+  compile:
+    name: Compile dbt docs
+    runs-on: ubuntu-latest
+    needs: [export_exposures]
+
+    permissions:
+      contents: read
+      id-token: write
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Download Metabase exposures
+        uses: actions/download-artifact@v4
+        with:
+          name: exposures
+          # "path" is the destination directory, not a glob pattern.
+          path: warehouse/models/metabase
+
+      - name: Authenticate Google Service Account
+        uses: google-github-actions/auth@v2
+        with:
+          create_credentials_file: 'true'
+          project_id: ${{ env.PROJECT_ID }}
+          service_account: ${{ env.SERVICE_ACCOUNT }}
+          workload_identity_provider: ${{ env.WORKLOAD_IDENTITY_PROVIDER }}
+
+      - name: Setup GCloud utilities
+        uses: google-github-actions/setup-gcloud@v2
+
+      - name: Setup Graphviz
+        uses: ts-graphviz/setup-graphviz@v2
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Cache Poetry
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pypoetry
+          key: poetry-cache-${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-poetry-${{ env.POETRY_VERSION }}
+
+      - name: Setup Poetry
+        uses: abatilo/actions-poetry@v3
+        with:
+          poetry-version: ${{ env.POETRY_VERSION }}
+
+      - name: Cache Python packages
+        uses: actions/cache@v3
+        with:
+          path: ~/.local
+          key: python-cache-${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-lock-${{ hashFiles('warehouse/poetry.lock') }}-${{ hashFiles('.github/workflows/*.yml') }}
+
+      - name: Install dependencies
+        working-directory: warehouse
+        run: poetry install
+
+      - name: Cache dbt packages
+        uses: actions/cache@v3
+        with:
+          path: warehouse/dbt_packages
+          key: python-cache-${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-lock-${{ hashFiles('warehouse/poetry.lock') }}-dbt-packages-${{ hashFiles('warehouse/packages.yml') }}
+
+      - name: Install dbt dependencies
+        working-directory: warehouse
+        run: poetry run dbt deps
+
+      - name: Print dbt environment
+        working-directory: warehouse
+        run: poetry run dbt debug --target ${{ env.DBT_TARGET }}
+
+      - name: Compile dbt with exposures
+        working-directory: warehouse
+        run: poetry run dbt compile --target ${{ env.DBT_TARGET }} --full-refresh
+
+      - name: Generate dbt documentation
+        working-directory: warehouse
+        run: poetry run dbt docs generate --target ${{ env.DBT_TARGET }} --no-compile
+
+      - name: Archive compilation artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: dbt
+          path: |
+            warehouse/target/*.json
+            warehouse/target/*.html
+
+  # Runs modified models and syncs them to Metabase; skipped unless model SQL changed.
+  metabase:
+    name: Sync Metabase
+    runs-on: ubuntu-latest
+    needs: [models-changed, compile]
+
+    if: ${{ needs.models-changed.outputs.any_changed == 'true' }}
+
+    permissions:
+      contents: read
+      id-token: write
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Download compilation artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: dbt
+          path: warehouse/target
+
+      - name: Authenticate Google Service Account
+        uses: google-github-actions/auth@v2
+        with:
+          create_credentials_file: 'true'
+          project_id: ${{ env.PROJECT_ID }}
+          service_account: ${{ env.SERVICE_ACCOUNT }}
+          workload_identity_provider: ${{ env.WORKLOAD_IDENTITY_PROVIDER }}
+
+      - name: Setup GCloud utilities
+        uses: google-github-actions/setup-gcloud@v2
+
+      - name: Setup Graphviz
+        uses: ts-graphviz/setup-graphviz@v2
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Cache Poetry
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pypoetry
+          key: poetry-cache-${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-poetry-${{ env.POETRY_VERSION }}
+
+      - name: Setup Poetry
+        uses: abatilo/actions-poetry@v3
+        with:
+          poetry-version: ${{ env.POETRY_VERSION }}
+
+      - name: Cache Python packages
+        uses: actions/cache@v3
+        with:
+          path: ~/.local
+          key: python-cache-${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-lock-${{ hashFiles('warehouse/poetry.lock') }}-${{ hashFiles('.github/workflows/*.yml') }}
+
+      - name: Install dependencies
+        working-directory: warehouse
+        run: poetry install
+
+      - name: Cache dbt packages
+        uses: actions/cache@v3
+        with:
+          path: warehouse/dbt_packages
+          key: python-cache-${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-lock-${{ hashFiles('warehouse/poetry.lock') }}-dbt-packages-${{ hashFiles('warehouse/packages.yml') }}
+
+      - name: Install dbt dependencies
+        working-directory: warehouse
+        run: poetry run dbt deps
+
+      - name: Download latest artifacts from GCS
+        working-directory: warehouse
+        # if: ${{ env.DBT_TARGET == 'staging' }}
+        run: gsutil cp -r gs://${{ env.DBT_ARTIFACTS_BUCKET }}/latest/ ./target/
+
+      - name: Print dbt environment
+        working-directory: warehouse
+        run: poetry run dbt debug --target ${{ env.DBT_TARGET }}
+
+      - name: Run changed models
+        working-directory: warehouse
+        # if: ${{ env.DBT_TARGET == 'staging' }}
+        run: poetry run dbt run --select state:modified+ --target ${{ env.DBT_TARGET }} --state ./target/latest
+
+      - name: Synchronize Metabase
+        working-directory: warehouse
+        env:
+          # Passed through the environment so the secret is not interpolated
+          # into the text of the generated shell script.
+          METABASE_API_KEY: ${{ secrets.METABASE_API_KEY }}
+        run: >-
+          poetry run dbt-metabase models -v
+          --manifest-path=target/manifest.json
+          --exclude-schemas="*staging, payments"
+          --skip-sources
+          --docs-url="https://dbt-docs.calitp.org"
+          --metabase-url="https://dashboards.calitp.org"
+          --metabase-database="${{ env.METABASE_DESTINATION_DATABASE }}"
+          --metabase-api-key="$METABASE_API_KEY"
+
+  # Publishes artifacts, exposures, and docs to Google Cloud Storage.
+  upload:
+    name: Upload to Google Cloud Storage
+    runs-on: ubuntu-latest
+    needs: [compile, metabase]
+
+    permissions:
+      contents: read
+      id-token: write
+
+    # Still run when the Metabase sync was skipped, but not after a failure.
+    if: ${{ always() && !failure() && !cancelled() }}
+
+    steps:
+      - name: Download compilation artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: dbt
+          path: warehouse/target
+
+      - name: Download Metabase exposures
+        uses: actions/download-artifact@v4
+        with:
+          name: exposures
+          path: warehouse/models/metabase
+
+      - name: Authenticate Google Service Account
+        uses: google-github-actions/auth@v2
+        with:
+          create_credentials_file: 'true'
+          project_id: ${{ env.PROJECT_ID }}
+          service_account: ${{ env.SERVICE_ACCOUNT }}
+          workload_identity_provider: ${{ env.WORKLOAD_IDENTITY_PROVIDER }}
+
+      - name: Setup GCloud utilities
+        uses: google-github-actions/setup-gcloud@v2
+
+      - name: Get current timestamp
+        uses: josStorer/get-current-time@v2
+        id: current-time
+        with:
+          format: "YYYY-MM-DDTHH:mm:ss.SSSSSS+00:00"
+          timezone: "UTC"
+
+      - name: Get current date
+        uses: josStorer/get-current-time@v2
+        id: current-date
+        with:
+          format: "YYYY-MM-DD"
+          timezone: "UTC"
+
+      - name: Upload catalog.json
+        uses: google-github-actions/upload-cloud-storage@v1
+        with:
+          path: './warehouse/target/'
+          glob: 'catalog.json'
+          parent: false
+          process_gcloudignore: false
+          destination: "${{ env.DBT_ARTIFACTS_BUCKET }}/catalog.json/dt=${{ steps.current-date.outputs.formattedTime }}/ts=${{ steps.current-time.outputs.formattedTime }}/"
+
+      - name: Upload manifest.json
+        uses: google-github-actions/upload-cloud-storage@v1
+        with:
+          path: './warehouse/target/'
+          glob: 'manifest.json'
+          parent: false
+          process_gcloudignore: false
+          destination: "${{ env.DBT_ARTIFACTS_BUCKET }}/manifest.json/dt=${{ steps.current-date.outputs.formattedTime }}/ts=${{ steps.current-time.outputs.formattedTime }}/"
+
+      - name: Upload run_results.json
+        uses: google-github-actions/upload-cloud-storage@v1
+        with:
+          path: './warehouse/target/'
+          glob: 'run_results.json'
+          parent: false
+          process_gcloudignore: false
+          destination: "${{ env.DBT_ARTIFACTS_BUCKET }}/run_results.json/dt=${{ steps.current-date.outputs.formattedTime }}/ts=${{ steps.current-time.outputs.formattedTime }}/"
+
+      - name: Upload index.html
+        uses: google-github-actions/upload-cloud-storage@v1
+        with:
+          path: './warehouse/target/'
+          glob: 'index.html'
+          parent: false
+          process_gcloudignore: false
+          destination: "${{ env.DBT_ARTIFACTS_BUCKET }}/index.html/dt=${{ steps.current-date.outputs.formattedTime }}/ts=${{ steps.current-time.outputs.formattedTime }}/"
+
+      - name: Upload artifacts to latest
+        uses: google-github-actions/upload-cloud-storage@v1
+        with:
+          path: './warehouse/target/'
+          glob: '{catalog.json,manifest.json,index.html,run_results.json}'
+          parent: false
+          process_gcloudignore: false
+          destination: "${{ env.DBT_ARTIFACTS_BUCKET }}/latest/"
+
+      - name: Upload Metabase exposures
+        uses: google-github-actions/upload-cloud-storage@v1
+        with:
+          path: './warehouse/models/metabase'
+          parent: false
+          process_gcloudignore: false
+          destination: "${{ env.DBT_ARTIFACTS_BUCKET }}/exposures/dt=${{ steps.current-date.outputs.formattedTime }}/ts=${{ steps.current-time.outputs.formattedTime }}/"
+
+      - name: Upload documentation
+        uses: google-github-actions/upload-cloud-storage@v1
+        with:
+          path: './warehouse/target/'
+          glob: '{catalog.json,manifest.json,index.html}'
+          parent: false
+          process_gcloudignore: false
+          destination: ${{ env.DBT_DOCS_BUCKET }}