ci: replace build-artifact-s3 with new workflow, add local tpch benches (#3864)

This PR adds a few things:
- `publish-dev-s3.yml`: a workflow replacing `build-artifact-s3.yml` that uses the unified build process in `build-wheel.yml` and serves a Python simple repository API through CloudFront
- `benchmark-local-tpch.yml`: runs benchmarks for local TPC-H at scale factor 100. It automatically builds and publishes wheels if necessary, and runs both on manual dispatch and as a nightly test
- Fixes the index URLs in `nightlies-tests.yml` and adds the local TPC-H benchmarks there

Eventually I plan to move all of our current nightly benchmarks into our main repo and have them publish to Google Sheets. Then we can deprecate our Postgres/Looker setup as well as our K8s Ray cluster.
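For reference, here is a minimal sketch (not part of this commit) of how a consumer might install a dev build from the per-commit index that `publish-dev-s3.yml` publishes. The URL pattern and install command are taken from the workflow below; the commit SHA is a placeholder:

import subprocess

commit_sha = "7bcec18"  # placeholder: any commit whose dev wheels have been published
index_url = f"https://d1p3klp2t5517h.cloudfront.net/builds/dev/{commit_sha}"
subprocess.run(
    ["uv", "pip", "install", "getdaft", "--pre", "--extra-index-url", index_url],
    check=True,
)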
1 parent 7ee2b35 · commit 7bcec18
Showing 8 changed files with 303 additions and 221 deletions.
.github/ci-scripts/local_tpch.py (new file)
@@ -0,0 +1,94 @@
"""Run TPC-H benchmarks with native runner on local Parquet data and upload results to Google sheets. | ||
Expects tables as Parquet files in "/tmp/tpch-data/" | ||
""" | ||
|
||
import os | ||
import time | ||
from datetime import datetime, timezone | ||
|
||
import gspread | ||
|
||
import daft | ||
import daft.context | ||
from benchmarking.tpch import answers | ||
from daft.sql import SQLCatalog | ||
|
||
|
||
def get_df(name): | ||
return daft.read_parquet(f"/tmp/tpch-data/{name}/*") | ||
|
||
|
||
def run_benchmark(): | ||
table_names = [ | ||
"part", | ||
"supplier", | ||
"partsupp", | ||
"customer", | ||
"orders", | ||
"lineitem", | ||
"nation", | ||
"region", | ||
] | ||
|
||
def lowercase_column_names(df): | ||
return df.select(*[daft.col(name).alias(name.lower()) for name in df.column_names]) | ||
|
||
catalog = SQLCatalog({tbl: lowercase_column_names(get_df(tbl)) for tbl in table_names}) | ||
|
||
results = {} | ||
|
||
for q in range(1, 23): | ||
if q == 21: | ||
# TODO: remove this once we support q21 | ||
daft_df = answers.q21(get_df) | ||
else: | ||
with open(f"benchmarking/tpch/queries/{q:02}.sql") as query_file: | ||
query = query_file.read() | ||
daft_df = daft.sql(query, catalog=catalog) | ||
|
||
start = time.perf_counter() | ||
daft_df.collect() | ||
end = time.perf_counter() | ||
|
||
results[q] = end - start | ||
|
||
return results | ||
|
||
|
||
def get_run_metadata(): | ||
return { | ||
"started at": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S.%f"), | ||
"daft version": daft.__version__, | ||
"github ref": os.getenv("GITHUB_REF"), | ||
"github sha": os.getenv("GITHUB_SHA"), | ||
} | ||
|
||
|
||
def upload_to_google_sheets(data): | ||
gc = gspread.service_account() | ||
|
||
sh = gc.open_by_url( | ||
"https://docs.google.com/spreadsheets/d/1d6pXsIsBkjjM93GYtoiF83WXvJXR4vFgFQdmG05u8eE/edit?gid=0#gid=0" | ||
) | ||
ws = sh.worksheet("Local TPC-H") | ||
ws.append_row(data) | ||
|
||
|
||
def main(): | ||
daft.context.set_runner_native() | ||
|
||
metadata = get_run_metadata() | ||
|
||
results = run_benchmark() | ||
|
||
data_dict = {**metadata, **results} | ||
|
||
print("Results:") | ||
print(data_dict) | ||
|
||
upload_to_google_sheets(list(data_dict.values())) | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |
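As a side note, a quick pre-flight check like the following (a hypothetical sketch, not part of the commit; `TPCH_TABLES` mirrors the script's table list) can verify the expected "/tmp/tpch-data/" layout before running the benchmark:

import glob

TPCH_TABLES = ["part", "supplier", "partsupp", "customer", "orders", "lineitem", "nation", "region"]
for name in TPCH_TABLES:
    # Each table should have at least one Parquet file in its directory.
    assert glob.glob(f"/tmp/tpch-data/{name}/*"), f"no Parquet files found for table {name!r}"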
.github/workflows/benchmark-local-tpch.yml (new file)
@@ -0,0 +1,72 @@
name: benchmark-local-tpch

on:
  workflow_dispatch:
  workflow_call:
    inputs:
      daft_index_url:
        description: The index URL of the Daft build to benchmark. If not provided, builds Daft off of the provided branch.
        type: string
        default: ''
        required: false

env:
  DAFT_INDEX_URL: ${{ inputs.daft_index_url != '' && inputs.daft_index_url || format('https://d1p3klp2t5517h.cloudfront.net/builds/dev/{0}', github.sha) }}
  AWS_REGION: us-west-2

  DAFT_ANALYTICS_ENABLED: '0'
  UV_SYSTEM_PYTHON: 1
  PYTHON_VERSION: '3.9'

jobs:
  build:
    name: Build and publish wheels if necessary
    if: ${{ inputs.daft_index_url == '' }}
    uses: ./.github/workflows/publish-dev-s3.yml

  benchmark:
    needs: build
    # Run even when the build job was skipped (i.e. a prebuilt index URL was provided).
    if: ${{ !failure() && !cancelled() }}
    runs-on: [self-hosted, linux, arm64, bench-tpch]
    permissions:
      id-token: write
      contents: read

    steps:
    - name: Mount local SSD to /tmp
      run: |
        findmnt /tmp 1> /dev/null
        code=$?
        if [ $code -ne 0 ]; then
          sudo mkfs.ext4 /dev/nvme0n1
          sudo mount -t ext4 /dev/nvme0n1 /tmp
          sudo chmod 777 /tmp
        fi
    - uses: actions/checkout@v4
      with:
        submodules: true
    - name: Assume GitHub Actions AWS Credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: ${{ env.AWS_REGION }}
        role-to-assume: ${{ secrets.ACTIONS_AWS_ROLE_ARN }}
        role-session-name: DaftLocalTpchGitHubWorkflow
    - name: Download TPC-H data (100SF)
      run: aws s3 cp s3://eventual-dev-benchmarking-fixtures/uncompressed/tpch-dbgen/100_0/32/parquet /tmp/tpch-data --recursive

    - name: Install uv
      uses: astral-sh/setup-uv@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}
    - name: Install Daft and dev dependencies
      run: |
        rm -rf daft
        uv pip install getdaft --pre --extra-index-url ${{ env.DAFT_INDEX_URL }}
        uv pip install gspread
    - name: Write service account secret file
      run: |
        cat << EOF > ~/.config/gspread/service_account.json
        ${{ secrets.GOOGLE_SHEETS_SERVICE_ACCOUNT }}
        EOF
    - name: Run benchmark and upload results to Google Sheets
      run: python .github/ci-scripts/local_tpch.py
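The `DAFT_INDEX_URL` expression above emulates a ternary using GitHub's `&&`/`||` expression operators. Expressed in Python for clarity (an illustrative sketch; the function name is hypothetical):

def resolve_index_url(daft_index_url: str, github_sha: str) -> str:
    # Use the caller-provided index URL if non-empty; otherwise fall back to
    # the dev build published for the current commit.
    if daft_index_url:
        return daft_index_url
    return f"https://d1p3klp2t5517h.cloudfront.net/builds/dev/{github_sha}"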
.github/workflows/build-artifact-s3.yml: this file was deleted (replaced by `publish-dev-s3.yml`).

.github/workflows/nightlies-tests.yml
@@ -1,13 +1,8 @@
-name: Verify platforms nightly wheels
+name: Test and benchmark suite on nightly build
 
 on:
-  schedule:
-  - cron: 0 13 * * *
   workflow_dispatch:
-  workflow_run:
-    workflows: [daft-publish]
-    types:
-    - completed
+  workflow_call:
 
 env:
   DAFT_ANALYTICS_ENABLED: '0'

@@ -44,7 +39,7 @@ jobs:
   - name: Install Daft and dev dependencies
     run: |
-      uv pip install -r requirements-dev.txt getdaft --pre --extra-index-url https://pypi.anaconda.org/daft-nightly/simple --force-reinstall
+      uv pip install -r requirements-dev.txt getdaft --pre --extra-index-url https://d1p3klp2t5517h.cloudfront.net/builds/nightly --force-reinstall
       rm -rf daft
   - uses: actions/cache@v4
     env:

@@ -114,7 +109,7 @@ jobs:
       echo "$GITHUB_WORKSPACE/venv/bin" >> $GITHUB_PATH
   - name: Install Daft and dev dependencies
     run: |
-      uv pip install -r requirements-dev.txt getdaft --pre --extra-index-url https://pypi.anaconda.org/daft-nightly/simple --force-reinstall
+      uv pip install -r requirements-dev.txt getdaft --pre --extra-index-url https://d1p3klp2t5517h.cloudfront.net/builds/nightly --force-reinstall
       rm -rf daft
   - name: Prepare tmpdirs for IO services
     run: |

@@ -155,3 +150,32 @@ jobs:
     env:
       SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
       SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
+
+  benchmark-local-tpch:
+    uses: ./.github/workflows/benchmark-local-tpch.yml
+    with:
+      daft_index_url: https://d1p3klp2t5517h.cloudfront.net/builds/nightly
+
+  on-local-tpch-failure:
+    name: Send Slack notification on failure
+    runs-on: ubuntu-latest
+    needs: benchmark-local-tpch
+    if: ${{ failure() }}
+
+    steps:
+    - uses: slackapi/[email protected]
+      with:
+        payload: |
+          {
+            "blocks": [
+              {
+                "type": "section",
+                "text": {
+                  "type": "mrkdwn",
+                  "text": ":rotating_light: [CI] Local TPC-H benchmarks on nightly wheel <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|workflow> *FAILED* :rotating_light:"
+                }
+              }
+            ]
+          }
+        webhook: ${{ secrets.SLACK_WEBHOOK_URL }}
+        webhook-type: incoming-webhook