From fdcfe3bbbe1b4afbd3e2454fb9bef2a295024fe4 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 12 Sep 2024 14:05:32 +0100 Subject: [PATCH 01/11] . --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index da1cb42e28..3ea09aec17 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,6 +37,8 @@ jobs: run: cargo doc --no-deps - name: Rust Test run: cargo test --workspace --all-features + - name: Rust Bench as test + run: cargo bench -- --test - name: Rust Build run: cargo build --all-features --all-targets From 173278f9bd1e9c92eb4d9fd292390bd95de8b6d2 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Thu, 12 Sep 2024 14:34:25 +0100 Subject: [PATCH 02/11] fix --- bench-vortex/benches/compress_benchmark.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench-vortex/benches/compress_benchmark.rs b/bench-vortex/benches/compress_benchmark.rs index ae9e598fa5..a9b7193b4f 100644 --- a/bench-vortex/benches/compress_benchmark.rs +++ b/bench-vortex/benches/compress_benchmark.rs @@ -19,7 +19,7 @@ fn vortex_compress_taxi(c: &mut Criterion) { fn vortex_compress_medicare1(c: &mut Criterion) { let dataset = BenchmarkDatasets::PBI(Medicare1); - dataset.as_uncompressed(); + dataset.write_as_parquet(); let mut group = c.benchmark_group("end to end - medicare"); group.sample_size(10); group.bench_function("compress", |b| { From fb6a86af69dd53464407886c7981abe7c8b9b2e0 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 12 Sep 2024 14:36:24 +0100 Subject: [PATCH 03/11] add backtraces --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3ea09aec17..8371a183a4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,6 +12,7 @@ permissions: env: CARGO_TERM_COLOR: always + RUST_BACKTRACE: "1" jobs: build: From 432b14449e5f1247ad7b1682c7429cb8f13c598e Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 12 Sep 2024 14:49:53 +0100 Subject: [PATCH 04/11] . --- .github/workflows/ci.yml | 5 +++++ bench-vortex/src/public_bi_data.rs | 2 +- bench-vortex/src/reader.rs | 3 ++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8371a183a4..a895328d71 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,6 +24,11 @@ jobs: - uses: ./.github/actions/setup-rust - uses: ./.github/actions/setup-python + # Required to run benchmarks + - name: Install DuckDB + uses: opt-nc/setup-duckdb-action@v1.0.9 + with: + version: v1.0.0 - name: Python Lint - Format run: rye run ruff format --check . diff --git a/bench-vortex/src/public_bi_data.rs b/bench-vortex/src/public_bi_data.rs index b0adb9326a..da052f14a6 100644 --- a/bench-vortex/src/public_bi_data.rs +++ b/bench-vortex/src/public_bi_data.rs @@ -456,7 +456,7 @@ impl BenchmarkDataset for BenchmarkDatasets { &path_for_file_type(self, output_fname, "parquet"), |output_path| write_csv_as_parquet(f, output_path), ) - .expect("Failed to compress to parquet"); + .unwrap(); let pq_size = compressed.metadata().unwrap().size(); info!( "Parquet size: {}, {}B", diff --git a/bench-vortex/src/reader.rs b/bench-vortex/src/reader.rs index 90fbb5318c..a0a6595148 100644 --- a/bench-vortex/src/reader.rs +++ b/bench-vortex/src/reader.rs @@ -121,7 +121,8 @@ pub fn write_csv_as_parquet(csv_path: PathBuf, output_path: &Path) -> VortexResu csv_path.as_path().to_str().unwrap(), output_path.to_str().unwrap() )) - .status()? + .status() + .unwrap() .exit_ok() .unwrap(); Ok(()) From 69c4b69efdcaadff170c937ad44bf5136207582b Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 12 Sep 2024 14:51:09 +0100 Subject: [PATCH 05/11] . --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a895328d71..9bdc98bd9d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,7 +44,7 @@ jobs: - name: Rust Test run: cargo test --workspace --all-features - name: Rust Bench as test - run: cargo bench -- --test + run: cargo bench --profile=test -- --test - name: Rust Build run: cargo build --all-features --all-targets From 4ea07c795836435091f70d81a0a6d550ee9a91e9 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 12 Sep 2024 15:07:07 +0100 Subject: [PATCH 06/11] Update FSST --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f5168f0dee..af69c81bd5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1586,9 +1586,9 @@ dependencies = [ [[package]] name = "fsst-rs" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00c728b0566743ed11dcd120c35b5e2217df22c04679809612a4d12607e3078e" +checksum = "4762a8f74bc9404ad343b4036c5249b082cd70c595dae8ca12a40375e7463032" [[package]] name = "futures" diff --git a/Cargo.toml b/Cargo.toml index 8e044c81ad..52449e0c5a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,7 +81,7 @@ fastlanes = "0.1.5" flatbuffers = "24.3.25" flexbuffers = "2.0.0" fs_extra = "1.3.0" -fsst-rs = "0.4.0" +fsst-rs = "0.4.1" futures = { version = "0.3.30", default-features = false } futures-executor = "0.3.30" futures-util = "0.3.30" From faa0c40a7a5c43fe4d5c6259f6a0c4ab6abc2357 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Thu, 12 Sep 2024 15:12:38 +0100 Subject: [PATCH 07/11] guard R2 benchmarks on configured env --- bench-vortex/benches/random_access.rs | 62 +++++++++++++++------------ 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/bench-vortex/benches/random_access.rs b/bench-vortex/benches/random_access.rs index 6d00240865..a330d29b66 100644 --- a/bench-vortex/benches/random_access.rs +++ b/bench-vortex/benches/random_access.rs @@ -1,3 +1,4 @@ +use std::env; use std::sync::Arc; use bench_vortex::reader::{ @@ -44,21 +45,24 @@ fn random_access_vortex(c: &mut Criterion) { }) }); - group.sample_size(10).bench_function("R2", |b| { - let r2_fs = Arc::new(AmazonS3Builder::from_env().build().unwrap()) as Arc; - let r2_path = object_store::path::Path::from_url_path( - taxi_vortex.file_name().unwrap().to_str().unwrap(), - ) - .unwrap(); - - b.to_async(Runtime::new().unwrap()).iter(|| async { - black_box( - take_vortex_object_store(&r2_fs, &r2_path, &INDICES) - .await - .unwrap(), + if env::var("AWS_ACCESS_KEY_ID").is_ok() { + group.sample_size(10).bench_function("R2", |b| { + let r2_fs = + Arc::new(AmazonS3Builder::from_env().build().unwrap()) as Arc; + let r2_path = object_store::path::Path::from_url_path( + taxi_vortex.file_name().unwrap().to_str().unwrap(), ) - }) - }); + .unwrap(); + + b.to_async(Runtime::new().unwrap()).iter(|| async { + black_box( + take_vortex_object_store(&r2_fs, &r2_path, &INDICES) + .await + .unwrap(), + ) + }) + }); + } } fn random_access_parquet(c: &mut Criterion) { @@ -71,21 +75,23 @@ fn random_access_parquet(c: &mut Criterion) { .iter(|| async { black_box(take_parquet(&taxi_parquet, &INDICES).await.unwrap()) }) }); - group.bench_function("R2", |b| { - let r2_fs = Arc::new(AmazonS3Builder::from_env().build().unwrap()); - let r2_parquet_path = object_store::path::Path::from_url_path( - taxi_parquet.file_name().unwrap().to_str().unwrap(), - ) - .unwrap(); - - b.to_async(Runtime::new().unwrap()).iter(|| async { - black_box( - take_parquet_object_store(r2_fs.clone(), &r2_parquet_path, &INDICES) - .await - .unwrap(), + if env::var("AWS_ACCESS_KEY_ID").is_ok() { + group.bench_function("R2", |b| { + let r2_fs = Arc::new(AmazonS3Builder::from_env().build().unwrap()); + let r2_parquet_path = object_store::path::Path::from_url_path( + taxi_parquet.file_name().unwrap().to_str().unwrap(), ) - }) - }); + .unwrap(); + + b.to_async(Runtime::new().unwrap()).iter(|| async { + black_box( + take_parquet_object_store(r2_fs.clone(), &r2_parquet_path, &INDICES) + .await + .unwrap(), + ) + }) + }); + } } criterion_group!(benches, random_access_vortex, random_access_parquet); From f5c5ee8aeea0fd39708231e1a1265df43d84775f Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 12 Sep 2024 15:26:08 +0100 Subject: [PATCH 08/11] . --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9bdc98bd9d..214d8fa5ff 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,7 +44,7 @@ jobs: - name: Rust Test run: cargo test --workspace --all-features - name: Rust Bench as test - run: cargo bench --profile=test -- --test + run: cargo bench --benches --profile=test -- --test - name: Rust Build run: cargo build --all-features --all-targets From c85aa9b2ec86368249f91511884b3a976ec3ce6c Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 12 Sep 2024 15:26:53 +0100 Subject: [PATCH 09/11] . --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 214d8fa5ff..123a609f2a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,7 +44,7 @@ jobs: - name: Rust Test run: cargo test --workspace --all-features - name: Rust Bench as test - run: cargo bench --benches --profile=test -- --test + run: cargo bench --benches -- --test - name: Rust Build run: cargo build --all-features --all-targets From 8f660d21270579fc961a9a4873307bca91e4c6e3 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 12 Sep 2024 16:00:28 +0100 Subject: [PATCH 10/11] Move to another action --- .github/workflows/bench-test.yml | 34 ++++++++++++++++++++++++++++++++ .github/workflows/ci.yml | 27 ++++++++++++++++--------- 2 files changed, 52 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/bench-test.yml diff --git a/.github/workflows/bench-test.yml b/.github/workflows/bench-test.yml new file mode 100644 index 0000000000..176aa91bf4 --- /dev/null +++ b/.github/workflows/bench-test.yml @@ -0,0 +1,34 @@ +name: CI + +on: + push: + branches: [ "develop" ] + pull_request: { } + workflow_dispatch: { } + +permissions: + actions: read + contents: read + +env: + CARGO_TERM_COLOR: always + RUST_BACKTRACE: "1" + +jobs: + build: + name: 'build' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/cleanup + + - uses: ./.github/actions/setup-rust + - uses: ./.github/actions/setup-python + # Required to run benchmarks + - name: Install DuckDB + uses: opt-nc/setup-duckdb-action@v1.0.9 + with: + version: v1.0.0 + + - name: Rust Bench as test + run: cargo bench --benches -- --test diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 123a609f2a..612d623658 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ permissions: env: CARGO_TERM_COLOR: always - RUST_BACKTRACE: "1" + RUST_BACKTRACE: 1 jobs: build: @@ -24,11 +24,6 @@ jobs: - uses: ./.github/actions/setup-rust - uses: ./.github/actions/setup-python - # Required to run benchmarks - - name: Install DuckDB - uses: opt-nc/setup-duckdb-action@v1.0.9 - with: - version: v1.0.0 - name: Python Lint - Format run: rye run ruff format --check . @@ -43,8 +38,6 @@ jobs: run: cargo doc --no-deps - name: Rust Test run: cargo test --workspace --all-features - - name: Rust Bench as test - run: cargo bench --benches -- --test - name: Rust Build run: cargo build --all-features --all-targets @@ -62,7 +55,6 @@ jobs: name: 'miri' runs-on: ubuntu-latest env: - RUST_BACKTRACE: 1 MIRIFLAGS: -Zmiri-strict-provenance -Zmiri-symbolic-alignment-check -Zmiri-backtrace=full steps: - uses: actions/checkout@v4 @@ -70,3 +62,20 @@ jobs: - uses: ./.github/actions/setup-rust - name: Run tests with Miri run: cargo miri test + + bench-test: + name: 'bench test' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/cleanup + + - uses: ./.github/actions/setup-rust + - uses: ./.github/actions/setup-python + # Required to run benchmarks + - name: Install DuckDB + uses: opt-nc/setup-duckdb-action@v1.0.9 + with: + version: v1.0.0 + - name: Rust Bench as test + run: cargo bench --benches -- --test From 343a6f90187ed258585a7612e3d6a57ab69ee1af Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 12 Sep 2024 16:01:54 +0100 Subject: [PATCH 11/11] oops --- .github/workflows/bench-test.yml | 34 -------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 .github/workflows/bench-test.yml diff --git a/.github/workflows/bench-test.yml b/.github/workflows/bench-test.yml deleted file mode 100644 index 176aa91bf4..0000000000 --- a/.github/workflows/bench-test.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: CI - -on: - push: - branches: [ "develop" ] - pull_request: { } - workflow_dispatch: { } - -permissions: - actions: read - contents: read - -env: - CARGO_TERM_COLOR: always - RUST_BACKTRACE: "1" - -jobs: - build: - name: 'build' - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: ./.github/actions/cleanup - - - uses: ./.github/actions/setup-rust - - uses: ./.github/actions/setup-python - # Required to run benchmarks - - name: Install DuckDB - uses: opt-nc/setup-duckdb-action@v1.0.9 - with: - version: v1.0.0 - - - name: Rust Bench as test - run: cargo bench --benches -- --test