Skip to content

Commit 0ddc82e

Browse files
authored
Use < instead of = in case benchmark predicates, use Integers (#18144)
## Which issue does this PR close? - Follow-up to #18097 ## Rationale for this change The last benchmark was, incorrectly, essentially identical to the second-to-last one. The actual predicate was using `=` instead of `<`. ## What changes are included in this PR? - Adjusts the operator in the case predicates to `<` - Adds two additional benchmarks covering `case x when ...` ## Are these changes tested? Verified with debugger. ## Are there any user-facing changes? No
1 parent 9079bbd commit 0ddc82e

File tree

1 file changed

+39
-15
lines changed

1 file changed

+39
-15
lines changed

datafusion/physical-expr/benches/case_when.rs

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow::array::builder::StringBuilder;
19-
use arrow::array::{Array, ArrayRef, Int32Array};
18+
use arrow::array::{Array, ArrayRef, Int32Array, Int32Builder};
2019
use arrow::datatypes::{Field, Schema};
2120
use arrow::record_batch::RecordBatch;
2221
use criterion::{black_box, criterion_group, criterion_main, Criterion};
@@ -37,25 +36,22 @@ fn make_x_cmp_y(
3736
/// Columns are named `c<i>` where `i` is the column index.
3837
///
3938
/// The minimum value for `column_count` is `3`.
40-
/// `c0` contains incrementing int32 values
41-
/// `c1` contains strings with one null inserted every 7 rows
42-
/// `c2` contains strings with one null inserted every 9 rows
43-
/// `c3` to `cn`, is present, contain unspecified int32 values
39+
/// `c1` contains incrementing int32 values
40+
/// `c2` contains int32 values in blocks of 1000 that increment by 1000
41+
/// `c3` contains int32 values with one null inserted every 9 rows
42+
/// `c4` to `cn`, if present, contain unspecified int32 values
4443
fn make_batch(row_count: usize, column_count: usize) -> RecordBatch {
4544
assert!(column_count >= 3);
4645

47-
let mut c2 = StringBuilder::new();
48-
let mut c3 = StringBuilder::new();
46+
let mut c2 = Int32Builder::new();
47+
let mut c3 = Int32Builder::new();
4948
for i in 0..row_count {
50-
if i % 7 == 0 {
51-
c2.append_null();
52-
} else {
53-
c2.append_value(format!("string {i}"));
54-
}
49+
c2.append_value(i as i32 / 1000 * 1000);
50+
5551
if i % 9 == 0 {
5652
c3.append_null();
5753
} else {
58-
c3.append_value(format!("other string {i}"));
54+
c3.append_value(i as i32);
5955
}
6056
}
6157
let c1 = Arc::new(Int32Array::from_iter_values(0..row_count as i32));
@@ -193,7 +189,7 @@ fn run_benchmarks(c: &mut Criterion, batch: &RecordBatch) {
193189

194190
// Many when/then branches where all but the first few are effectively unreachable
195191
c.bench_function(format!("case_when {}x{}: CASE WHEN c1 < 0 THEN 0 WHEN c1 < 1000 THEN 1 ... WHEN c1 < n * 1000 THEN n ELSE n + 1 END", batch.num_rows(), batch.num_columns()).as_str(), |b| {
196-
let when_thens = (0..batch.num_rows() as i32).map(|i| (make_x_cmp_y(&c1, Operator::Eq, i * 1000), lit(i))).collect();
192+
let when_thens = (0..batch.num_rows() as i32).map(|i| (make_x_cmp_y(&c1, Operator::Lt, i * 1000), lit(i))).collect();
197193
let expr = Arc::new(
198194
case(
199195
None,
@@ -204,6 +200,34 @@ fn run_benchmarks(c: &mut Criterion, batch: &RecordBatch) {
204200
);
205201
b.iter(|| black_box(expr.evaluate(black_box(batch)).unwrap()))
206202
});
203+
204+
// Many when/then branches where all are effectively reachable
205+
c.bench_function(format!("case_when {}x{}: CASE c1 WHEN 0 THEN 0 WHEN 1 THEN 1 ... WHEN n THEN n ELSE n + 1 END", batch.num_rows(), batch.num_columns()).as_str(), |b| {
206+
let when_thens = (0..batch.num_rows() as i32).map(|i| (lit(i), lit(i))).collect();
207+
let expr = Arc::new(
208+
case(
209+
Some(Arc::clone(&c1)),
210+
when_thens,
211+
Some(lit(batch.num_rows() as i32))
212+
)
213+
.unwrap(),
214+
);
215+
b.iter(|| black_box(expr.evaluate(black_box(batch)).unwrap()))
216+
});
217+
218+
// Many when/then branches where all but the first few are effectively unreachable
219+
c.bench_function(format!("case_when {}x{}: CASE c2 WHEN 0 THEN 0 WHEN 1000 THEN 1 ... WHEN n * 1000 THEN n ELSE n + 1 END", batch.num_rows(), batch.num_columns()).as_str(), |b| {
220+
let when_thens = (0..batch.num_rows() as i32).map(|i| (lit(i * 1000), lit(i))).collect();
221+
let expr = Arc::new(
222+
case(
223+
Some(Arc::clone(&c2)),
224+
when_thens,
225+
Some(lit(batch.num_rows() as i32))
226+
)
227+
.unwrap(),
228+
);
229+
b.iter(|| black_box(expr.evaluate(black_box(batch)).unwrap()))
230+
});
207231
}
208232

209233
criterion_group!(benches, criterion_benchmark);

0 commit comments

Comments
 (0)