From 1ccaa6d98bcf8ead9e25fdd358fc0e398b05c8f2 Mon Sep 17 00:00:00 2001
From: Ryan Johnson <scovich@gmail.com>
Date: Thu, 24 Oct 2024 13:04:51 -0700
Subject: [PATCH] Cleanup and remove redundant parquet stats skipping tests

---
 .../engine/parquet_stats_skipping/tests.rs    | 852 ++++--------------
 kernel/src/scan/data_skipping.rs              |  22 +-
 kernel/src/scan/data_skipping/tests.rs        |  99 +-
 3 files changed, 223 insertions(+), 750 deletions(-)
diff --git a/kernel/src/engine/parquet_stats_skipping/tests.rs b/kernel/src/engine/parquet_stats_skipping/tests.rs
index 069db64cb..5e4e4130b 100644
--- a/kernel/src/engine/parquet_stats_skipping/tests.rs
+++ b/kernel/src/engine/parquet_stats_skipping/tests.rs
@@ -1,9 +1,26 @@
 use super::*;
-use crate::expressions::{ArrayData, Expression, StructData};
+use crate::expressions::Expression as Expr;
 use crate::predicates::PredicateEvaluator;
-use crate::schema::ArrayType;
 use crate::DataType;
 
+const TRUE: Option<bool> = Some(true);
+const FALSE: Option<bool> = Some(false);
+const NULL: Option<bool> = None;
+
+macro_rules! expect_eq {
+    ( $expr: expr, $expect: expr, $fmt: literal ) => {
+        let expect = ($expect);
+        let result = ($expr);
+        assert!(
+            result == expect,
+            "Expected {} = {:?}, got {:?}",
+            format!($fmt),
+            expect,
+            result
+        );
+    };
+}
+
 struct UnimplementedTestFilter;
 impl ParquetStatsProvider for UnimplementedTestFilter {
     fn get_parquet_min_stat(&self, _col: &str, _data_type: &DataType) -> Option<Scalar> {
@@ -23,284 +40,88 @@ impl ParquetStatsProvider for UnimplementedTestFilter {
     }
 }
 
-struct JunctionTest {
-    inputs: &'static [Option<bool>],
-    expect_and: Option<bool>,
-    expect_or: Option<bool>,
-}
+/// Tests apply_variadic and apply_scalar
+#[test]
+fn test_junctions() {
+    use VariadicOperator::*;
 
-macro_rules! expect_eq {
-    ( $expr: expr, $expect: expr, $fmt: literal ) => {
-        let expect = ($expect);
-        let result = ($expr);
-        assert!(
-            result == expect,
-            "Expected {} = {:?}, got {:?}",
-            format!($fmt),
-            expect,
-            result
-        );
-    };
-}
-impl JunctionTest {
-    fn new(
-        inputs: &'static [Option<bool>],
-        expect_and: Option<bool>,
-        expect_or: Option<bool>,
-    ) -> Self {
-        Self {
-            inputs,
-            expect_and,
-            expect_or,
-        }
-    }
-    fn do_test(&self) {
-        use VariadicOperator::*;
-        let filter = UnimplementedTestFilter;
-        let inputs: Vec<_> = self
-            .inputs
+
+    let test_cases = &[
+        // Every combo of 0, 1 and 2 inputs
+        (&[] as &[Option<bool>], TRUE, FALSE),
+        (&[TRUE], TRUE, TRUE),
+        (&[FALSE], FALSE, FALSE),
+        (&[NULL], NULL, NULL),
+        (&[TRUE, TRUE], TRUE, TRUE),
+        (&[TRUE, FALSE], FALSE, TRUE),
+        (&[TRUE, NULL], NULL, TRUE),
+        (&[FALSE, TRUE], FALSE, TRUE),
+        (&[FALSE, FALSE], FALSE, FALSE),
+        (&[FALSE, NULL], FALSE, NULL),
+        (&[NULL, TRUE], NULL, TRUE),
+        (&[NULL, FALSE], FALSE, NULL),
+        (&[NULL, NULL], NULL, NULL),
+        // Every combo of 1:2
+        (&[TRUE, FALSE, FALSE], FALSE, TRUE),
+        (&[FALSE, TRUE, FALSE], FALSE, TRUE),
+        (&[FALSE, FALSE, TRUE], FALSE, TRUE),
+        (&[TRUE, NULL, NULL], NULL, TRUE),
+        (&[NULL, TRUE, NULL], NULL, TRUE),
+        (&[NULL, NULL, TRUE], NULL, TRUE),
+        (&[FALSE, TRUE, TRUE], FALSE, TRUE),
+        (&[TRUE, FALSE, TRUE], FALSE, TRUE),
+        (&[TRUE, TRUE, FALSE], FALSE, TRUE),
+        (&[FALSE, NULL, NULL], FALSE, NULL),
+        (&[NULL, FALSE, NULL], FALSE, NULL),
+        (&[NULL, NULL, FALSE], FALSE, NULL),
+        (&[NULL, TRUE, TRUE], NULL, TRUE),
+        (&[TRUE, NULL, TRUE], NULL, TRUE),
+        (&[TRUE, TRUE, NULL], NULL, TRUE),
+        (&[NULL, FALSE, FALSE], FALSE, NULL),
+        (&[FALSE, NULL, FALSE], FALSE, NULL),
+        (&[FALSE, FALSE, NULL], FALSE, NULL),
+        // Every unique ordering of 3
+        (&[TRUE, FALSE, NULL], FALSE, TRUE),
+        (&[TRUE, NULL, FALSE], FALSE, TRUE),
+        (&[FALSE, TRUE, NULL], FALSE, TRUE),
+        (&[FALSE, NULL, TRUE], FALSE, TRUE),
+        (&[NULL, TRUE, FALSE], FALSE, TRUE),
+        (&[NULL, FALSE, TRUE], FALSE, TRUE),
+    ];
+
+    let filter = UnimplementedTestFilter;
+    for (inputs, expect_and, expect_or) in test_cases {
+        let inputs: Vec<_> = inputs
             .iter()
             .map(|val| match val {
-                Some(v) => Expression::literal(v),
-                None => Expression::null_literal(DataType::BOOLEAN),
+                Some(v) => Expr::literal(v),
+                None => Expr::null_literal(DataType::BOOLEAN),
             })
             .collect();
 
         expect_eq!(
             filter.eval_variadic(And, &inputs, false),
-            self.expect_and,
+            *expect_and,
             "AND({inputs:?})"
         );
         expect_eq!(
             filter.eval_variadic(Or, &inputs, false),
-            self.expect_or,
+            *expect_or,
             "OR({inputs:?})"
         );
         expect_eq!(
             filter.eval_variadic(And, &inputs, true),
-            self.expect_and.map(|val| !val),
+            expect_and.map(|val| !val),
             "NOT(AND({inputs:?}))"
         );
         expect_eq!(
             filter.eval_variadic(Or, &inputs, true),
-            self.expect_or.map(|val| !val),
+            expect_or.map(|val| !val),
             "NOT(OR({inputs:?}))"
         );
     }
 }
 
-/// Tests apply_variadic and apply_scalar
-#[test]
-fn test_junctions() {
-    let test_case = JunctionTest::new;
-    const TRUE: Option<bool> = Some(true);
-    const FALSE: Option<bool> = Some(false);
-    const NULL: Option<bool> = None;
-    let test_cases = &[
-        // Every combo of 0, 1 and 2 inputs
-        test_case(&[] as &[Option<bool>], TRUE, FALSE),
-        test_case(&[TRUE], TRUE, TRUE),
-        test_case(&[FALSE], FALSE, FALSE),
-        test_case(&[NULL], NULL, NULL),
-        test_case(&[TRUE, TRUE], TRUE, TRUE),
-        test_case(&[TRUE, FALSE], FALSE, TRUE),
-        test_case(&[TRUE, NULL], NULL, TRUE),
-        test_case(&[FALSE, TRUE], FALSE, TRUE),
-        test_case(&[FALSE, FALSE], FALSE, FALSE),
-        test_case(&[FALSE, NULL], FALSE, NULL),
-        test_case(&[NULL, TRUE], NULL, TRUE),
-        test_case(&[NULL, FALSE], FALSE, NULL),
-        test_case(&[NULL, NULL], NULL, NULL),
-        // Every combo of 1:2
-        test_case(&[TRUE, FALSE, FALSE], FALSE, TRUE),
-        test_case(&[FALSE, TRUE, FALSE], FALSE, TRUE),
-        test_case(&[FALSE, FALSE, TRUE], FALSE, TRUE),
-        test_case(&[TRUE, NULL, NULL], NULL, TRUE),
-        test_case(&[NULL, TRUE, NULL], NULL, TRUE),
-        test_case(&[NULL, NULL, TRUE], NULL, TRUE),
-        test_case(&[FALSE, TRUE, TRUE], FALSE, TRUE),
-        test_case(&[TRUE, FALSE, TRUE], FALSE, TRUE),
-        test_case(&[TRUE, TRUE, FALSE], FALSE, TRUE),
-        test_case(&[FALSE, NULL, NULL], FALSE, NULL),
-        test_case(&[NULL, FALSE, NULL], FALSE, NULL),
-        test_case(&[NULL, NULL, FALSE], FALSE, NULL),
-        test_case(&[NULL, TRUE, TRUE], NULL, TRUE),
-        test_case(&[TRUE, NULL, TRUE], NULL, TRUE),
-        test_case(&[TRUE, TRUE, NULL], NULL, TRUE),
-        test_case(&[NULL, FALSE, FALSE], FALSE, NULL),
-        test_case(&[FALSE, NULL, FALSE], FALSE, NULL),
-        test_case(&[FALSE, FALSE, NULL], FALSE, NULL),
-        // Every unique ordering of 3
-        test_case(&[TRUE, FALSE, NULL], FALSE, TRUE),
-        test_case(&[TRUE, NULL, FALSE], FALSE, TRUE),
-        test_case(&[FALSE, TRUE, NULL], FALSE, TRUE),
-        test_case(&[FALSE, NULL, TRUE], FALSE, TRUE),
-        test_case(&[NULL, TRUE, FALSE], FALSE, TRUE),
-        test_case(&[NULL, FALSE, TRUE], FALSE, TRUE),
-    ];
-    for test_case in test_cases {
-        test_case.do_test();
-    }
-}
-
-// tests apply_binary_scalars
-#[test]
-fn test_binary_scalars() {
-    use Scalar::*;
-    let smaller_values = &[
-        Integer(1),
-        Long(1),
-        Short(1),
-        Byte(1),
-        Float(1.0),
-        Double(1.0),
-        String("1".into()),
-        Boolean(false),
-        Timestamp(1),
-        TimestampNtz(1),
-        Date(1),
-        Binary(vec![1]),
-        Decimal(1, 10, 10), // invalid value,
-        Null(DataType::LONG),
-        Struct(StructData::try_new(vec![], vec![]).unwrap()),
-        Array(ArrayData::new(
-            ArrayType::new(DataType::LONG, false),
-            vec![],
-        )),
-    ];
-    let larger_values = &[
-        Integer(10),
-        Long(10),
-        Short(10),
-        Byte(10),
-        Float(10.0),
-        Double(10.0),
-        String("10".into()),
-        Boolean(true),
-        Timestamp(10),
-        TimestampNtz(10),
-        Date(10),
-        Binary(vec![10]),
-        Decimal(10, 10, 10), // invalid value
-        Null(DataType::LONG),
-        Struct(StructData::try_new(vec![], vec![]).unwrap()),
-        Array(ArrayData::new(
-            ArrayType::new(DataType::LONG, false),
-            vec![],
-        )),
-    ];
-
-    // scalars of different types are always incomparable
-    use BinaryOperator::*;
-    let binary_ops = [
-        Equal,
-        NotEqual,
-        LessThan,
-        LessThanOrEqual,
-        GreaterThan,
-        GreaterThanOrEqual,
-    ];
-    let compare = |op, a, b| {
-        PredicateEvaluator::eval_binary_scalars(&UnimplementedTestFilter, op, a, b, false)
-    };
-    for (i, a) in smaller_values.iter().enumerate() {
-        for b in smaller_values.iter().skip(i + 1) {
-            for op in binary_ops {
-                let result = compare(op, a, b);
-                let a_type = a.data_type();
-                let b_type = b.data_type();
-                assert!(
-                    result.is_none(),
-                    "{a_type:?} should not be comparable to {b_type:?}"
-                );
-            }
-        }
-    }
-
-    let expect_if_comparable_type = |s: &_, expect| match s {
-        Null(_) | Decimal(..) | Struct(_) | Array(_) => None,
-        _ => Some(expect),
-    };
-
-    // Test same-type comparisons where a == b
-    for (a, b) in smaller_values.iter().zip(smaller_values.iter()) {
-        expect_eq!(
-            compare(Equal, a, b),
-            expect_if_comparable_type(a, true),
-            "{a:?} == {b:?}"
-        );
-
-        expect_eq!(
-            compare(NotEqual, a, b),
-            expect_if_comparable_type(a, false),
-            "{a:?} != {b:?}"
-        );
-
-        expect_eq!(
-            compare(LessThan, a, b),
-            expect_if_comparable_type(a, false),
-            "{a:?} < {b:?}"
-        );
-
-        expect_eq!(
-            compare(GreaterThan, a, b),
-            expect_if_comparable_type(a, false),
-            "{a:?} > {b:?}"
-        );
-
-        expect_eq!(
-            compare(LessThanOrEqual, a, b),
-            expect_if_comparable_type(a, true),
-            "{a:?} <= {b:?}"
-        );
-
-        expect_eq!(
-            compare(GreaterThanOrEqual, a, b),
-            expect_if_comparable_type(a, true),
-            "{a:?} >= {b:?}"
-        );
-    }
-
-    // Test same-type comparisons where a < b
-    for (a, b) in smaller_values.iter().zip(larger_values.iter()) {
-        expect_eq!(
-            compare(Equal, a, b),
-            expect_if_comparable_type(a, false),
-            "{a:?} == {b:?}"
-        );
-
-        expect_eq!(
-            compare(NotEqual, a, b),
-            expect_if_comparable_type(a, true),
-            "{a:?} != {b:?}"
-        );
-
-        expect_eq!(
-            compare(LessThan, a, b),
-            expect_if_comparable_type(a, true),
-            "{a:?} < {b:?}"
-        );
-
-        expect_eq!(
-            compare(GreaterThan, a, b),
-            expect_if_comparable_type(a, false),
-            "{a:?} > {b:?}"
-        );
-
-        expect_eq!(
-            compare(LessThanOrEqual, a, b),
-            expect_if_comparable_type(a, true),
-            "{a:?} <= {b:?}"
-        );
-
-        expect_eq!(
-            compare(GreaterThanOrEqual, a, b),
-            expect_if_comparable_type(a, false),
-            "{a:?} >= {b:?}"
-        );
-    }
-}
-
 struct MinMaxTestFilter {
     min: Option<Scalar>,
     max: Option<Scalar>,
@@ -333,376 +154,53 @@ impl ParquetStatsProvider for MinMaxTestFilter {
     }
 }
 
-#[test]
-fn test_binary_eq_ne() {
-    use BinaryOperator::*;
-
-    const LO: Scalar = Scalar::Long(1);
-    const MID: Scalar = Scalar::Long(10);
-    const HI: Scalar = Scalar::Long(100);
-    let col = &Expression::column("x");
-
-    for inverted in [false, true] {
-        // negative test -- mismatched column type
-        expect_eq!(
-            MinMaxTestFilter::new(MID.into(), MID.into()).eval_binary(
-                Equal,
-                col,
-                &Expression::literal("10"),
-                inverted,
-            ),
-            None,
-            "{col} == '10' (min: {MID}, max: {MID}, inverted: {inverted})"
-        );
-
-        // quick test for literal-literal comparisons
-        expect_eq!(
-            MinMaxTestFilter::new(MID.into(), MID.into()).eval_binary(
-                Equal,
-                &MID.into(),
-                &MID.into(),
-                inverted,
-            ),
-            Some(!inverted),
-            "{MID} == {MID} (min: {MID}, max: {MID}, inverted: {inverted})"
-        );
-
-        // quick test for literal-column comparisons
-        expect_eq!(
-            MinMaxTestFilter::new(MID.into(), MID.into()).eval_binary(
-                Equal,
-                &MID.into(),
-                col,
-                inverted,
-            ),
-            Some(!inverted),
-            "{MID} == {col} (min: {MID}, max: {MID}, inverted: {inverted})"
-        );
-
-        expect_eq!(
-            MinMaxTestFilter::new(MID.into(), MID.into()).eval_binary(
-                Equal,
-                col,
-                &MID.into(),
-                inverted,
-            ),
-            Some(!inverted),
-            "{col} == {MID} (min: {MID}, max: {MID}, inverted: {inverted})"
-        );
-
-        expect_eq!(
-            MinMaxTestFilter::new(LO.into(), HI.into()).eval_binary(
-                Equal,
-                col,
-                &MID.into(),
-                inverted,
-            ),
-            Some(true), // min..max range includes both EQ and NE
-            "{col} == {MID} (min: {LO}, max: {HI}, inverted: {inverted})"
-        );
-
-        expect_eq!(
-            MinMaxTestFilter::new(LO.into(), MID.into()).eval_binary(
-                Equal,
-                col,
-                &HI.into(),
-                inverted,
-            ),
-            Some(inverted),
-            "{col} == {HI} (min: {LO}, max: {MID}, inverted: {inverted})"
-        );
-
-        expect_eq!(
-            MinMaxTestFilter::new(MID.into(), HI.into()).eval_binary(
-                Equal,
-                col,
-                &LO.into(),
-                inverted,
-            ),
-            Some(inverted),
-            "{col} == {LO} (min: {MID}, max: {HI}, inverted: {inverted})"
-        );
-
-        // negative test -- mismatched column type
-        expect_eq!(
-            MinMaxTestFilter::new(MID.into(), MID.into()).eval_binary(
-                NotEqual,
-                col,
-                &Expression::literal("10"),
-                inverted,
-            ),
-            None,
-            "{col} != '10' (min: {MID}, max: {MID}, inverted: {inverted})"
-        );
-
-        expect_eq!(
-            MinMaxTestFilter::new(MID.into(), MID.into()).eval_binary(
-                NotEqual,
-                col,
-                &MID.into(),
-                inverted,
-            ),
-            Some(inverted),
-            "{col} != {MID} (min: {MID}, max: {MID}, inverted: {inverted})"
-        );
-
-        expect_eq!(
-            MinMaxTestFilter::new(LO.into(), HI.into()).eval_binary(
-                NotEqual,
-                col,
-                &MID.into(),
-                inverted,
-            ),
-            Some(true), // min..max range includes both EQ and NE
-            "{col} != {MID} (min: {LO}, max: {HI}, inverted: {inverted})"
-        );
-
-        expect_eq!(
-            MinMaxTestFilter::new(LO.into(), MID.into()).eval_binary(
-                NotEqual,
-                col,
-                &HI.into(),
-                inverted,
-            ),
-            Some(!inverted),
-            "{col} != {HI} (min: {LO}, max: {MID}, inverted: {inverted})"
-        );
-
-        expect_eq!(
-            MinMaxTestFilter::new(MID.into(), HI.into()).eval_binary(
-                NotEqual,
-                col,
-                &LO.into(),
-                inverted,
-            ),
-            Some(!inverted),
-            "{col} != {LO} (min: {MID}, max: {HI}, inverted: {inverted})"
-        );
-    }
-}
 
 #[test]
-fn test_binary_lt_ge() {
-    use BinaryOperator::*;
-
-    const LO: Scalar = Scalar::Long(1);
-    const MID: Scalar = Scalar::Long(10);
-    const HI: Scalar = Scalar::Long(100);
-    let col = &Expression::column("x");
-
-    for inverted in [false, true] {
-        expect_eq!(
-            MinMaxTestFilter::new(MID.into(), MID.into()).eval_binary(
-                LessThan,
-                col,
-                &MID.into(),
-                inverted,
-            ),
-            Some(inverted),
-            "{col} < {MID} (min: {MID}, max: {MID}, inverted: {inverted})"
-        );
-
-        expect_eq!(
-            MinMaxTestFilter::new(LO.into(), HI.into()).eval_binary(
-                LessThan,
-                col,
-                &MID.into(),
-                inverted,
-            ),
-            Some(true), // min..max range includes both LT and GE
-            "{col} < {MID} (min: {LO}, max: {HI}, inverted: {inverted})"
-        );
-
-        expect_eq!(
-            MinMaxTestFilter::new(LO.into(), MID.into()).eval_binary(
-                LessThan,
-                col,
-                &HI.into(),
-                inverted,
-            ),
-            Some(!inverted),
-            "{col} < {HI} (min: {LO}, max: {MID}, inverted: {inverted})"
-        );
-
-        expect_eq!(
-            MinMaxTestFilter::new(MID.into(), HI.into()).eval_binary(
-                LessThan,
-                col,
-                &LO.into(),
-                inverted,
-            ),
-            Some(inverted),
-            "{col} < {LO} (min: {MID}, max: {HI}, inverted: {inverted})"
-        );
-
-        expect_eq!(
-            MinMaxTestFilter::new(MID.into(), MID.into()).eval_binary(
-                GreaterThanOrEqual,
-                col,
-                &MID.into(),
-                inverted,
-            ),
-            Some(!inverted),
-            "{col} >= {MID} (min: {MID}, max: {MID}, inverted: {inverted})"
-        );
-
-        expect_eq!(
-            MinMaxTestFilter::new(LO.into(), HI.into()).eval_binary(
-                GreaterThanOrEqual,
-                col,
-                &MID.into(),
-                inverted,
-            ),
-            Some(true), // min..max range includes both EQ and NE
-            "{col} >= {MID} (min: {LO}, max: {HI}, inverted: {inverted})"
-        );
-
-        expect_eq!(
-            MinMaxTestFilter::new(LO.into(), MID.into()).eval_binary(
-                GreaterThanOrEqual,
-                col,
-                &HI.into(),
-                inverted,
-            ),
-            Some(inverted),
-            "{col} >= {HI} (min: {LO}, max: {MID}, inverted: {inverted})"
-        );
-
-        expect_eq!(
-            MinMaxTestFilter::new(MID.into(), HI.into()).eval_binary(
-                GreaterThanOrEqual,
-                col,
-                &LO.into(),
-                inverted,
-            ),
-            Some(!inverted),
-            "{col} >= {LO} (min: {MID}, max: {HI}, inverted: {inverted})"
-        );
-    }
-}
-
-#[test]
-fn test_binary_le_gt() {
-    use BinaryOperator::*;
-
-    const LO: Scalar = Scalar::Long(1);
-    const MID: Scalar = Scalar::Long(10);
-    const HI: Scalar = Scalar::Long(100);
-    let col = &Expression::column("x");
-
-    for inverted in [false, true] {
-        // negative test -- mismatched column type
-        expect_eq!(
-            MinMaxTestFilter::new(MID.into(), MID.into()).eval_binary(
-                LessThanOrEqual,
-                col,
-                &Expression::literal("10"),
-                inverted,
-            ),
-            None,
-            "{col} <= '10' (min: {MID}, max: {MID}, inverted: {inverted})"
-        );
-
-        expect_eq!(
-            MinMaxTestFilter::new(MID.into(), MID.into()).eval_binary(
-                LessThanOrEqual,
-                col,
-                &MID.into(),
-                inverted,
-            ),
-            Some(!inverted),
-            "{col} <= {MID} (min: {MID}, max: {MID}, inverted: {inverted})"
-        );
+fn test_eval_binary_comparisons() {
+    let col = &Expr::column("x");
+    let five = &Scalar::from(5);
+    let ten = &Scalar::from(10);
+    let fifteen = &Scalar::from(15);
+    let null = &Scalar::Null(DataType::INTEGER);
+
+    let expressions = [
+        Expr::lt(col.clone(), ten.clone()),
+        Expr::le(col.clone(), ten.clone()),
+        Expr::eq(col.clone(), ten.clone()),
+        Expr::ne(col.clone(), ten.clone()),
+        Expr::gt(col.clone(), ten.clone()),
+        Expr::ge(col.clone(), ten.clone()),
+    ];
 
-        expect_eq!(
-            MinMaxTestFilter::new(LO.into(), HI.into()).eval_binary(
-                LessThanOrEqual,
-                col,
-                &MID.into(),
-                inverted,
-            ),
-            Some(true), // min..max range includes both LT and GE
-            "{col} <= {MID} (min: {LO}, max: {HI}, inverted: {inverted})"
-        );
+    let do_test = |min: &Scalar, max: &Scalar, expected: &[Option<bool>]| {
+        let filter = MinMaxTestFilter::new(Some(min.clone()), Some(max.clone()));
+        for (expr, expect) in expressions.iter().zip(expected.iter()) {
+            expect_eq!(
+                filter.eval_expr(&expr, false),
+                *expect,
+                "{expr:#?} with [{min}..{max}]"
+            );
+        }
+    };
 
-        expect_eq!(
-            MinMaxTestFilter::new(LO.into(), MID.into()).eval_binary(
-                LessThanOrEqual,
-                col,
-                &HI.into(),
-                inverted,
-            ),
-            Some(!inverted),
-            "{col} <= {HI} (min: {LO}, max: {MID}, inverted: {inverted})"
-        );
+    // value < min = max (15..15 = 10, 15..15 <= 10, etc)
+    do_test(fifteen, fifteen, &[FALSE, FALSE, FALSE, TRUE, TRUE, TRUE]);
 
-        expect_eq!(
-            MinMaxTestFilter::new(MID.into(), HI.into()).eval_binary(
-                LessThanOrEqual,
-                col,
-                &LO.into(),
-                inverted,
-            ),
-            Some(inverted),
-            "{col} <= {LO} (min: {MID}, max: {HI}, inverted: {inverted})"
-        );
+    // min = max = value (10..10 = 10, 10..10 <= 10, etc)
+    //
+    // NOTE: missing min or max stat produces NULL output if the expression needed it.
+    do_test(ten, ten, &[FALSE, TRUE, TRUE, FALSE, FALSE, TRUE]);
+    do_test(null, ten, &[NULL, NULL, NULL, NULL, FALSE, TRUE]);
+    do_test(ten, null, &[FALSE, TRUE, NULL, NULL, NULL, NULL]);
 
-        // negative test -- mismatched column type
-        expect_eq!(
-            MinMaxTestFilter::new(MID.into(), MID.into()).eval_binary(
-                GreaterThan,
-                col,
-                &Expression::literal("10"),
-                inverted,
-            ),
-            None,
-            "{col} > '10' (min: {MID}, max: {MID}, inverted: {inverted})"
-        );
+    // min = max < value (5..5 = 10, 5..5 <= 10, etc)
+    do_test(five, five, &[TRUE, TRUE, FALSE, TRUE, FALSE, FALSE]);
 
-        expect_eq!(
-            MinMaxTestFilter::new(MID.into(), MID.into()).eval_binary(
-                GreaterThan,
-                col,
-                &MID.into(),
-                inverted,
-            ),
-            Some(inverted),
-            "{col} > {MID} (min: {MID}, max: {MID}, inverted: {inverted})"
-        );
+    // value = min < max (5..15 = 10, 5..15 <= 10, etc)
+    do_test(ten, fifteen, &[FALSE, TRUE, TRUE, TRUE, TRUE, TRUE]);
 
-        expect_eq!(
-            MinMaxTestFilter::new(LO.into(), HI.into()).eval_binary(
-                GreaterThan,
-                col,
-                &MID.into(),
-                inverted,
-            ),
-            Some(true), // min..max range includes both EQ and NE
-            "{col} > {MID} (min: {LO}, max: {HI}, inverted: {inverted})"
-        );
-
-        expect_eq!(
-            MinMaxTestFilter::new(LO.into(), MID.into()).eval_binary(
-                GreaterThan,
-                col,
-                &HI.into(),
-                inverted,
-            ),
-            Some(inverted),
-            "{col} > {HI} (min: {LO}, max: {MID}, inverted: {inverted})"
-        );
-
-        expect_eq!(
-            MinMaxTestFilter::new(MID.into(), HI.into()).eval_binary(
-                GreaterThan,
-                col,
-                &LO.into(),
-                inverted,
-            ),
-            Some(!inverted),
-            "{col} > {LO} (min: {MID}, max: {HI}, inverted: {inverted})"
-        );
-    }
+    // min < value < max (5..15 = 10, 5..15 <= 10, etc)
+    do_test(five, fifteen, &[TRUE, TRUE, TRUE, TRUE, TRUE, TRUE]);
 }
 
 struct NullCountTestFilter {
@@ -736,59 +234,29 @@ impl ParquetStatsProvider for NullCountTestFilter {
 }
 
 #[test]
-fn test_not_null() {
-    use UnaryOperator::IsNull;
-
-    let col = &Expression::column("x");
-    for inverted in [false, true] {
-        expect_eq!(
-            NullCountTestFilter::new(None, 10).eval_unary(IsNull, col, inverted),
-            None,
-            "{col} IS NULL (nullcount: None, rowcount: 10, inverted: {inverted})"
-        );
-
-        expect_eq!(
-            NullCountTestFilter::new(Some(0), 10).eval_unary(IsNull, col, inverted),
-            Some(inverted),
-            "{col} IS NULL (nullcount: 0, rowcount: 10, inverted: {inverted})"
-        );
+fn test_eval_is_null() {
+    let col = &Expr::column("x");
+    let expressions = [Expr::is_null(col.clone()), !Expr::is_null(col.clone())];
+
+    let do_test = |nullcount: i64, expected: &[Option<bool>]| {
+        let filter = NullCountTestFilter::new(Some(nullcount), 2);
+        for (expr, expect) in expressions.iter().zip(expected) {
+            expect_eq!(
+                filter.eval_expr(expr, false),
+                *expect,
+                "{expr:#?} ({nullcount} nulls)"
+            );
+        }
+    };
 
-        expect_eq!(
-            NullCountTestFilter::new(Some(5), 10).eval_unary(IsNull, col, inverted),
-            Some(true),
-            "{col} IS NULL (nullcount: 5, rowcount: 10, inverted: {inverted})"
-        );
+    // no nulls
+    do_test(0, &[FALSE, TRUE]);
 
-        expect_eq!(
-            NullCountTestFilter::new(Some(10), 10).eval_unary(IsNull, col, inverted),
-            Some(!inverted),
-            "{col} IS NULL (nullcount: 10, rowcount: 10, inverted: {inverted})"
-        );
-    }
-}
+    // some nulls
+    do_test(1, &[TRUE, TRUE]);
 
-#[test]
-fn test_bool_col() {
-    use Scalar::Boolean;
-    const TRUE: Scalar = Boolean(true);
-    const FALSE: Scalar = Boolean(false);
-    for inverted in [false, true] {
-        expect_eq!(
-            MinMaxTestFilter::new(TRUE.into(), TRUE.into()).eval_column("x", inverted),
-            Some(!inverted),
-            "x as boolean (min: TRUE, max: TRUE, inverted: {inverted})"
-        );
-        expect_eq!(
-            MinMaxTestFilter::new(FALSE.into(), TRUE.into()).eval_column("x", inverted),
-            Some(true),
-            "x as boolean (min: FALSE, max: TRUE, inverted: {inverted})"
-        );
-        expect_eq!(
-            MinMaxTestFilter::new(FALSE.into(), FALSE.into()).eval_column("x", inverted),
-            Some(inverted),
-            "x as boolean (min: FALSE, max: FALSE, inverted: {inverted})"
-        );
-    }
+    // all nulls
+    do_test(2, &[TRUE, FALSE]);
 }
 
 struct AllNullTestFilter;
@@ -812,44 +280,44 @@ impl ParquetStatsProvider for AllNullTestFilter {
 
 #[test]
 fn test_sql_where() {
-    let col = &Expression::column("x");
-    let val = &Expression::literal(1);
-    const NULL: Expression = Expression::Literal(Scalar::Null(DataType::BOOLEAN));
-    const FALSE: Expression = Expression::Literal(Scalar::Boolean(false));
-    const TRUE: Expression = Expression::Literal(Scalar::Boolean(true));
+    let col = &Expr::column("x");
+    let val = &Expr::literal(1);
+    const NULL: Expr = Expr::Literal(Scalar::Null(DataType::BOOLEAN));
+    const FALSE: Expr = Expr::Literal(Scalar::Boolean(false));
+    const TRUE: Expr = Expr::Literal(Scalar::Boolean(true));
 
     // Basic sanity checks
     expect_eq!(AllNullTestFilter.eval_sql_where(val), None, "WHERE {val}");
     expect_eq!(AllNullTestFilter.eval_sql_where(col), None, "WHERE {col}");
     expect_eq!(
-        AllNullTestFilter.eval_sql_where(&Expression::is_null(col.clone())),
+        AllNullTestFilter.eval_sql_where(&Expr::is_null(col.clone())),
         Some(true), // No injected NULL checks
         "WHERE {col} IS NULL"
     );
     expect_eq!(
-        AllNullTestFilter.eval_sql_where(&Expression::lt(TRUE, FALSE)),
+        AllNullTestFilter.eval_sql_where(&Expr::lt(TRUE, FALSE)),
         Some(false), // Injected NULL checks don't short circuit when inputs are NOT NULL
         "WHERE {TRUE} < {FALSE}"
     );
 
     // Constrast normal vs SQL WHERE semantics - comparison
     expect_eq!(
-        AllNullTestFilter.eval_expr(&Expression::lt(col.clone(), val.clone()), false),
+        AllNullTestFilter.eval_expr(&Expr::lt(col.clone(), val.clone()), false),
         None,
         "{col} < {val}"
     );
     expect_eq!(
-        AllNullTestFilter.eval_sql_where(&Expression::lt(col.clone(), val.clone())),
+        AllNullTestFilter.eval_sql_where(&Expr::lt(col.clone(), val.clone())),
         Some(false),
         "WHERE {col} < {val}"
     );
     expect_eq!(
-        AllNullTestFilter.eval_expr(&Expression::lt(val.clone(), col.clone()), false),
+        AllNullTestFilter.eval_expr(&Expr::lt(val.clone(), col.clone()), false),
         None,
         "{val} < {col}"
     );
     expect_eq!(
-        AllNullTestFilter.eval_sql_where(&Expression::lt(val.clone(), col.clone())),
+        AllNullTestFilter.eval_sql_where(&Expr::lt(val.clone(), col.clone())),
         Some(false),
         "WHERE {val} < {col}"
     );
@@ -857,16 +325,16 @@ fn test_sql_where() {
     // Constrast normal vs SQL WHERE semantics - comparison inside AND
     expect_eq!(
         AllNullTestFilter.eval_expr(
-            &Expression::and(NULL, Expression::lt(col.clone(), val.clone())),
+            &Expr::and(NULL, Expr::lt(col.clone(), val.clone())),
             false
         ),
         None,
         "{NULL} AND {col} < {val}"
     );
     expect_eq!(
-        AllNullTestFilter.eval_sql_where(&Expression::and(
+        AllNullTestFilter.eval_sql_where(&Expr::and(
             NULL,
-            Expression::lt(col.clone(), val.clone()),
+            Expr::lt(col.clone(), val.clone()),
         )),
         Some(false),
         "WHERE {NULL} AND {col} < {val}"
@@ -874,16 +342,16 @@ fn test_sql_where() {
 
     expect_eq!(
         AllNullTestFilter.eval_expr(
-            &Expression::and(TRUE, Expression::lt(col.clone(), val.clone())),
+            &Expr::and(TRUE, Expr::lt(col.clone(), val.clone())),
             false
         ),
         None, // NULL (from the NULL check) is stronger than TRUE
         "{TRUE} AND {col} < {val}"
     );
     expect_eq!(
-        AllNullTestFilter.eval_sql_where(&Expression::and(
+        AllNullTestFilter.eval_sql_where(&Expr::and(
             TRUE,
-            Expression::lt(col.clone(), val.clone()),
+            Expr::lt(col.clone(), val.clone()),
         )),
         Some(false), // FALSE (from the NULL check) is stronger than TRUE
         "WHERE {TRUE} AND {col} < {val}"
@@ -892,9 +360,9 @@ fn test_sql_where() {
     // Contrast normal vs. SQL WHERE semantics - comparison inside AND inside AND
     expect_eq!(
         AllNullTestFilter.eval_expr(
-            &Expression::and(
+            &Expr::and(
                 TRUE,
-                Expression::and(NULL, Expression::lt(col.clone(), val.clone())),
+                Expr::and(NULL, Expr::lt(col.clone(), val.clone())),
             ),
             false,
         ),
@@ -902,9 +370,9 @@ fn test_sql_where() {
         "{TRUE} AND ({NULL} AND {col} < {val})"
     );
     expect_eq!(
-        AllNullTestFilter.eval_sql_where(&Expression::and(
+        AllNullTestFilter.eval_sql_where(&Expr::and(
             TRUE,
-            Expression::and(NULL, Expression::lt(col.clone(), val.clone())),
+            Expr::and(NULL, Expr::lt(col.clone(), val.clone())),
         )),
         Some(false),
         "WHERE {TRUE} AND ({NULL} AND {col} < {val})"
@@ -913,9 +381,9 @@ fn test_sql_where() {
     // Semantics are the same for comparison inside OR inside AND
     expect_eq!(
         AllNullTestFilter.eval_expr(
-            &Expression::or(
+            &Expr::or(
                 FALSE,
-                Expression::and(NULL, Expression::lt(col.clone(), val.clone())),
+                Expr::and(NULL, Expr::lt(col.clone(), val.clone())),
             ),
             false,
         ),
@@ -923,9 +391,9 @@ fn test_sql_where() {
         "{FALSE} OR ({NULL} AND {col} < {val})"
     );
     expect_eq!(
-        AllNullTestFilter.eval_sql_where(&Expression::or(
+        AllNullTestFilter.eval_sql_where(&Expr::or(
             FALSE,
-            Expression::and(NULL, Expression::lt(col.clone(), val.clone())),
+            Expr::and(NULL, Expr::lt(col.clone(), val.clone())),
         )),
         None,
         "WHERE {FALSE} OR ({NULL} AND {col} < {val})"
diff --git a/kernel/src/scan/data_skipping.rs b/kernel/src/scan/data_skipping.rs
index 094c215b4..34ca7e9ed 100644
--- a/kernel/src/scan/data_skipping.rs
+++ b/kernel/src/scan/data_skipping.rs
@@ -260,13 +260,21 @@ impl DataSkippingPredicateEvaluator for DataSkippingPredicateCreator {
         if inverted {
             op = op.invert();
         }
-        // NOTE: By flattening, AND can produce TRUE where NULL would otherwise be expected. But
-        // this is for data skipping, which only cares about FALSE. So we accept and keep it simple.
-        let exprs = exprs.into_iter();
-        let exprs: Vec<_> = match op {
-            VariadicOperator::And => exprs.flatten().collect(),
-            VariadicOperator::Or => exprs.collect::<Option<_>>()?,
-        };
+        // NOTE: We can potentially see a LOT of NULL inputs in a big WHERE clause with lots of
+        // unsupported data skipping operations. We can't "just" flatten them all away for AND,
+        // because could produce TRUE where NULL would otherwise be expected. Similarly, we don't
+        // want to "just" try_collect inputs for OR, because that can cause OR to produce NULL where
+        // FALSE would otherwise be expected. So, we filter out all nulls except the first,
+        // observing that one NULL is enough to produce the correct behavior during predicate eval.
+        let mut have_null = false;
+        let exprs: Vec<_> = exprs.into_iter().flat_map(|e| match e {
+            Some(expr) => Some(expr),
+            None if have_null => None,
+            None => {
+                have_null = true;
+                Some(Expr::null_literal(DataType::BOOLEAN))
+            }
+        }).collect();
         Some(Expr::variadic(op, exprs))
     }
 }
diff --git a/kernel/src/scan/data_skipping/tests.rs b/kernel/src/scan/data_skipping/tests.rs
index 175ea0b72..018eb2f76 100644
--- a/kernel/src/scan/data_skipping/tests.rs
+++ b/kernel/src/scan/data_skipping/tests.rs
@@ -105,52 +105,49 @@ fn test_eval_binary_comparisons() {
 
 #[test]
 fn test_eval_variadic() {
-    // NOTE: The data skipping logic can produce TRUE instead of NULL in some cases. This is
-    // harmless, because data skipping only cares about FALSE (NULL and TRUE are treated the same).
     let test_cases = &[
-        // Every combo of 0, 1 and 2 inputs
-        (&[] as &[Option<bool>], TRUE, FALSE, FALSE, TRUE),
-        (&[TRUE], TRUE, TRUE, FALSE, FALSE),
-        (&[FALSE], FALSE, FALSE, TRUE, TRUE),
-        (&[NULL], TRUE, NULL, NULL, TRUE), // NULL -> TRUE
-        (&[TRUE, TRUE], TRUE, TRUE, FALSE, FALSE),
-        (&[TRUE, FALSE], FALSE, TRUE, TRUE, FALSE),
-        (&[TRUE, NULL], TRUE, NULL, NULL, FALSE), // NULL -> TRUE
-        (&[FALSE, TRUE], FALSE, TRUE, TRUE, FALSE),
-        (&[FALSE, FALSE], FALSE, FALSE, TRUE, TRUE),
-        (&[FALSE, NULL], FALSE, NULL, NULL, TRUE), // NULL -> TRUE
-        (&[NULL, TRUE], TRUE, NULL, NULL, FALSE),  // NULL -> TRUE
-        (&[NULL, FALSE], FALSE, NULL, NULL, TRUE), // NULL -> TRUE
-        (&[NULL, NULL], TRUE, NULL, NULL, TRUE),   // NULL -> TRUE
+        (&[]as &[Option<bool>], TRUE, FALSE),
+        (&[TRUE], TRUE, TRUE),
+        (&[FALSE], FALSE, FALSE),
+        (&[NULL], NULL, NULL),
+        (&[TRUE, TRUE], TRUE, TRUE),
+        (&[TRUE, FALSE], FALSE, TRUE),
+        (&[TRUE, NULL], NULL, TRUE),
+        (&[FALSE, TRUE], FALSE, TRUE),
+        (&[FALSE, FALSE], FALSE, FALSE),
+        (&[FALSE, NULL], FALSE, NULL),
+        (&[NULL, TRUE], NULL, TRUE),
+        (&[NULL, FALSE], FALSE, NULL),
+        (&[NULL, NULL], NULL, NULL),
         // Every combo of 1:2
-        (&[TRUE, FALSE, FALSE], FALSE, TRUE, TRUE, FALSE),
-        (&[FALSE, TRUE, FALSE], FALSE, TRUE, TRUE, FALSE),
-        (&[FALSE, FALSE, TRUE], FALSE, TRUE, TRUE, FALSE),
-        (&[TRUE, NULL, NULL], TRUE, NULL, NULL, FALSE), // NULL -> TRUE
-        (&[NULL, TRUE, NULL], TRUE, NULL, NULL, FALSE), // NULL -> TRUE
-        (&[NULL, NULL, TRUE], TRUE, NULL, NULL, FALSE), // NULL -> TRUE
-        (&[FALSE, TRUE, TRUE], FALSE, TRUE, TRUE, FALSE),
-        (&[TRUE, FALSE, TRUE], FALSE, TRUE, TRUE, FALSE),
-        (&[TRUE, TRUE, FALSE], FALSE, TRUE, TRUE, FALSE),
-        (&[FALSE, NULL, NULL], FALSE, NULL, NULL, TRUE), // NULL -> TRUE
-        (&[NULL, FALSE, NULL], FALSE, NULL, NULL, TRUE), // NULL -> TRUE
-        (&[NULL, NULL, FALSE], FALSE, NULL, NULL, TRUE), // NULL -> TRUE
-        (&[NULL, TRUE, TRUE], TRUE, NULL, NULL, FALSE),  // NULL -> TRUE
-        (&[TRUE, NULL, TRUE], TRUE, NULL, NULL, FALSE),  // NULL -> TRUE
-        (&[TRUE, TRUE, NULL], TRUE, NULL, NULL, FALSE),  // NULL -> TRUE
-        (&[NULL, FALSE, FALSE], FALSE, NULL, NULL, TRUE), // NULL -> TRUE
-        (&[FALSE, NULL, FALSE], FALSE, NULL, NULL, TRUE), // NULL -> TRUE
-        (&[FALSE, FALSE, NULL], FALSE, NULL, NULL, TRUE), // NULL -> TRUE
+        (&[TRUE, FALSE, FALSE], FALSE, TRUE),
+        (&[FALSE, TRUE, FALSE], FALSE, TRUE),
+        (&[FALSE, FALSE, TRUE], FALSE, TRUE),
+        (&[TRUE, NULL, NULL], NULL, TRUE),
+        (&[NULL, TRUE, NULL], NULL, TRUE),
+        (&[NULL, NULL, TRUE], NULL, TRUE),
+        (&[FALSE, TRUE, TRUE], FALSE, TRUE),
+        (&[TRUE, FALSE, TRUE], FALSE, TRUE),
+        (&[TRUE, TRUE, FALSE], FALSE, TRUE),
+        (&[FALSE, NULL, NULL], FALSE, NULL),
+        (&[NULL, FALSE, NULL], FALSE, NULL),
+        (&[NULL, NULL, FALSE], FALSE, NULL),
+        (&[NULL, TRUE, TRUE], NULL, TRUE),
+        (&[TRUE, NULL, TRUE], NULL, TRUE),
+        (&[TRUE, TRUE, NULL], NULL, TRUE),
+        (&[NULL, FALSE, FALSE], FALSE, NULL),
+        (&[FALSE, NULL, FALSE], FALSE, NULL),
+        (&[FALSE, FALSE, NULL], FALSE, NULL),
         // Every unique ordering of 3
-        (&[TRUE, FALSE, NULL], FALSE, NULL, NULL, FALSE),
-        (&[TRUE, NULL, FALSE], FALSE, NULL, NULL, FALSE),
-        (&[FALSE, TRUE, NULL], FALSE, NULL, NULL, FALSE),
-        (&[FALSE, NULL, TRUE], FALSE, NULL, NULL, FALSE),
-        (&[NULL, TRUE, FALSE], FALSE, NULL, NULL, FALSE),
-        (&[NULL, FALSE, TRUE], FALSE, NULL, NULL, FALSE),
+        (&[TRUE, FALSE, NULL], FALSE, TRUE),
+        (&[TRUE, NULL, FALSE], FALSE, TRUE),
+        (&[FALSE, TRUE, NULL], FALSE, TRUE),
+        (&[FALSE, NULL, TRUE], FALSE, TRUE),
+        (&[NULL, TRUE, FALSE], FALSE, TRUE),
+        (&[NULL, FALSE, TRUE], FALSE, TRUE),
     ];
     let filter = DefaultPredicateEvaluator::from(UnimplementedColumnResolver);
-    for (inputs, expect_and, expect_or, expect_not_and, expect_not_or) in test_cases {
+    for (inputs, expect_and, expect_or) in test_cases {
         let inputs: Vec<_> = inputs
             .iter()
             .map(|val| match val {
@@ -160,34 +157,34 @@ fn test_eval_variadic() {
             .collect();
 
         let expr = Expr::and_from(inputs.clone());
-        let pred = as_data_skipping_predicate(&expr, false);
+        let pred = as_data_skipping_predicate(&expr, false).unwrap();
         expect_eq!(
-            pred.and_then(|p| filter.eval_expr(&p, false)),
+            filter.eval_expr(&pred, false),
             *expect_and,
             "AND({inputs:?})"
         );
 
         let expr = Expr::or_from(inputs.clone());
-        let pred = as_data_skipping_predicate(&expr, false);
+        let pred = as_data_skipping_predicate(&expr, false).unwrap();
         expect_eq!(
-            pred.and_then(|p| filter.eval_expr(&p, false)),
+            filter.eval_expr(&pred, false),
             *expect_or,
             "OR({inputs:?})"
         );
 
         let expr = Expr::and_from(inputs.clone());
-        let pred = as_data_skipping_predicate(&expr, true);
+        let pred = as_data_skipping_predicate(&expr, true).unwrap();
         expect_eq!(
-            pred.and_then(|p| filter.eval_expr(&p, false)),
-            *expect_not_and,
+            filter.eval_expr(&pred, false),
+            expect_and.map(|val| !val),
             "NOT AND({inputs:?})"
         );
 
         let expr = Expr::or_from(inputs.clone());
-        let pred = as_data_skipping_predicate(&expr, true);
+        let pred = as_data_skipping_predicate(&expr, true).unwrap();
         expect_eq!(
-            pred.and_then(|p| filter.eval_expr(&p, false)),
-            *expect_not_or,
+            filter.eval_expr(&pred, false),
+            expect_or.map(|val| !val),
             "NOT OR({inputs:?})"
         );
     }