diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc index 051287492b78b..93eaa7a659d14 100644 --- a/cpp/src/arrow/dataset/file_parquet.cc +++ b/cpp/src/arrow/dataset/file_parquet.cc @@ -367,15 +367,13 @@ std::optional ParquetFileFragment::EvaluateStatisticsAsExpr auto field_expr = compute::field_ref(field_ref); bool may_have_null = !statistics.HasNullCount() || statistics.null_count() > 0; - bool has_null = statistics.HasNullCount() && statistics.null_count() > 0; // Optimize for corner case where all values are nulls if (statistics.num_values() == 0) { - if (has_null) { - return is_null(std::move(field_expr)); - } - // If there are no values and no nulls, it might be empty or contains - // only null. - return std::nullopt; + // If `statistics.HasNullCount()`, it means all the values are nulls. + // + // If there are no values and no nulls, it might be empty or all values + // are nulls. In this case, we also return an `is_null` expression. + return is_null(std::move(field_expr)); } std::shared_ptr min, max; diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc index 8e57d34cba136..2c05dcd9be459 100644 --- a/cpp/src/arrow/dataset/file_parquet_test.cc +++ b/cpp/src/arrow/dataset/file_parquet_test.cc @@ -865,12 +865,13 @@ TEST(TestParquetStatistics, NoNullCount) { auto stat_expression = ParquetFileFragment::EvaluateStatisticsAsExpression(*field, *stats); + ASSERT_TRUE(stat_expression.has_value()); EXPECT_EQ(stat_expression->ToString(), "(((x >= 1) and (x <= 100)) or is_null(x, {nan_is_null=false}))"); } { - // Special case: when num_value is 0, if has_null, it would return - // "is_null", otherwise it cannot gurantees anything + // Special case: when num_value is 0, it would return + // "is_null". 
::parquet::EncodedStatistics encoded_stats; encoded_stats.has_null_count = true; encoded_stats.null_count = 1; @@ -878,13 +879,15 @@ TEST(TestParquetStatistics, NoNullCount) { auto stats = ::parquet::Statistics::Make(&descr, &encoded_stats, /*num_values=*/0); auto stat_expression = ParquetFileFragment::EvaluateStatisticsAsExpression(*field, *stats); + ASSERT_TRUE(stat_expression.has_value()); EXPECT_EQ(stat_expression->ToString(), "is_null(x, {nan_is_null=false})"); encoded_stats.has_null_count = false; encoded_stats.all_null_value = false; stats = ::parquet::Statistics::Make(&descr, &encoded_stats, /*num_values=*/0); stat_expression = ParquetFileFragment::EvaluateStatisticsAsExpression(*field, *stats); - EXPECT_FALSE(stat_expression.has_value()); + ASSERT_TRUE(stat_expression.has_value()); + EXPECT_EQ(stat_expression->ToString(), "is_null(x, {nan_is_null=false})"); } }