Skip to content

Commit

Permalink
apply suggestions
Browse files Browse the repository at this point in the history
  • Loading branch information
mapleFU committed Sep 5, 2024
1 parent c92d31f commit 374b5f7
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 10 deletions.
12 changes: 5 additions & 7 deletions cpp/src/arrow/dataset/file_parquet.cc
Original file line number Diff line number Diff line change
Expand Up @@ -367,15 +367,13 @@ std::optional<compute::Expression> ParquetFileFragment::EvaluateStatisticsAsExpr
auto field_expr = compute::field_ref(field_ref);

bool may_have_null = !statistics.HasNullCount() || statistics.null_count() > 0;
bool has_null = statistics.HasNullCount() && statistics.null_count() > 0;
// Optimize for corner case where all values are nulls
if (statistics.num_values() == 0) {
if (has_null) {
return is_null(std::move(field_expr));
}
// If there are no values and no nulls, it might be empty or contains
// only null.
return std::nullopt;
// If `statistics.HasNullCount()`, it means the all the values are nulls.
//
// If there are no values and no nulls, it might be empty or all values
// are nulls. In this case, we also return a null expression.
return is_null(std::move(field_expr));
}

std::shared_ptr<Scalar> min, max;
Expand Down
9 changes: 6 additions & 3 deletions cpp/src/arrow/dataset/file_parquet_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -865,26 +865,29 @@ TEST(TestParquetStatistics, NoNullCount) {

auto stat_expression =
ParquetFileFragment::EvaluateStatisticsAsExpression(*field, *stats);
ASSERT_TRUE(stat_expression.has_value());
EXPECT_EQ(stat_expression->ToString(),
"(((x >= 1) and (x <= 100)) or is_null(x, {nan_is_null=false}))");
}
{
// Special case: when num_value is 0, if has_null, it would return
// "is_null", otherwise it cannot gurantees anything
// Special case: when num_value is 0, it would return
// "is_null".
::parquet::EncodedStatistics encoded_stats;
encoded_stats.has_null_count = true;
encoded_stats.null_count = 1;
encoded_stats.all_null_value = true;
auto stats = ::parquet::Statistics::Make(&descr, &encoded_stats, /*num_values=*/0);
auto stat_expression =
ParquetFileFragment::EvaluateStatisticsAsExpression(*field, *stats);
ASSERT_TRUE(stat_expression.has_value());
EXPECT_EQ(stat_expression->ToString(), "is_null(x, {nan_is_null=false})");

encoded_stats.has_null_count = false;
encoded_stats.all_null_value = false;
stats = ::parquet::Statistics::Make(&descr, &encoded_stats, /*num_values=*/0);
stat_expression = ParquetFileFragment::EvaluateStatisticsAsExpression(*field, *stats);
EXPECT_FALSE(stat_expression.has_value());
ASSERT_TRUE(stat_expression.has_value());
EXPECT_EQ(stat_expression->ToString(), "is_null(x, {nan_is_null=false})");
}
}

Expand Down

0 comments on commit 374b5f7

Please sign in to comment.