Skip to content

Commit

Permalink
Remove java ColumnView.copyWithBooleanColumnAsValidity (#16660)
Browse files Browse the repository at this point in the history
This depends on NVIDIA/spark-rapids#11399

In short, `ifElse` is faster than this API, and this API is not safe to use generically.

NVIDIA/spark-rapids#11397 (comment)

I am therefore removing it, now that all calls to it have been replaced with calls to `ifElse` / `cudf::copy_if_else`.

Authors:
  - Robert (Bobby) Evans (https://github.com/revans2)

Approvers:
  - Alessandro Bellina (https://github.com/abellina)
  - Mike Wilson (https://github.com/hyperbolic2346)

URL: #16660
  • Loading branch information
revans2 authored Sep 1, 2024
1 parent 4ad4b23 commit 7605958
Show file tree
Hide file tree
Showing 5 changed files with 1 addition and 141 deletions.
38 changes: 0 additions & 38 deletions java/src/main/java/ai/rapids/cudf/ColumnView.java
Original file line number Diff line number Diff line change
Expand Up @@ -913,25 +913,6 @@ public final ColumnVector mergeAndSetValidity(BinaryOp mergeOp, ColumnView... co
return new ColumnVector(bitwiseMergeAndSetValidity(getNativeView(), columnViews, mergeOp.nativeId));
}

/**
 * Produces a deep copy of this column whose validity mask is rebuilt from a boolean column.
 * The new validity mask is the device_vector equivalent of the given boolean column.
 *
 * Row `i` of the result is valid only when row `i` of boolColumn is `true`; a `false` or
 * `null` entry in boolColumn yields a null in the result. boolColumn must contain exactly as
 * many rows as this column, and the result has that same number of rows.
 *
 * Caution: when this column is null at index `i` but boolColumn is `true` at index `i`, the
 * value of that row in the result is undefined.
 *
 * @param boolColumn bool column whose values become the validity mask of the copy.
 * @return Deep copy of this column carrying the replaced validity mask.
 */
public final ColumnVector copyWithBooleanColumnAsValidity(ColumnView boolColumn) {
  // Resolve this column's handle first, then the mask column's, matching argument order.
  final long exemplarHandle = getNativeView();
  final long validityHandle = boolColumn.getNativeView();
  final long copiedColumnHandle = copyWithBooleanColumnAsValidity(exemplarHandle, validityHandle);
  return new ColumnVector(copiedColumnHandle);
}

/////////////////////////////////////////////////////////////////////////////
// DATE/TIME
/////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -4767,25 +4748,6 @@ private static native long clamper(long nativeView, long loScalarHandle, long lo
private static native long bitwiseMergeAndSetValidity(long baseHandle, long[] viewHandles,
int nullConfig) throws CudfException;

/**
 * Native method to deep copy a column while replacing the null mask. The null mask is the
 * device_vector equivalent of the boolean column given as argument.
 *
 * The boolean column must have the same number of rows as the exemplar column, and the
 * result column will have the same number of rows as the exemplar.
 * For all indices `i` where the boolean column is `true`, the result column will have a
 * valid value at index i.
 * For all other values (i.e. `false` or `null`), the result column will have nulls.
 *
 * If the exemplar column has a null at a given index `i`, and the new validity mask is
 * `true` at index `i`, then the resultant row value is undefined.
 *
 * @param exemplarViewHandle column view handle of the column that is deep copied.
 * @param boolColumnViewHandle column view handle of the bool column whose value is to be
 *                             used as the null mask.
 * @return Handle to the deep copy of the column with replaced null mask.
 * @throws CudfException if the native call fails, e.g. when the exemplar and the boolean
 *                       column differ in row count.
 */
private static native long copyWithBooleanColumnAsValidity(long exemplarViewHandle,
long boolColumnViewHandle) throws CudfException;

////////
// Native cudf::column_view life cycle and metadata access methods. Life cycle methods
// should typically only be called from the OffHeap inner class.
Expand Down
15 changes: 0 additions & 15 deletions java/src/main/native/src/ColumnViewJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2090,21 +2090,6 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_bitwiseMergeAndSetValidit
CATCH_STD(env, 0);
}

// JNI binding for ColumnView.copyWithBooleanColumnAsValidity: checks both handles, then
// deep copies the exemplar column with a validity mask derived from the boolean column and
// hands the resulting column back to Java as a jlong handle.
JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_copyWithBooleanColumnAsValidity(
  JNIEnv* env, jobject j_object, jlong exemplar_handle, jlong validity_column_handle)
{
  // Both handles are dereferenced below, so reject null ones up front.
  JNI_NULL_CHECK(env, exemplar_handle, "ColumnView handle is null", 0);
  JNI_NULL_CHECK(env, validity_column_handle, "Validity column handle is null", 0);
  try {
    cudf::jni::auto_set_device(env);
    // The jlong handles carry the addresses of native cudf::column_view objects.
    auto const* source_view = reinterpret_cast<cudf::column_view const*>(exemplar_handle);
    auto const* mask_source =
      reinterpret_cast<cudf::column_view const*>(validity_column_handle);
    return release_as_jlong(
      cudf::jni::new_column_with_boolean_column_as_validity(*source_view, *mask_source));
  }
  CATCH_STD(env, 0);
}

////////
// Native cudf::column_view life cycle and metadata access methods. Life cycle methods
// should typically only be called from the CudfColumn inner class.
Expand Down
31 changes: 0 additions & 31 deletions java/src/main/native/src/ColumnViewJni.cu
Original file line number Diff line number Diff line change
Expand Up @@ -43,37 +43,6 @@

namespace cudf::jni {

// Deep copies `exemplar` and replaces its null mask with one computed from
// `validity_column`: a row of the copy is valid only where the BOOL8 validity column holds
// a non-null `true`. Both columns must have the same number of rows.
std::unique_ptr<cudf::column> new_column_with_boolean_column_as_validity(
  cudf::column_view const& exemplar, cudf::column_view const& validity_column)
{
  CUDF_EXPECTS(validity_column.type().id() == type_id::BOOL8,
               "Validity column must be of type bool");
  CUDF_EXPECTS(validity_column.size() == exemplar.size(),
               "Exemplar and validity columns must have the same size");

  // Same data, offset and children as the exemplar, but with no null mask and a null count
  // of zero; the replacement mask is attached to the copy at the end.
  cudf::column_view const maskless_exemplar{
    exemplar.type(),
    exemplar.size(),
    exemplar.head<void>(),
    nullptr,
    0,
    exemplar.offset(),
    std::vector<cudf::column_view>{exemplar.child_begin(), exemplar.child_end()}};

  // Iterate the validity column as optional<bool> so that null entries can be told apart
  // from false ones; the null check is only enabled when the column actually has nulls.
  auto const d_validity = cudf::column_device_view::create(validity_column);
  auto const first_flag = cudf::detail::make_optional_iterator<bool>(
    *d_validity, cudf::nullate::DYNAMIC{validity_column.has_nulls()});

  // A null entry in the validity column counts as `false`, i.e. it produces a null row.
  auto mask_and_null_count = cudf::detail::valid_if(
    first_flag,
    first_flag + d_validity->size(),
    [] __device__(auto flag) { return flag.value_or(false); },
    cudf::get_default_stream(),
    rmm::mr::get_current_device_resource());

  auto result = std::make_unique<cudf::column>(maskless_exemplar);
  result->set_null_mask(std::move(mask_and_null_count.first), mask_and_null_count.second);
  return result;
}

std::unique_ptr<cudf::column> generate_list_offsets(cudf::column_view const& list_length,
rmm::cuda_stream_view stream)
{
Expand Down
16 changes: 0 additions & 16 deletions java/src/main/native/src/ColumnViewJni.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,6 @@

namespace cudf::jni {

/**
 * @brief Creates a deep copy of the exemplar column, with its validity set to the equivalent
 * of the values in the boolean column `bool_column`.
 *
 * The bool_column must have the same number of rows as the exemplar column.
 * The result column will have the same number of rows as the exemplar.
 * For all indices `i` where the boolean column is `true`, the result column will have a valid value
 * at index i. For all other values (i.e. `false` or `null`), the result column will have nulls.
 *
 * The definition checks with CUDF_EXPECTS that `bool_column` is of type BOOL8 and that both
 * columns have the same size.
 *
 * @param exemplar The column to be deep copied.
 * @param bool_column bool column whose value is to be used as the validity.
 * @return Deep copy of the exemplar, with the replaced validity.
 */
std::unique_ptr<cudf::column> new_column_with_boolean_column_as_validity(
cudf::column_view const& exemplar, cudf::column_view const& bool_column);

/**
* @brief Generates list offsets with lengths of each list.
*
Expand Down
42 changes: 1 addition & 41 deletions java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -6395,46 +6395,6 @@ void testReplaceSameIndexColumnInStruct() {
assertTrue(e.getMessage().contains("Duplicate mapping found for replacing child index"));
}

/**
 * Exercises copyWithBooleanColumnAsValidity with an alternating mask, an all-false mask,
 * a mask containing nulls, and a mask whose row count does not match the exemplar.
 */
@Test
void testCopyWithBooleanColumnAsValidity() {
  final Boolean yes = true;
  final Boolean no = false;
  final Integer nil = null;

  // Alternating mask: every other row becomes null.
  try (ColumnVector source = ColumnVector.fromBoxedInts(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
       ColumnVector mask = ColumnVector.fromBoxedBooleans(no, yes, no, yes, no, yes, no, yes, no, yes);
       ColumnVector want = ColumnVector.fromBoxedInts(nil, 2, nil, 4, nil, 6, nil, 8, nil, 10);
       ColumnVector got = source.copyWithBooleanColumnAsValidity(mask)) {
    assertColumnsAreEqual(want, got);
  }

  // All-false mask: every row becomes null.
  try (ColumnVector source = ColumnVector.fromBoxedInts(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
       ColumnVector mask = ColumnVector.fromBoxedBooleans(no, no, no, no, no, no, no, no, no, no);
       ColumnVector want = ColumnVector.fromBoxedInts(nil, nil, nil, nil, nil, nil, nil, nil, nil, nil);
       ColumnVector got = source.copyWithBooleanColumnAsValidity(mask)) {
    assertColumnsAreEqual(want, got);
  }

  // A null entry in the mask behaves like false: the row becomes null.
  try (ColumnVector source = ColumnVector.fromBoxedInts(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
       ColumnVector mask = ColumnVector.fromBoxedBooleans(no, yes, no, yes, no, yes, no, null, no, null);
       ColumnVector want = ColumnVector.fromBoxedInts(nil, 2, nil, 4, nil, 6, nil, nil, nil, nil);
       ColumnVector got = source.copyWithBooleanColumnAsValidity(mask)) {
    assertColumnsAreEqual(want, got);
  }

  // Failure path: a mask with fewer rows than the exemplar must be rejected.
  Exception thrown = assertThrows(CudfException.class, () -> {
    try (ColumnVector source = ColumnVector.fromBoxedInts(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
         ColumnVector mask = ColumnVector.fromBoxedBooleans(no, yes, no, yes);
         ColumnVector got = source.copyWithBooleanColumnAsValidity(mask)) {
    }
  });
  assertTrue(thrown.getMessage().contains("Exemplar and validity columns must have the same size"));
}

@Test
void testSegmentedGather() {
HostColumnVector.DataType dt = new ListType(true, new BasicType(true, DType.STRING));
Expand Down

0 comments on commit 7605958

Please sign in to comment.