Skip to content

Commit

Permalink
Merge branch 'branch-24.12' into all_host_alloc_single_api_v3
Browse files Browse the repository at this point in the history
  • Loading branch information
revans2 committed Oct 30, 2024
2 parents ca35305 + 6c2eb4e commit 18df720
Show file tree
Hide file tree
Showing 16 changed files with 671 additions and 166 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/auto-assign.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ jobs:
steps:
- uses: actions-ecosystem/action-add-assignees@v1
with:
repo_token: "${{ secrets.GITHUB_TOKEN }}"
github_token: "${{ secrets.GITHUB_TOKEN }}"
assignees: ${{ github.actor }}
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ dependencies:
- numpy>=1.23,<3.0a0
- numpydoc
- nvcc_linux-64=11.8
- nvcomp==4.0.1
- nvcomp==4.1.0.6
- nvtx>=0.2.1
- openpyxl
- packaging
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ dependencies:
- numba-cuda>=0.0.13
- numpy>=1.23,<3.0a0
- numpydoc
- nvcomp==4.0.1
- nvcomp==4.1.0.6
- nvtx>=0.2.1
- openpyxl
- packaging
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/libcudf/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ spdlog_version:
- ">=1.14.1,<1.15"

nvcomp_version:
- "=4.0.1"
- "=4.1.0.6"

zlib_version:
- ">=1.2.13"
Expand Down
26 changes: 13 additions & 13 deletions cpp/benchmarks/ast/transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ enum class TreeType {
template <typename key_type, TreeType tree_type, bool reuse_columns, bool Nullable>
static void BM_ast_transform(nvbench::state& state)
{
auto const table_size = static_cast<cudf::size_type>(state.get_int64("table_size"));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const tree_levels = static_cast<cudf::size_type>(state.get_int64("tree_levels"));

// Create table data
auto const n_cols = reuse_columns ? 1 : tree_levels + 1;
auto const source_table =
create_sequence_table(cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols),
row_count{table_size},
row_count{num_rows},
Nullable ? std::optional<double>{0.5} : std::nullopt);
auto table = source_table->view();

Expand Down Expand Up @@ -99,8 +99,8 @@ static void BM_ast_transform(nvbench::state& state)
auto const& expression_tree_root = expressions.back();

// Use the number of bytes read from global memory
state.add_global_memory_reads<key_type>(static_cast<size_t>(table_size) * (tree_levels + 1));
state.add_global_memory_writes<key_type>(table_size);
state.add_global_memory_reads<key_type>(static_cast<size_t>(num_rows) * (tree_levels + 1));
state.add_global_memory_writes<key_type>(num_rows);

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); });
Expand All @@ -109,15 +109,15 @@ static void BM_ast_transform(nvbench::state& state)
template <cudf::ast::ast_operator cmp_op, cudf::ast::ast_operator reduce_op>
static void BM_string_compare_ast_transform(nvbench::state& state)
{
auto const string_width = static_cast<cudf::size_type>(state.get_int64("string_width"));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const num_comparisons = static_cast<cudf::size_type>(state.get_int64("num_comparisons"));
auto const hit_rate = static_cast<cudf::size_type>(state.get_int64("hit_rate"));
auto const string_width = static_cast<cudf::size_type>(state.get_int64("string_width"));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const tree_levels = static_cast<cudf::size_type>(state.get_int64("tree_levels"));
auto const hit_rate = static_cast<cudf::size_type>(state.get_int64("hit_rate"));

CUDF_EXPECTS(num_comparisons > 0, "benchmarks require 1 or more comparisons");
CUDF_EXPECTS(tree_levels > 0, "benchmarks require 1 or more comparisons");

// Create table data
auto const num_cols = num_comparisons * 2;
auto const num_cols = tree_levels * 2;
std::vector<std::unique_ptr<cudf::column>> columns;
std::for_each(
thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) {
Expand Down Expand Up @@ -150,7 +150,7 @@ static void BM_string_compare_ast_transform(nvbench::state& state)
expressions.emplace_back(cudf::ast::operation(cmp_op, column_refs[0], column_refs[1]));

std::for_each(thrust::make_counting_iterator(1),
thrust::make_counting_iterator(num_comparisons),
thrust::make_counting_iterator(tree_levels),
[&](size_t idx) {
auto const& lhs = expressions.back();
auto const& rhs = expressions.emplace_back(
Expand All @@ -177,7 +177,7 @@ static void BM_string_compare_ast_transform(nvbench::state& state)
NVBENCH_BENCH(name) \
.set_name(#name) \
.add_int64_axis("tree_levels", {1, 5, 10}) \
.add_int64_axis("table_size", {100'000, 1'000'000, 10'000'000, 100'000'000})
.add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000, 100'000'000})

AST_TRANSFORM_BENCHMARK_DEFINE(
ast_int32_imbalanced_unique, int32_t, TreeType::IMBALANCED_LEFT, false, false);
Expand All @@ -202,7 +202,7 @@ AST_TRANSFORM_BENCHMARK_DEFINE(
.set_name(#name) \
.add_int64_axis("string_width", {32, 64, 128, 256}) \
.add_int64_axis("num_rows", {32768, 262144, 2097152}) \
.add_int64_axis("num_comparisons", {1, 2, 3, 4}) \
.add_int64_axis("tree_levels", {1, 2, 3, 4}) \
.add_int64_axis("hit_rate", {50, 100})

AST_STRING_COMPARE_TRANSFORM_BENCHMARK_DEFINE(ast_string_equal_logical_and,
Expand Down
26 changes: 13 additions & 13 deletions cpp/benchmarks/binaryop/binaryop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,18 @@ enum class TreeType {
template <typename key_type, TreeType tree_type, bool reuse_columns>
static void BM_binaryop_transform(nvbench::state& state)
{
auto const table_size{static_cast<cudf::size_type>(state.get_int64("table_size"))};
auto const num_rows{static_cast<cudf::size_type>(state.get_int64("num_rows"))};
auto const tree_levels{static_cast<cudf::size_type>(state.get_int64("tree_levels"))};

// Create table data
auto const n_cols = reuse_columns ? 1 : tree_levels + 1;
auto const source_table = create_sequence_table(
cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols), row_count{table_size});
cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols), row_count{num_rows});
cudf::table_view table{*source_table};

// Use the number of bytes read from global memory
state.add_global_memory_reads<key_type>(static_cast<size_t>(table_size) * (tree_levels + 1));
state.add_global_memory_writes<key_type>(table_size);
state.add_global_memory_reads<key_type>(static_cast<size_t>(num_rows) * (tree_levels + 1));
state.add_global_memory_writes<key_type>(num_rows);

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
// Execute tree that chains additions like (((a + b) + c) + d)
Expand All @@ -74,15 +74,15 @@ static void BM_binaryop_transform(nvbench::state& state)
template <cudf::binary_operator cmp_op, cudf::binary_operator reduce_op>
static void BM_string_compare_binaryop_transform(nvbench::state& state)
{
auto const string_width = static_cast<cudf::size_type>(state.get_int64("string_width"));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const num_comparisons = static_cast<cudf::size_type>(state.get_int64("num_comparisons"));
auto const hit_rate = static_cast<cudf::size_type>(state.get_int64("hit_rate"));
auto const string_width = static_cast<cudf::size_type>(state.get_int64("string_width"));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const tree_levels = static_cast<cudf::size_type>(state.get_int64("tree_levels"));
auto const hit_rate = static_cast<cudf::size_type>(state.get_int64("hit_rate"));

CUDF_EXPECTS(num_comparisons > 0, "benchmarks require 1 or more comparisons");
CUDF_EXPECTS(tree_levels > 0, "benchmarks require 1 or more comparisons");

// Create table data
auto const num_cols = num_comparisons * 2;
auto const num_cols = tree_levels * 2;
std::vector<std::unique_ptr<cudf::column>> columns;
std::for_each(
thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) {
Expand Down Expand Up @@ -113,7 +113,7 @@ static void BM_string_compare_binaryop_transform(nvbench::state& state)
cudf::binary_operation(table.get_column(0), table.get_column(1), cmp_op, bool_type, stream);
std::for_each(
thrust::make_counting_iterator(1),
thrust::make_counting_iterator(num_comparisons),
thrust::make_counting_iterator(tree_levels),
[&](size_t idx) {
std::unique_ptr<cudf::column> comparison = cudf::binary_operation(
table.get_column(idx * 2), table.get_column(idx * 2 + 1), cmp_op, bool_type, stream);
Expand All @@ -133,7 +133,7 @@ static void BM_string_compare_binaryop_transform(nvbench::state& state)
} \
NVBENCH_BENCH(name) \
.add_int64_axis("tree_levels", {1, 2, 5, 10}) \
.add_int64_axis("table_size", {100'000, 1'000'000, 10'000'000, 100'000'000})
.add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000, 100'000'000})

BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_int32_imbalanced_unique,
int32_t,
Expand All @@ -158,7 +158,7 @@ BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_double_imbalanced_unique,
.set_name(#name) \
.add_int64_axis("string_width", {32, 64, 128, 256}) \
.add_int64_axis("num_rows", {32768, 262144, 2097152}) \
.add_int64_axis("num_comparisons", {1, 2, 3, 4}) \
.add_int64_axis("tree_levels", {1, 2, 3, 4}) \
.add_int64_axis("hit_rate", {50, 100})

STRING_COMPARE_BINARYOP_TRANSFORM_BENCHMARK_DEFINE(string_compare_binaryop_transform,
Expand Down
12 changes: 6 additions & 6 deletions cpp/benchmarks/binaryop/compiled_binaryop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
template <typename TypeLhs, typename TypeRhs, typename TypeOut>
void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop)
{
auto const table_size = static_cast<cudf::size_type>(state.get_int64("table_size"));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));

auto const source_table = create_random_table(
{cudf::type_to_id<TypeLhs>(), cudf::type_to_id<TypeRhs>()}, row_count{table_size});
{cudf::type_to_id<TypeLhs>(), cudf::type_to_id<TypeRhs>()}, row_count{num_rows});

auto lhs = cudf::column_view(source_table->get_column(0));
auto rhs = cudf::column_view(source_table->get_column(1));
Expand All @@ -37,9 +37,9 @@ void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop)
cudf::binary_operation(lhs, rhs, binop, output_dtype);

// use number of bytes read and written to global memory
state.add_global_memory_reads<TypeLhs>(table_size);
state.add_global_memory_reads<TypeRhs>(table_size);
state.add_global_memory_writes<TypeOut>(table_size);
state.add_global_memory_reads<TypeLhs>(num_rows);
state.add_global_memory_reads<TypeRhs>(num_rows);
state.add_global_memory_writes<TypeOut>(num_rows);

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch&) { cudf::binary_operation(lhs, rhs, binop, output_dtype); });
Expand All @@ -55,7 +55,7 @@ void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop)
} \
NVBENCH_BENCH(name) \
.set_name("compiled_binary_op_" BM_STRINGIFY(name)) \
.add_int64_axis("table_size", {10'000, 100'000, 1'000'000, 10'000'000, 100'000'000})
.add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000, 100'000'000})

#define build_name(a, b, c, d) a##_##b##_##c##_##d

Expand Down
11 changes: 11 additions & 0 deletions cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -1483,6 +1483,17 @@ struct, and therefore `cudf::struct_view` is the data type of a `cudf::column` o

`cudf::type_dispatcher` dispatches to the `struct_view` data type when invoked on a `STRUCT` column.

# Empty Columns

libcudf columns may be empty while still carrying a type. An empty column has no data and no validity mask.
Empty strings or lists columns may or may not contain a child offsets column.
It is undefined behavior (UB) to access the offsets child of an empty strings or lists column.
Nested columns such as lists and structs may require additional child columns to describe the
nested structure of the empty type.

Use `cudf::make_empty_column()` to create empty fixed-width and strings columns.
Use `cudf::empty_like()` to create an empty column from an existing `cudf::column_view`.

# cuIO: file reading and writing

cuIO is a component of libcudf that provides GPU-accelerated reading and writing of data file
Expand Down
Loading

0 comments on commit 18df720

Please sign in to comment.