Skip to content

Commit

Permalink
Move strings/numeric convert benchmarks to nvbench
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Nov 6, 2024
1 parent ac5b3ed commit 3e8338a
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 174 deletions.
4 changes: 2 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -358,8 +358,6 @@ ConfigureBench(
STRINGS_BENCH
string/convert_datetime.cpp
string/convert_durations.cpp
string/convert_fixed_point.cpp
string/convert_numerics.cpp
string/copy.cu
string/factory.cu
string/filter.cpp
Expand All @@ -375,6 +373,8 @@ ConfigureNVBench(
string/char_types.cpp
string/combine.cpp
string/contains.cpp
string/convert_fixed_point.cpp
string/convert_numerics.cpp
string/copy_if_else.cpp
string/copy_range.cpp
string/count.cpp
Expand Down
111 changes: 33 additions & 78 deletions cpp/benchmarks/string/convert_fixed_point.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,93 +16,48 @@

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/strings/convert/convert_fixed_point.hpp>
#include <cudf/strings/convert/convert_floats.hpp>
#include <cudf/types.hpp>

namespace {
#include <nvbench/nvbench.cuh>

std::unique_ptr<cudf::column> get_strings_column(cudf::size_type rows)
{
auto result =
create_random_column(cudf::type_id::FLOAT32, row_count{static_cast<cudf::size_type>(rows)});
return cudf::strings::from_floats(result->view());
}

} // anonymous namespace

class StringsToFixedPoint : public cudf::benchmark {};

template <typename fixed_point_type>
void convert_to_fixed_point(benchmark::State& state)
{
auto const rows = static_cast<cudf::size_type>(state.range(0));
auto const strings_col = get_strings_column(rows);
auto const strings_view = cudf::strings_column_view(strings_col->view());
auto const dtype = cudf::data_type{cudf::type_to_id<fixed_point_type>(), numeric::scale_type{-2}};

for (auto _ : state) {
cuda_event_timer raii(state, true);
auto volatile results = cudf::strings::to_fixed_point(strings_view, dtype);
}
using Types = nvbench::type_list<numeric::decimal32, numeric::decimal64>;

// bytes_processed = bytes_input + bytes_output
state.SetBytesProcessed(
state.iterations() *
(strings_view.chars_size(cudf::get_default_stream()) + rows * cudf::size_of(dtype)));
}

class StringsFromFixedPoint : public cudf::benchmark {};
// Associate display strings with the two fixed-point types used on the
// benchmark's type axis (see the Types list). Both string arguments are set
// to the plain type name.
NVBENCH_DECLARE_TYPE_STRINGS(numeric::decimal32, "decimal32", "decimal32");
NVBENCH_DECLARE_TYPE_STRINGS(numeric::decimal64, "decimal64", "decimal64");

template <typename fixed_point_type>
void convert_from_fixed_point(benchmark::State& state)
template <typename DataType>
void bench_convert_fixed_point(nvbench::state& state, nvbench::type_list<DataType>)
{
auto const rows = static_cast<cudf::size_type>(state.range(0));
auto const strings_col = get_strings_column(rows);
auto const dtype = cudf::data_type{cudf::type_to_id<fixed_point_type>(), numeric::scale_type{-2}};
auto const fp_col =
cudf::strings::to_fixed_point(cudf::strings_column_view(strings_col->view()), dtype);

std::unique_ptr<cudf::column> results = nullptr;

for (auto _ : state) {
cuda_event_timer raii(state, true);
results = cudf::strings::from_fixed_point(fp_col->view());
// Benchmark parameters: "num_rows" sizes the input column; "dir" selects the
// conversion direction. from_num is true for "from" (fixed-point -> strings)
// and false for "to" (strings -> fixed-point).
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const from_num = state.get_string("dir") == "from";

// Random column of the benchmarked decimal type. data_type carries scale -2 and
// is the requested output type for the strings -> fixed-point direction.
auto const data_type = cudf::data_type{cudf::type_to_id<DataType>(), numeric::scale_type{-2}};
auto const fp_col    = create_random_column(data_type.id(), row_count{num_rows});

// String rendering of the same values: input for the "to" direction and the
// source of the string byte count reported below.
auto const strings_col = cudf::strings::from_fixed_point(fp_col->view());
auto const sv          = cudf::strings_column_view(strings_col->view());

// Run (and let nvbench time) on cudf's default stream.
auto stream = cudf::get_default_stream();
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));

// The timed call must match the direction the throughput counters describe:
// "from" reads fixed-point values and writes string bytes, "to" is the reverse.
if (from_num) {
  state.add_global_memory_reads<int8_t>(num_rows * cudf::size_of(data_type));
  state.add_global_memory_writes<int8_t>(sv.chars_size(stream));
  state.exec(nvbench::exec_tag::sync,
             [&](nvbench::launch&) { cudf::strings::from_fixed_point(fp_col->view()); });
} else {
  state.add_global_memory_reads<int8_t>(sv.chars_size(stream));
  state.add_global_memory_writes<int8_t>(num_rows * cudf::size_of(data_type));
  state.exec(nvbench::exec_tag::sync,
             [&](nvbench::launch&) { cudf::strings::to_fixed_point(sv, data_type); });
}

// bytes_processed = bytes_input + bytes_output
state.SetBytesProcessed(
state.iterations() *
(cudf::strings_column_view(results->view()).chars_size(cudf::get_default_stream()) +
rows * cudf::size_of(dtype)));
}

#define CONVERT_TO_FIXED_POINT_BMD(name, fixed_point_type) \
BENCHMARK_DEFINE_F(StringsToFixedPoint, name)(::benchmark::State & state) \
{ \
convert_to_fixed_point<fixed_point_type>(state); \
} \
BENCHMARK_REGISTER_F(StringsToFixedPoint, name) \
->RangeMultiplier(4) \
->Range(1 << 12, 1 << 24) \
->UseManualTime() \
->Unit(benchmark::kMicrosecond);

#define CONVERT_FROM_FIXED_POINT_BMD(name, fixed_point_type) \
BENCHMARK_DEFINE_F(StringsFromFixedPoint, name)(::benchmark::State & state) \
{ \
convert_from_fixed_point<fixed_point_type>(state); \
} \
BENCHMARK_REGISTER_F(StringsFromFixedPoint, name) \
->RangeMultiplier(4) \
->Range(1 << 12, 1 << 24) \
->UseManualTime() \
->Unit(benchmark::kMicrosecond);

CONVERT_TO_FIXED_POINT_BMD(strings_to_decimal32, numeric::decimal32);
CONVERT_TO_FIXED_POINT_BMD(strings_to_decimal64, numeric::decimal64);

CONVERT_FROM_FIXED_POINT_BMD(strings_from_decimal32, numeric::decimal32);
CONVERT_FROM_FIXED_POINT_BMD(strings_from_decimal64, numeric::decimal64);
// Register bench_convert_fixed_point under the name "fixed_point" for every
// type in Types. Each type is run for both values of the "dir" axis
// ("to"/"from", read by the benchmark via state.get_string("dir")) and for
// row counts of 2^16, 2^18, 2^20 and 2^22.
NVBENCH_BENCH_TYPES(bench_convert_fixed_point, NVBENCH_TYPE_AXES(Types))
.set_name("fixed_point")
.set_type_axes_names({"DataType"})
.add_string_axis("dir", {"to", "from"})
.add_int64_axis("num_rows", {1 << 16, 1 << 18, 1 << 20, 1 << 22});
138 changes: 44 additions & 94 deletions cpp/benchmarks/string/convert_numerics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,117 +16,67 @@

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/strings/convert/convert_floats.hpp>
#include <cudf/strings/convert/convert_integers.hpp>
#include <cudf/types.hpp>

namespace {
#include <nvbench/nvbench.cuh>

template <typename NumericType>
std::unique_ptr<cudf::column> get_numerics_column(cudf::size_type rows)
{
return create_random_column(cudf::type_to_id<NumericType>(), row_count{rows});
}
namespace {

template <typename NumericType>
std::unique_ptr<cudf::column> get_strings_column(cudf::size_type rows)
std::unique_ptr<cudf::column> get_strings_column(cudf::column_view const& nv)
{
auto const numerics_col = get_numerics_column<NumericType>(rows);
if constexpr (std::is_floating_point_v<NumericType>) {
return cudf::strings::from_floats(numerics_col->view());
return cudf::strings::from_floats(nv);
} else {
return cudf::strings::from_integers(numerics_col->view());
}
}
} // anonymous namespace

class StringsToNumeric : public cudf::benchmark {};

template <typename NumericType>
void convert_to_number(benchmark::State& state)
{
auto const rows = static_cast<cudf::size_type>(state.range(0));

auto const strings_col = get_strings_column<NumericType>(rows);
auto const strings_view = cudf::strings_column_view(strings_col->view());
auto const col_type = cudf::type_to_id<NumericType>();

for (auto _ : state) {
cuda_event_timer raii(state, true);
if constexpr (std::is_floating_point_v<NumericType>) {
cudf::strings::to_floats(strings_view, cudf::data_type{col_type});
} else {
cudf::strings::to_integers(strings_view, cudf::data_type{col_type});
}
return cudf::strings::from_integers(nv);
}

// bytes_processed = bytes_input + bytes_output
state.SetBytesProcessed(
state.iterations() *
(strings_view.chars_size(cudf::get_default_stream()) + rows * sizeof(NumericType)));
}
} // namespace

class StringsFromNumeric : public cudf::benchmark {};
using Types = nvbench::type_list<float, double, int32_t, int64_t, uint8_t, uint16_t>;

template <typename NumericType>
void convert_from_number(benchmark::State& state)
void bench_convert_number(nvbench::state& state, nvbench::type_list<NumericType>)
{
auto const rows = static_cast<cudf::size_type>(state.range(0));

auto const numerics_col = get_numerics_column<NumericType>(rows);
auto const numerics_view = numerics_col->view();

std::unique_ptr<cudf::column> results = nullptr;

for (auto _ : state) {
cuda_event_timer raii(state, true);
if constexpr (std::is_floating_point_v<NumericType>)
results = cudf::strings::from_floats(numerics_view);
else
results = cudf::strings::from_integers(numerics_view);
// Benchmark parameters: "num_rows" sizes the input column; "dir" selects the
// conversion direction. from_num is true for "from" (numbers -> strings) and
// false for "to" (strings -> numbers).
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const from_num = state.get_string("dir") == "from";

// Random numeric column of the benchmarked type.
auto const data_type = cudf::data_type(cudf::type_to_id<NumericType>());
auto const num_col   = create_random_column(data_type.id(), row_count{num_rows});

// String rendering of the same values: input for the "to" direction and the
// source of the string byte count reported below.
auto const strings_col = get_strings_column<NumericType>(num_col->view());
auto const sv          = cudf::strings_column_view(strings_col->view());

// Run (and let nvbench time) on cudf's default stream.
auto stream = cudf::get_default_stream();
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));

// The timed call must match the direction the throughput counters describe:
// "from" reads numeric values and writes string bytes, "to" is the reverse.
if (from_num) {
  state.add_global_memory_reads<NumericType>(num_rows);
  state.add_global_memory_writes<int8_t>(sv.chars_size(stream));
  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
    if constexpr (std::is_floating_point_v<NumericType>) {
      cudf::strings::from_floats(num_col->view());
    } else {
      cudf::strings::from_integers(num_col->view());
    }
  });
} else {
  state.add_global_memory_reads<int8_t>(sv.chars_size(stream));
  state.add_global_memory_writes<NumericType>(num_rows);
  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
    if constexpr (std::is_floating_point_v<NumericType>) {
      cudf::strings::to_floats(sv, data_type);
    } else {
      cudf::strings::to_integers(sv, data_type);
    }
  });
}

// bytes_processed = bytes_input + bytes_output
state.SetBytesProcessed(
state.iterations() *
(cudf::strings_column_view(results->view()).chars_size(cudf::get_default_stream()) +
rows * sizeof(NumericType)));
}

#define CONVERT_TO_NUMERICS_BD(name, type) \
BENCHMARK_DEFINE_F(StringsToNumeric, name)(::benchmark::State & state) \
{ \
convert_to_number<type>(state); \
} \
BENCHMARK_REGISTER_F(StringsToNumeric, name) \
->RangeMultiplier(4) \
->Range(1 << 10, 1 << 17) \
->UseManualTime() \
->Unit(benchmark::kMicrosecond);

#define CONVERT_FROM_NUMERICS_BD(name, type) \
BENCHMARK_DEFINE_F(StringsFromNumeric, name)(::benchmark::State & state) \
{ \
convert_from_number<type>(state); \
} \
BENCHMARK_REGISTER_F(StringsFromNumeric, name) \
->RangeMultiplier(4) \
->Range(1 << 10, 1 << 17) \
->UseManualTime() \
->Unit(benchmark::kMicrosecond);

CONVERT_TO_NUMERICS_BD(strings_to_float32, float);
CONVERT_TO_NUMERICS_BD(strings_to_float64, double);
CONVERT_TO_NUMERICS_BD(strings_to_int32, int32_t);
CONVERT_TO_NUMERICS_BD(strings_to_int64, int64_t);
CONVERT_TO_NUMERICS_BD(strings_to_uint8, uint8_t);
CONVERT_TO_NUMERICS_BD(strings_to_uint16, uint16_t);

CONVERT_FROM_NUMERICS_BD(strings_from_float32, float);
CONVERT_FROM_NUMERICS_BD(strings_from_float64, double);
CONVERT_FROM_NUMERICS_BD(strings_from_int32, int32_t);
CONVERT_FROM_NUMERICS_BD(strings_from_int64, int64_t);
CONVERT_FROM_NUMERICS_BD(strings_from_uint8, uint8_t);
CONVERT_FROM_NUMERICS_BD(strings_from_uint16, uint16_t);
// Register bench_convert_number under the name "numeric" for every type in
// Types (float, double, int32_t, int64_t, uint8_t, uint16_t). Each type is run
// for both values of the "dir" axis ("to"/"from", read by the benchmark via
// state.get_string("dir")) and for row counts of 2^16, 2^18, 2^20 and 2^22.
NVBENCH_BENCH_TYPES(bench_convert_number, NVBENCH_TYPE_AXES(Types))
.set_name("numeric")
.set_type_axes_names({"NumericType"})
.add_string_axis("dir", {"to", "from"})
.add_int64_axis("num_rows", {1 << 16, 1 << 18, 1 << 20, 1 << 22});

0 comments on commit 3e8338a

Please sign in to comment.