-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Optimize std::transform for vector<bool> #5769
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
// Copyright (c) Microsoft Corporation. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
||
#include <benchmark/benchmark.h> | ||
// | ||
#include <algorithm> | ||
#include <cstddef> | ||
#include <random> | ||
#include <vector> | ||
|
||
#include "skewed_allocator.hpp" | ||
#include "utility.hpp" | ||
|
||
using namespace std; | ||
|
||
template <class Pred> | ||
void transform_one_input_aligned(benchmark::State& state) { | ||
const auto size = static_cast<size_t>(state.range(0)); | ||
auto source = random_vector<bool, not_highly_aligned_allocator>(size); | ||
vector<bool> dest(size, false); | ||
|
||
for (auto _ : state) { | ||
benchmark::DoNotOptimize(source); | ||
transform(source.begin(), source.end(), dest.begin(), Pred{}); | ||
benchmark::DoNotOptimize(dest); | ||
} | ||
} | ||
|
||
template <class Pred> | ||
void transform_two_inputs_aligned(benchmark::State& state) { | ||
const auto size = static_cast<size_t>(state.range(0)); | ||
auto source1 = random_vector<bool, not_highly_aligned_allocator>(size); | ||
auto source2 = random_vector<bool, not_highly_aligned_allocator>(size, 1729u); | ||
vector<bool> dest(size, false); | ||
|
||
for (auto _ : state) { | ||
benchmark::DoNotOptimize(source1); | ||
benchmark::DoNotOptimize(source2); | ||
transform(source1.begin(), source1.end(), source2.begin(), dest.begin(), Pred{}); | ||
benchmark::DoNotOptimize(dest); | ||
} | ||
} | ||
|
||
// Applies the shared argument sweep to a benchmark registration: sizes from 64 up to 64 << 10,
// stepping by a multiplier of 64.
void common_args(auto bm) {
    constexpr int step_multiplier = 64;
    constexpr int smallest_size   = 64;
    constexpr int largest_size    = 64 << 10;

    bm->RangeMultiplier(step_multiplier)->Range(smallest_size, largest_size);
}
|
||
// Register the *_aligned transform benchmarks for logical_and<>, logical_or<> (two inputs) and
// logical_not<> (one input); every registration is passed through common_args.
BENCHMARK(transform_two_inputs_aligned<logical_and<>>)->Apply(common_args); | ||
BENCHMARK(transform_two_inputs_aligned<logical_or<>>)->Apply(common_args); | ||
BENCHMARK(transform_one_input_aligned<logical_not<>>)->Apply(common_args); | ||
|
||
// Google Benchmark macro that supplies the executable's main().
BENCHMARK_MAIN(); |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4004,6 +4004,48 @@ _CONSTEXPR20 _OutIt _Copy_vbool(_VbIt _First, _VbIt _Last, _OutIt _Dest) { | |
return _DestEnd; | ||
} | ||
|
||
template <class _VbIt, class _OutIt, class _Mapped_fn> | ||
_CONSTEXPR20 _OutIt _Transform_vbool_aligned( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I moved this out to For accessing For throughput it does not look useful either. For reference, 0f24d45 is the commit where this movement was made. |
||
const _VbIt _First, const _VbIt _Last, _OutIt _Dest, const _Mapped_fn _Mapped_func) { | ||
auto _First_ptr = _First._Myptr; | ||
const auto _Last_ptr = _Last._Myptr; | ||
auto _Dest_ptr = const_cast<_Vbase*>(_Dest._Myptr); | ||
|
||
for (; _First_ptr != _Last_ptr; ++_First_ptr, ++_Dest_ptr) { | ||
*_Dest_ptr = _Mapped_func(*_First_ptr); | ||
} | ||
|
||
if (_Last._Myoff != 0) { | ||
const _Vbase _Mask = (_Vbase{1} << _Last._Myoff) - 1; | ||
*_Dest_ptr = (*_Dest_ptr & ~_Mask) | (_Mapped_func(*_First_ptr) & _Mask); | ||
_Dest._Myoff = _Last._Myoff; | ||
} | ||
|
||
return _Dest; | ||
} | ||
|
||
template <class _VbIt, class _OutIt, class _Mapped_fn> | ||
_CONSTEXPR20 _OutIt _Transform_vbool_aligned( | ||
const _VbIt _First1, const _VbIt _Last1, const _VbIt _First2, _OutIt _Dest, const _Mapped_fn _Mapped_func) { | ||
auto _First1_ptr = _First1._Myptr; | ||
auto _First2_ptr = _First2._Myptr; | ||
const auto _Last1_ptr = _Last1._Myptr; | ||
auto _Dest_ptr = const_cast<_Vbase*>(_Dest._Myptr); | ||
|
||
for (; _First1_ptr != _Last1_ptr; ++_First1_ptr, ++_First2_ptr, ++_Dest_ptr) { | ||
*_Dest_ptr = _Mapped_func(*_First1_ptr, *_First2_ptr); | ||
} | ||
|
||
if (_Last1._Myoff != 0) { | ||
const _Vbase _Mask = (_Vbase{1} << _Last1._Myoff) - 1; | ||
*_Dest_ptr = (*_Dest_ptr & ~_Mask) | (_Mapped_func(*_First1_ptr, *_First2_ptr) & _Mask); | ||
_Dest._Myoff = _Last1._Myoff; | ||
} | ||
|
||
_Dest._Myptr = _Dest_ptr; | ||
return _Dest; | ||
} | ||
|
||
#undef _ASAN_VECTOR_MODIFY | ||
#undef _ASAN_VECTOR_REMOVE | ||
#undef _ASAN_VECTOR_CREATE | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Alternatively, we can map to `_Map_vb_functor` itself and have `operator()` right here to save one `struct`.