Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -141,3 +141,4 @@ add_benchmark(vector_bool_copy src/vector_bool_copy.cpp)
add_benchmark(vector_bool_copy_n src/vector_bool_copy_n.cpp)
add_benchmark(vector_bool_count src/vector_bool_count.cpp)
add_benchmark(vector_bool_move src/vector_bool_move.cpp)
add_benchmark(vector_bool_transform src/vector_bool_transform.cpp)
6 changes: 3 additions & 3 deletions benchmarks/inc/utility.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
#include <type_traits>
#include <vector>

template <class Contained, template <class> class Alloc = std::allocator>
std::vector<Contained, Alloc<Contained>> random_vector(size_t n) {
template <class Contained, template <class> class Alloc = std::allocator, class... Seed>
std::vector<Contained, Alloc<Contained>> random_vector(size_t n, Seed... seed) {
std::vector<Contained, Alloc<Contained>> res(n);
std::mt19937_64 prng;
std::mt19937_64 prng{seed...};

if constexpr (std::is_same_v<Contained, bool>) {
std::generate(res.begin(), res.end(), [&prng] { return static_cast<bool>(prng() & 1); });
Expand Down
52 changes: 52 additions & 0 deletions benchmarks/src/vector_bool_transform.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <benchmark/benchmark.h>
//
#include <algorithm>
#include <cstddef>
#include <random>
#include <vector>

#include "skewed_allocator.hpp"
#include "utility.hpp"

using namespace std;

// Benchmarks the single-input overload of transform() from one vector<bool> into another.
// Pred is the default-constructed unary function object applied to every element.
// Both ranges start at begin(); state.range(0) supplies the element count.
template <class Pred>
void transform_one_input_aligned(benchmark::State& state) {
    const auto bit_count = static_cast<size_t>(state.range(0));
    auto input           = random_vector<bool, not_highly_aligned_allocator>(bit_count);
    vector<bool> output(bit_count, false);

    for (auto _ : state) {
        // Keep the compiler from treating the input as a known constant between iterations.
        benchmark::DoNotOptimize(input);
        transform(input.begin(), input.end(), output.begin(), Pred{});
        // Keep the compiler from discarding the stores into the output.
        benchmark::DoNotOptimize(output);
    }
}

// Benchmarks the two-input overload of transform() combining two vector<bool> into a third.
// Pred is the default-constructed binary function object applied to each pair of elements.
// The second input is generated with an explicit seed (1729u) so that it differs from the first.
template <class Pred>
void transform_two_inputs_aligned(benchmark::State& state) {
    const auto bit_count = static_cast<size_t>(state.range(0));
    auto lhs             = random_vector<bool, not_highly_aligned_allocator>(bit_count);
    auto rhs             = random_vector<bool, not_highly_aligned_allocator>(bit_count, 1729u);
    vector<bool> output(bit_count, false);

    for (auto _ : state) {
        // Keep the compiler from treating either input as a known constant between iterations.
        benchmark::DoNotOptimize(lhs);
        benchmark::DoNotOptimize(rhs);
        transform(lhs.begin(), lhs.end(), rhs.begin(), output.begin(), Pred{});
        // Keep the compiler from discarding the stores into the output.
        benchmark::DoNotOptimize(output);
    }
}

// Shared argument setup for every benchmark in this file:
// element counts from 64 up to 64 << 10, with a range multiplier of 64.
void common_args(auto bm) {
    constexpr int smallest = 64;
    constexpr int largest  = 64 << 10;

    bm->RangeMultiplier(64);
    bm->Range(smallest, largest);
}

// Register the two-input benchmarks (logical_and, logical_or) and the one-input benchmark (logical_not),
// all over the size range configured by common_args.
BENCHMARK(transform_two_inputs_aligned<logical_and<>>)->Apply(common_args);
BENCHMARK(transform_two_inputs_aligned<logical_or<>>)->Apply(common_args);
BENCHMARK(transform_one_input_aligned<logical_not<>>)->Apply(common_args);

// Provides main() for the benchmark executable.
BENCHMARK_MAIN();
27 changes: 27 additions & 0 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -4063,13 +4063,30 @@ _CONSTEXPR20 void iter_swap(_FwdIt1 _Left, _FwdIt2 _Right) { // swap *_Left and
swap(*_Left, *_Right); // intentional ADL
}

// Forward declarations of the vector<bool> helpers that transform() dispatches to when every iterator
// involved has _Myoff == 0 and the function object has a non-void _Map_vb_functor_t mapping.
// NOTE(review): the definitions appear to live in <vector> in this change - confirm.

// One-input form: applies _Mapped_func over [_First, _Last) and writes the result starting at _Dest.
template <class _VbIt, class _OutIt, class _Mapped_fn>
_CONSTEXPR20 _OutIt _Transform_vbool_aligned(
const _VbIt _First, const _VbIt _Last, _OutIt _Dest, const _Mapped_fn _Mapped_func);

// Two-input form: combines [_First1, _Last1) with the range starting at _First2 and writes to _Dest.
template <class _VbIt, class _OutIt, class _Mapped_fn>
_CONSTEXPR20 _OutIt _Transform_vbool_aligned(
const _VbIt _First1, const _VbIt _Last1, const _VbIt _First2, _OutIt _Dest, const _Mapped_fn _Mapped_func);

_EXPORT_STD template <class _InIt, class _OutIt, class _Fn>
_CONSTEXPR20 _OutIt transform(const _InIt _First, const _InIt _Last, _OutIt _Dest, _Fn _Func) {
// transform [_First, _Last) with _Func
_STD _Adl_verify_range(_First, _Last);
auto _UFirst = _STD _Get_unwrapped(_First);
const auto _ULast = _STD _Get_unwrapped(_Last);
auto _UDest = _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_InIt>(_UFirst, _ULast));

if constexpr (_Is_vb_iterator<_InIt> && _Is_vb_iterator<_OutIt, true> && !is_same_v<_Map_vb_functor_t<_Fn>, void>) {
if (_UFirst._Myoff == 0 && _UDest._Myoff == 0) {
_UDest = _Transform_vbool_aligned(_UFirst, _ULast, _UDest, _Map_vb_functor_t<_Fn>{});
_STD _Seek_wrapped(_Dest, _UDest);
return _Dest;
}
}

for (; _UFirst != _ULast; ++_UFirst, (void) ++_UDest) {
*_UDest = _Func(*_UFirst);
}
Expand All @@ -4093,6 +4110,16 @@ _CONSTEXPR20 _OutIt transform(
const auto _Count = _STD _Idl_distance<_InIt1>(_UFirst1, _ULast1);
auto _UFirst2 = _STD _Get_unwrapped_n(_First2, _Count);
auto _UDest = _STD _Get_unwrapped_n(_Dest, _Count);

if constexpr (_Is_vb_iterator<_InIt1> && _Is_vb_iterator<_InIt2> && _Is_vb_iterator<_OutIt, true>
&& !is_same_v<_Map_vb_functor_t<_Fn>, void>) {
if (_UFirst1._Myoff == 0 && _UFirst2._Myoff == 0 && _UDest._Myoff == 0) {
_UDest = _Transform_vbool_aligned(_UFirst1, _ULast1, _UFirst2, _UDest, _Map_vb_functor_t<_Fn>{});
_STD _Seek_wrapped(_Dest, _UDest);
return _Dest;
}
}

for (; _UFirst1 != _ULast1; ++_UFirst1, (void) ++_UFirst2, ++_UDest) {
*_UDest = _Func(*_UFirst1, *_UFirst2);
}
Expand Down
50 changes: 50 additions & 0 deletions stl/inc/functional
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,56 @@ struct bit_not<void> {
using is_transparent = int;
};

// Transparent function object computing bitwise XNOR: ~(_Left ^ _Right).
// A result bit is set exactly where the corresponding bits of the two operands are equal.
struct _Bit_xnor {
template <class _Ty1, class _Ty2>
_NODISCARD constexpr auto operator()(_Ty1&& _Left, _Ty2&& _Right) const -> decltype(~(_Left ^ _Right)) {
return ~(_Left ^ _Right);
}
};

template <class _Ty>
constexpr bool _Is_vbool_functor_arg = is_same_v<_Ty, void> || is_integral_v<_Ty>;

// _Map_vb_functor specializations: each maps a standard function object to the transparent bitwise
// function object that gives the same per-bit result when applied to whole storage words.
// When _Ty is neither void nor integral, _Type is void, which disables the mapping.

// logical AND on single bits is bitwise AND.
template <class _Ty>
struct _Map_vb_functor<logical_and<_Ty>> {
using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_and<>, void>;
};

template <class _Ty>
struct _Map_vb_functor<bit_and<_Ty>> {
using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_and<>, void>;
};

// logical OR on single bits is bitwise OR.
template <class _Ty>
struct _Map_vb_functor<logical_or<_Ty>> {
using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_or<>, void>;
};

template <class _Ty>
struct _Map_vb_functor<bit_or<_Ty>> {
using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_or<>, void>;
};

// Inequality of single bits is bitwise XOR.
template <class _Ty>
struct _Map_vb_functor<not_equal_to<_Ty>> {
using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_xor<>, void>;
};

template <class _Ty>
struct _Map_vb_functor<bit_xor<_Ty>> {
using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_xor<>, void>;
};

template <class _Ty>
struct _Map_vb_functor<equal_to<_Ty>> {
using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, _Bit_xnor, void>;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alternatively, we can map to _Map_vb_functor itself and have operator() right here to save one struct.

};

// logical NOT on single bits maps to bitwise NOT of the whole storage word.
// When _Ty is neither void nor integral, _Type is void, which disables the mapping.
template <class _Ty>
struct _Map_vb_functor<logical_not<_Ty>> {
using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_not<>, void>;
};

#if _HAS_DEPRECATED_NEGATORS
_STL_DISABLE_DEPRECATED_WARNING
_EXPORT_STD template <class _Fn>
Expand Down
42 changes: 42 additions & 0 deletions stl/inc/vector
Original file line number Diff line number Diff line change
Expand Up @@ -4004,6 +4004,48 @@ _CONSTEXPR20 _OutIt _Copy_vbool(_VbIt _First, _VbIt _Last, _OutIt _Dest) {
return _DestEnd;
}

template <class _VbIt, class _OutIt, class _Mapped_fn>
_CONSTEXPR20 _OutIt _Transform_vbool_aligned(
Copy link
Contributor Author

@AlexGuteniev AlexGuteniev Oct 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved this out to <vector> from <algorithm> because the other algorithms have been moved out.
However, I'm not sure whether it is useful.

For accessing the vector<bool> representation it is not strictly necessary. Most things are template-dependent member functions and data. The only exception is _Vbase, which can still be deduced from the iterators.

For throughput it does not look useful either. <vector> is included more frequently than <algorithm>, so it appears more useful to offload <vector> instead.

For reference, 0f24d45 is the commit where this movement was made.

const _VbIt _First, const _VbIt _Last, _OutIt _Dest, const _Mapped_fn _Mapped_func) {
    // One-input word-wise transform over vector<bool> storage.
    // _Mapped_func is applied to whole _Vbase words rather than to individual bits.
    // Returns _Dest advanced past the last bit written.
    auto _First_ptr      = _First._Myptr;
    const auto _Last_ptr = _Last._Myptr;
    auto _Dest_ptr       = const_cast<_Vbase*>(_Dest._Myptr);

    // Full words: every bit belongs to the range, so the result can be stored directly.
    for (; _First_ptr != _Last_ptr; ++_First_ptr, ++_Dest_ptr) {
        *_Dest_ptr = _Mapped_func(*_First_ptr);
    }

    // Trailing partial word: only the low _Last._Myoff bits belong to the range.
    // Merge them in and keep the destination's remaining high bits unchanged.
    if (_Last._Myoff != 0) {
        const _Vbase _Mask = (_Vbase{1} << _Last._Myoff) - 1;
        *_Dest_ptr         = (*_Dest_ptr & ~_Mask) | (_Mapped_func(*_First_ptr) & _Mask);
        _Dest._Myoff       = _Last._Myoff;
    }

    // Store the advanced word pointer back into the iterator, as the two-input overload does.
    // Without this the returned iterator still refers to the first destination word.
    _Dest._Myptr = _Dest_ptr;
    return _Dest;
}

// Two-input word-wise transform over vector<bool> storage.
// _Mapped_func is applied to pairs of whole _Vbase words taken from the two inputs;
// a trailing partial word is merged into the destination under a mask.
// Returns _Dest advanced past the last bit written.
template <class _VbIt, class _OutIt, class _Mapped_fn>
_CONSTEXPR20 _OutIt _Transform_vbool_aligned(
    const _VbIt _First1, const _VbIt _Last1, const _VbIt _First2, _OutIt _Dest, const _Mapped_fn _Mapped_func) {
    auto _Src1           = _First1._Myptr;
    auto _Src2           = _First2._Myptr;
    const auto _Src1_end = _Last1._Myptr;
    auto _Out            = const_cast<_Vbase*>(_Dest._Myptr);

    // Full words: every bit belongs to the range.
    while (_Src1 != _Src1_end) {
        *_Out = _Mapped_func(*_Src1, *_Src2);
        ++_Src1;
        ++_Src2;
        ++_Out;
    }

    // Trailing partial word: write only the low bits, keep the destination's high bits.
    const auto _Tail_bits = _Last1._Myoff;
    if (_Tail_bits != 0) {
        const _Vbase _Low_mask = (_Vbase{1} << _Tail_bits) - 1;
        *_Out                  = (*_Out & ~_Low_mask) | (_Mapped_func(*_Src1, *_Src2) & _Low_mask);
        _Dest._Myoff           = _Tail_bits;
    }

    _Dest._Myptr = _Out;
    return _Dest;
}

#undef _ASAN_VECTOR_MODIFY
#undef _ASAN_VECTOR_REMOVE
#undef _ASAN_VECTOR_CREATE
Expand Down
8 changes: 8 additions & 0 deletions stl/inc/xutility
Original file line number Diff line number Diff line change
Expand Up @@ -4889,6 +4889,14 @@ _OutCtgIt _Copy_memmove_n(_CtgIt _First, const size_t _Object_count, _OutCtgIt _
return _STD _Copy_memmove_tail(_First_ch, _STD move(_Dest), _Byte_count, _Object_count);
}

// Primary template: maps a function object type _Fn to a replacement type via the nested _Type.
// The default is void, meaning "no mapping"; specializations for individual function objects
// provide a non-void _Type.
template <class _Fn>
struct _Map_vb_functor {
using _Type = void;
};

// Shorthand for the mapped type; void when _Fn has no mapping.
template <class _Fn>
using _Map_vb_functor_t = typename _Map_vb_functor<_Fn>::_Type;

template <class _It, bool _RequiresMutable = false>
constexpr bool _Is_vb_iterator = false;

Expand Down
96 changes: 96 additions & 0 deletions tests/std/tests/GH_000625_vector_bool_optimization/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,100 @@ constexpr bool source_raw[] = { //
true, false, true, false, true, true, true, false, //
true, false, true, false, true, true, true, false};

// Checks transform() on vector<bool> against the same transform performed on plain bool arrays,
// using the first `length` elements of the test data. Covers logical_and, logical_or, not_equal_to,
// equal_to (two inputs) and logical_not (one input).
CONSTEXPR20 void test_transform_helper(const size_t length) {
// Only no offset case

// This test data is not random, but irregular enough to ensure confidence in the tests
constexpr bool source2_raw[] = {//
true, true, false, false, false, false, true, true, //
true, true, false, false, false, false, true, true, //
true, true, false, false, false, false, true, true, //
true, true, false, false, false, false, true, true, //
true, true, false, false, false, false, true, true, //
true, true, false, false, false, false, true, true, //
true, true, false, false, false, false, true, true, //
true, true, false, false, false, false, true, true, //
true, true, false, false, false, false, true, true, //
true, true, false, false, false, false, true, true, //
true, true, false, false, false, false, true, true, //
true, true, false, false, false, false, true, true, //
true, true, false, false, false, false, true, true, //
true, true, false, false, false, false, true, true, //
true, true, false, false, false, false, true, true, //
true, true, false, false, false, false, true, true, //
true, true, false, false, false, false, true, true};

// Both inputs must provide the same number of elements.
#if _HAS_CXX17
static_assert(size(source_raw) == size(source2_raw));
#endif // _HAS_CXX17

// Reference results, computed over the full raw bool arrays.
bool and_expected_raw[size(source_raw)];
bool or_expected_raw[size(source_raw)];
bool xor_expected_raw[size(source_raw)];
bool xnor_expected_raw[size(source_raw)];
bool not_expected_raw[size(source_raw)];

transform(begin(source_raw), end(source_raw), begin(source2_raw), begin(and_expected_raw), logical_and<>{});
transform(begin(source_raw), end(source_raw), begin(source2_raw), begin(or_expected_raw), logical_or<>{});
transform(begin(source_raw), end(source_raw), begin(source2_raw), begin(xor_expected_raw), not_equal_to<>{});
transform(begin(source_raw), end(source_raw), begin(source2_raw), begin(xnor_expected_raw), equal_to<>{});
transform(begin(source_raw), end(source_raw), begin(not_expected_raw), logical_not<>{});

// vector<bool> inputs holding the first `length` elements of each raw array.
const vector<bool> source1(source_raw, source_raw + length);
const vector<bool> source2(source2_raw, source2_raw + length);

vector<bool> and_expected(and_expected_raw, and_expected_raw + length);
vector<bool> or_expected(or_expected_raw, or_expected_raw + length);
vector<bool> xor_expected(xor_expected_raw, xor_expected_raw + length);
vector<bool> xnor_expected(xnor_expected_raw, xnor_expected_raw + length);
vector<bool> not_expected(not_expected_raw, not_expected_raw + length);

// Pad the expected results with 3 false elements. The outputs below are length + 3 elements long
// and start out all false, so the comparisons also verify that elements past the transformed
// range are left unchanged.
and_expected.resize(length + 3, false);
or_expected.resize(length + 3, false);
xor_expected.resize(length + 3, false);
xnor_expected.resize(length + 3, false);
not_expected.resize(length + 3, false);

vector<bool> and_actual(length + 3);
vector<bool> or_actual(length + 3);
vector<bool> xor_actual(length + 3);
vector<bool> xnor_actual(length + 3);
vector<bool> not_actual(length + 3);

// NOTE(review): only the destination contents are compared; the iterator returned by transform()
// is not checked here.
transform(source1.begin(), source1.begin() + length, source2.begin(), and_actual.begin(), logical_and<>{});
assert(and_actual == and_expected);

transform(source1.begin(), source1.begin() + length, source2.begin(), or_actual.begin(), logical_or<>{});
assert(or_actual == or_expected);

transform(source1.begin(), source1.begin() + length, source2.begin(), xor_actual.begin(), not_equal_to<>{});
assert(xor_actual == xor_expected);

transform(source1.begin(), source1.begin() + length, source2.begin(), xnor_actual.begin(), equal_to<>{});
assert(xnor_actual == xnor_expected);

transform(source1.begin(), source1.begin() + length, not_actual.begin(), logical_not<>{});
assert(not_actual == not_expected);
}

// Runs test_transform_helper over a set of lengths chosen relative to blockSize.
// Returns true so the function can also be used as the operand of a static_assert.
CONSTEXPR20 bool test_transform() {
// Empty range
test_transform_helper(0);

// One block, ends within block
test_transform_helper(15);

// One block, ends at block boundary
test_transform_helper(blockSize);

// Multiple blocks, ends within the last block
test_transform_helper(3 * blockSize + 5);

// Multiple blocks, ends at block boundary
test_transform_helper(4 * blockSize);
return true;
}

CONSTEXPR20 void test_fill_helper(const size_t length) {
// No offset
{
Expand Down Expand Up @@ -1385,6 +1479,7 @@ static_assert(test_gh_5345<120, 31>());
static_assert(test_fill());
static_assert(test_find());
static_assert(test_count());
static_assert(test_transform());

#if defined(__clang__) || defined(__EDG__) // TRANSITION, VSO-2574489
static_assert(test_copy_part_1());
Expand All @@ -1396,6 +1491,7 @@ int main() {
test_fill();
test_find();
test_count();
test_transform();
test_copy_part_1();
test_copy_part_2();

Expand Down