From 66517875bec9348ea31ea3120a5fcf8487875120 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Mon, 6 Oct 2025 20:22:34 +0300 Subject: [PATCH 01/14] benchmark --- benchmarks/CMakeLists.txt | 1 + benchmarks/inc/utility.hpp | 6 +-- benchmarks/src/vector_bool_transform.cpp | 52 ++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 3 deletions(-) create mode 100644 benchmarks/src/vector_bool_transform.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 02b2bc91bbd..75feaf7f7c9 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -141,3 +141,4 @@ add_benchmark(vector_bool_copy src/vector_bool_copy.cpp) add_benchmark(vector_bool_copy_n src/vector_bool_copy_n.cpp) add_benchmark(vector_bool_count src/vector_bool_count.cpp) add_benchmark(vector_bool_move src/vector_bool_move.cpp) +add_benchmark(vector_bool_transform src/vector_bool_transform.cpp) diff --git a/benchmarks/inc/utility.hpp b/benchmarks/inc/utility.hpp index a8f9eda29a5..54db900fbe0 100644 --- a/benchmarks/inc/utility.hpp +++ b/benchmarks/inc/utility.hpp @@ -10,10 +10,10 @@ #include #include -template class Alloc = std::allocator> -std::vector> random_vector(size_t n) { +template class Alloc = std::allocator, class... Seed> +std::vector> random_vector(size_t n, Seed... seed) { std::vector> res(n); - std::mt19937_64 prng; + std::mt19937_64 prng{seed...}; if constexpr (std::is_same_v) { std::generate(res.begin(), res.end(), [&prng] { return static_cast(prng() & 1); }); diff --git a/benchmarks/src/vector_bool_transform.cpp b/benchmarks/src/vector_bool_transform.cpp new file mode 100644 index 00000000000..eac202f20bb --- /dev/null +++ b/benchmarks/src/vector_bool_transform.cpp @@ -0,0 +1,52 @@ +// Copyright (c) Microsoft Corporation. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +// +#include +#include +#include +#include + +#include "skewed_allocator.hpp" +#include "utility.hpp" + +using namespace std; + +template +void transform_one_input_aligned(benchmark::State& state) { + const auto size = static_cast(state.range(0)); + auto source = random_vector(size); + vector dest(size, false); + + for (auto _ : state) { + benchmark::DoNotOptimize(source); + transform(source.begin(), source.end(), dest.begin(), Pred{}); + benchmark::DoNotOptimize(dest); + } +} + +template +void transform_two_inputs_aligned(benchmark::State& state) { + const auto size = static_cast(state.range(0)); + auto source1 = random_vector(size); + auto source2 = random_vector(size, 1729u); + vector dest(size, false); + + for (auto _ : state) { + benchmark::DoNotOptimize(source1); + benchmark::DoNotOptimize(source2); + transform(source1.begin(), source1.end(), source2.begin(), dest.begin(), Pred{}); + benchmark::DoNotOptimize(dest); + } +} + +void common_args(auto bm) { + bm->RangeMultiplier(64)->Range(64, 64 << 10); +} + +BENCHMARK(transform_two_inputs_aligned>)->Apply(common_args); +BENCHMARK(transform_two_inputs_aligned>)->Apply(common_args); +BENCHMARK(transform_one_input_aligned>)->Apply(common_args); + +BENCHMARK_MAIN(); From 8578a69c8a444f4f63c6892a346812860d917994 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Tue, 7 Oct 2025 08:59:49 +0300 Subject: [PATCH 02/14] coverage --- .../test.cpp | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp b/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp index c75796a0714..f7683b976b0 100644 --- a/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp +++ b/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp @@ -45,6 +45,100 @@ constexpr bool source_raw[] = { // true, false, true, false, true, true, true, false, // true, false, true, false, true, true, 
true, false}; +CONSTEXPR20 void test_transform_helper(const size_t length) { + // Only no offset case + + // This test data is not random, but irregular enough to ensure confidence in the tests + constexpr bool source2_raw[] = {// + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true}; + +#if _HAS_CXX17 + static_assert(size(source_raw) == size(source2_raw)); +#endif // _HAS_CXX17 + + bool and_expected_raw[size(source_raw)]; + bool or_expected_raw[size(source_raw)]; + bool xor_expected_raw[size(source_raw)]; + bool xnor_expected_raw[size(source_raw)]; + bool not_expected_raw[size(source_raw)]; + + transform(begin(source_raw), end(source_raw), begin(source2_raw), begin(and_expected_raw), logical_and<>{}); + transform(begin(source_raw), end(source_raw), begin(source2_raw), begin(or_expected_raw), logical_or<>{}); + transform(begin(source_raw), end(source_raw), begin(source2_raw), begin(xor_expected_raw), not_equal_to<>{}); + transform(begin(source_raw), end(source_raw), begin(source2_raw), begin(xnor_expected_raw), equal_to<>{}); + transform(begin(source_raw), 
end(source_raw), begin(not_expected_raw), logical_not<>{}); + + const vector source1(source_raw, source_raw + length); + const vector source2(source2_raw, source2_raw + length); + + vector and_expected(and_expected_raw, and_expected_raw + length); + vector or_expected(or_expected_raw, or_expected_raw + length); + vector xor_expected(xor_expected_raw, xor_expected_raw + length); + vector xnor_expected(xnor_expected_raw, xnor_expected_raw + length); + vector not_expected(not_expected_raw, not_expected_raw + length); + + and_expected.resize(length + 3, false); + or_expected.resize(length + 3, false); + xor_expected.resize(length + 3, false); + xnor_expected.resize(length + 3, false); + not_expected.resize(length + 3, false); + + vector and_actual(length + 3); + vector or_actual(length + 3); + vector xor_actual(length + 3); + vector xnor_actual(length + 3); + vector not_actual(length + 3); + + transform(source1.begin(), source1.begin() + length, source2.begin(), and_actual.begin(), logical_and<>{}); + assert(and_actual == and_expected); + + transform(source1.begin(), source1.begin() + length, source2.begin(), or_actual.begin(), logical_or<>{}); + assert(or_actual == or_expected); + + transform(source1.begin(), source1.begin() + length, source2.begin(), xor_actual.begin(), not_equal_to<>{}); + assert(xor_actual == xor_expected); + + transform(source1.begin(), source1.begin() + length, source2.begin(), xnor_actual.begin(), equal_to<>{}); + assert(xnor_actual == xnor_expected); + + transform(source1.begin(), source1.begin() + length, not_actual.begin(), logical_not<>{}); + assert(not_actual == not_expected); +} + +CONSTEXPR20 bool test_transform() { + // Empty range + test_transform_helper(0); + + // One block, ends within block + test_transform_helper(15); + + // One block, ends at block boundary + test_transform_helper(blockSize); + + // Multiple blocks, within block + test_transform_helper(3 * blockSize + 5); + + // Multiple blocks, ends at block boundary + 
test_transform_helper(4 * blockSize); + return true; +} + CONSTEXPR20 void test_fill_helper(const size_t length) { // No offset { @@ -1385,6 +1479,7 @@ static_assert(test_gh_5345<120, 31>()); static_assert(test_fill()); static_assert(test_find()); static_assert(test_count()); +static_assert(test_transform()); #if defined(__clang__) || defined(__EDG__) // TRANSITION, VSO-2574489 static_assert(test_copy_part_1()); @@ -1396,6 +1491,7 @@ int main() { test_fill(); test_find(); test_count(); + test_transform(); test_copy_part_1(); test_copy_part_2(); From dc9cb954daee6de84f664fb76becd10c9945d807 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Tue, 7 Oct 2025 10:27:31 +0300 Subject: [PATCH 03/14] optimization --- stl/inc/algorithm | 41 +++++++++++++++++++++++++++++++++++++ stl/inc/functional | 50 ++++++++++++++++++++++++++++++++++++++++++++++ stl/inc/xutility | 8 ++++++++ 3 files changed, 99 insertions(+) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 85c0a14b7dd..bac5471064e 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -4070,6 +4070,25 @@ _CONSTEXPR20 _OutIt transform(const _InIt _First, const _InIt _Last, _OutIt _Des auto _UFirst = _STD _Get_unwrapped(_First); const auto _ULast = _STD _Get_unwrapped(_Last); auto _UDest = _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_InIt>(_UFirst, _ULast)); + + if constexpr (_Is_vb_iterator<_InIt> && _Is_vb_iterator<_OutIt, true> && !is_same_v<_Map_vb_functor_t<_Fn>, void>) { + if (_UFirst._Myoff == 0 && _UDest._Myoff == 0) { + using _Vbase_ = decay_t; + + const _Map_vb_functor_t<_Fn> _Mapped_fn{}; + auto _First_ptr = _UFirst._Myptr; + const auto _Last_ptr = _ULast._Myptr; + auto _Dest_ptr = const_cast<_Vbase_*>(_UDest._Myptr); + + for (; _First_ptr != _Last_ptr; ++_First_ptr, ++_Dest_ptr) { + *_Dest_ptr = _Mapped_fn(*_First_ptr); + } + + _UFirst._Myptr = _First_ptr; + _UDest._Myptr = _Dest_ptr; + } + } + for (; _UFirst != _ULast; ++_UFirst, (void) ++_UDest) { *_UDest = _Func(*_UFirst); } @@ -4093,6 +4112,28 
@@ _CONSTEXPR20 _OutIt transform( const auto _Count = _STD _Idl_distance<_InIt1>(_UFirst1, _ULast1); auto _UFirst2 = _STD _Get_unwrapped_n(_First2, _Count); auto _UDest = _STD _Get_unwrapped_n(_Dest, _Count); + + if constexpr (_Is_vb_iterator<_InIt1> && _Is_vb_iterator<_InIt2> && _Is_vb_iterator<_OutIt, true> + && !is_same_v<_Map_vb_functor_t<_Fn>, void>) { + if (_UFirst1._Myoff == 0 && _UFirst2._Myoff == 0 && _UDest._Myoff == 0) { + using _Vbase_ = decay_t; + + const _Map_vb_functor_t<_Fn> _Mapped_fn{}; + auto _First1_ptr = _UFirst1._Myptr; + auto _First2_ptr = _UFirst2._Myptr; + const auto _Last1_ptr = _ULast1._Myptr; + auto _Dest_ptr = const_cast<_Vbase_*>(_UDest._Myptr); + + for (; _First1_ptr != _Last1_ptr; ++_First1_ptr, ++_First2_ptr, ++_Dest_ptr) { + *_Dest_ptr = _Mapped_fn(*_First1_ptr, *_First2_ptr); + } + + _UFirst1._Myptr = _First1_ptr; + _UFirst2._Myptr = _First2_ptr; + _UDest._Myptr = _Dest_ptr; + } + } + for (; _UFirst1 != _ULast1; ++_UFirst1, (void) ++_UFirst2, ++_UDest) { *_UDest = _Func(*_UFirst1, *_UFirst2); } diff --git a/stl/inc/functional b/stl/inc/functional index 71cc7d7f3a0..f41378f15d1 100644 --- a/stl/inc/functional +++ b/stl/inc/functional @@ -258,6 +258,56 @@ struct bit_not { using is_transparent = int; }; +struct _Bit_xnor { + template + _NODISCARD constexpr auto operator()(_Ty1&& _Left, _Ty2&& _Right) const -> decltype(~(_Left ^ _Right)) { + return ~(_Left ^ _Right); + } +}; + +template +constexpr bool _Is_vbool_functor_arg = is_same_v<_Ty, void> || is_integral_v<_Ty>; + +template +struct _Map_vb_functor> { + using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_and<>, void>; +}; + +template +struct _Map_vb_functor> { + using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_and<>, void>; +}; + +template +struct _Map_vb_functor> { + using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_or<>, void>; +}; + +template +struct _Map_vb_functor> { + using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_or<>, void>; +}; 
+ +template +struct _Map_vb_functor> { + using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_xor<>, void>; +}; + +template +struct _Map_vb_functor> { + using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_xor<>, void>; +}; + +template +struct _Map_vb_functor> { + using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, _Bit_xnor, void>; +}; + +template +struct _Map_vb_functor> { + using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_not<>, void>; +}; + #if _HAS_DEPRECATED_NEGATORS _STL_DISABLE_DEPRECATED_WARNING _EXPORT_STD template diff --git a/stl/inc/xutility b/stl/inc/xutility index fe79367452b..ee761035aa2 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -4889,6 +4889,14 @@ _OutCtgIt _Copy_memmove_n(_CtgIt _First, const size_t _Object_count, _OutCtgIt _ return _STD _Copy_memmove_tail(_First_ch, _STD move(_Dest), _Byte_count, _Object_count); } +template +struct _Map_vb_functor { + using _Type = void; +}; + +template +using _Map_vb_functor_t = typename _Map_vb_functor<_Fn>::_Type; + template constexpr bool _Is_vb_iterator = false; From e09858c12565091916f78aa5531ecd37c411ac90 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Tue, 7 Oct 2025 12:29:32 +0300 Subject: [PATCH 04/14] tails --- stl/inc/algorithm | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index bac5471064e..ebb61c3203f 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -4084,8 +4084,15 @@ _CONSTEXPR20 _OutIt transform(const _InIt _First, const _InIt _Last, _OutIt _Des *_Dest_ptr = _Mapped_fn(*_First_ptr); } - _UFirst._Myptr = _First_ptr; - _UDest._Myptr = _Dest_ptr; + if (_ULast._Myoff != 0) { + const _Vbase_ _Mask = ((_Vbase_{1} << _ULast._Myoff) - 1); + *_Dest_ptr = (*_Dest_ptr & ~_Mask) | (_Mapped_fn(*_First_ptr) & _Mask); + _UDest._Myoff = _ULast._Myoff; + } + + _UDest._Myptr = _Dest_ptr; + _STD _Seek_wrapped(_Dest, _UDest); + return _Dest; } } @@ -4128,9 +4135,15 @@ _CONSTEXPR20 
_OutIt transform( *_Dest_ptr = _Mapped_fn(*_First1_ptr, *_First2_ptr); } - _UFirst1._Myptr = _First1_ptr; - _UFirst2._Myptr = _First2_ptr; - _UDest._Myptr = _Dest_ptr; + if (_ULast1._Myoff != 0) { + const _Vbase_ _Mask = ((_Vbase_{1} << _ULast1._Myoff) - 1); + *_Dest_ptr = (*_Dest_ptr & ~_Mask) | (_Mapped_fn(*_First1_ptr, *_First2_ptr) & _Mask); + _UDest._Myoff = _ULast1._Myoff; + } + + _UDest._Myptr = _Dest_ptr; + _STD _Seek_wrapped(_Dest, _UDest); + return _Dest; } } From 461defb15482b8b1eb58aec59ae0d2a20b486ee7 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Tue, 7 Oct 2025 14:05:21 +0300 Subject: [PATCH 05/14] -parens --- stl/inc/algorithm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index ebb61c3203f..ef6cd1bd2c7 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -4085,7 +4085,7 @@ _CONSTEXPR20 _OutIt transform(const _InIt _First, const _InIt _Last, _OutIt _Des } if (_ULast._Myoff != 0) { - const _Vbase_ _Mask = ((_Vbase_{1} << _ULast._Myoff) - 1); + const _Vbase_ _Mask = (_Vbase_{1} << _ULast._Myoff) - 1; *_Dest_ptr = (*_Dest_ptr & ~_Mask) | (_Mapped_fn(*_First_ptr) & _Mask); _UDest._Myoff = _ULast._Myoff; } @@ -4136,7 +4136,7 @@ _CONSTEXPR20 _OutIt transform( } if (_ULast1._Myoff != 0) { - const _Vbase_ _Mask = ((_Vbase_{1} << _ULast1._Myoff) - 1); + const _Vbase_ _Mask = (_Vbase_{1} << _ULast1._Myoff) - 1; *_Dest_ptr = (*_Dest_ptr & ~_Mask) | (_Mapped_fn(*_First1_ptr, *_First2_ptr) & _Mask); _UDest._Myoff = _ULast1._Myoff; } From 0f24d45309f2282fe296a95ee843bb084b6130d6 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Fri, 10 Oct 2025 20:56:30 +0300 Subject: [PATCH 06/14] move out --- stl/inc/algorithm | 47 ++++++++++------------------------------------- stl/inc/vector | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 37 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index ef6cd1bd2c7..cf7322000d0 100644 --- a/stl/inc/algorithm +++ 
b/stl/inc/algorithm @@ -4063,6 +4063,14 @@ _CONSTEXPR20 void iter_swap(_FwdIt1 _Left, _FwdIt2 _Right) { // swap *_Left and swap(*_Left, *_Right); // intentional ADL } +template +_CONSTEXPR20 _OutIt _Transform_vbool_aligned( + const _VbIt _First, const _VbIt _Last, _OutIt _Dest, const _Mapped_fn _Mapped_func); + +template +_CONSTEXPR20 _OutIt _Transform_vbool_aligned( + const _VbIt _First1, const _VbIt _Last1, const _VbIt _First2, _OutIt _Dest, const _Mapped_fn _Mapped_func); + _EXPORT_STD template _CONSTEXPR20 _OutIt transform(const _InIt _First, const _InIt _Last, _OutIt _Dest, _Fn _Func) { // transform [_First, _Last) with _Func @@ -4073,24 +4081,7 @@ _CONSTEXPR20 _OutIt transform(const _InIt _First, const _InIt _Last, _OutIt _Des if constexpr (_Is_vb_iterator<_InIt> && _Is_vb_iterator<_OutIt, true> && !is_same_v<_Map_vb_functor_t<_Fn>, void>) { if (_UFirst._Myoff == 0 && _UDest._Myoff == 0) { - using _Vbase_ = decay_t; - - const _Map_vb_functor_t<_Fn> _Mapped_fn{}; - auto _First_ptr = _UFirst._Myptr; - const auto _Last_ptr = _ULast._Myptr; - auto _Dest_ptr = const_cast<_Vbase_*>(_UDest._Myptr); - - for (; _First_ptr != _Last_ptr; ++_First_ptr, ++_Dest_ptr) { - *_Dest_ptr = _Mapped_fn(*_First_ptr); - } - - if (_ULast._Myoff != 0) { - const _Vbase_ _Mask = (_Vbase_{1} << _ULast._Myoff) - 1; - *_Dest_ptr = (*_Dest_ptr & ~_Mask) | (_Mapped_fn(*_First_ptr) & _Mask); - _UDest._Myoff = _ULast._Myoff; - } - - _UDest._Myptr = _Dest_ptr; + _UDest = _Transform_vbool_aligned(_UFirst, _ULast, _UDest, _Map_vb_functor_t<_Fn>{}); _STD _Seek_wrapped(_Dest, _UDest); return _Dest; } @@ -4123,25 +4114,7 @@ _CONSTEXPR20 _OutIt transform( if constexpr (_Is_vb_iterator<_InIt1> && _Is_vb_iterator<_InIt2> && _Is_vb_iterator<_OutIt, true> && !is_same_v<_Map_vb_functor_t<_Fn>, void>) { if (_UFirst1._Myoff == 0 && _UFirst2._Myoff == 0 && _UDest._Myoff == 0) { - using _Vbase_ = decay_t; - - const _Map_vb_functor_t<_Fn> _Mapped_fn{}; - auto _First1_ptr = _UFirst1._Myptr; - auto _First2_ptr = 
_UFirst2._Myptr; - const auto _Last1_ptr = _ULast1._Myptr; - auto _Dest_ptr = const_cast<_Vbase_*>(_UDest._Myptr); - - for (; _First1_ptr != _Last1_ptr; ++_First1_ptr, ++_First2_ptr, ++_Dest_ptr) { - *_Dest_ptr = _Mapped_fn(*_First1_ptr, *_First2_ptr); - } - - if (_ULast1._Myoff != 0) { - const _Vbase_ _Mask = (_Vbase_{1} << _ULast1._Myoff) - 1; - *_Dest_ptr = (*_Dest_ptr & ~_Mask) | (_Mapped_fn(*_First1_ptr, *_First2_ptr) & _Mask); - _UDest._Myoff = _ULast1._Myoff; - } - - _UDest._Myptr = _Dest_ptr; + _UDest = _Transform_vbool_aligned(_UFirst1, _ULast1, _UFirst2, _UDest, _Map_vb_functor_t<_Fn>{}); _STD _Seek_wrapped(_Dest, _UDest); return _Dest; } diff --git a/stl/inc/vector b/stl/inc/vector index 6d4811a8d5e..3c19e621c49 100644 --- a/stl/inc/vector +++ b/stl/inc/vector @@ -4004,6 +4004,48 @@ _CONSTEXPR20 _OutIt _Copy_vbool(_VbIt _First, _VbIt _Last, _OutIt _Dest) { return _DestEnd; } +template +_CONSTEXPR20 _OutIt _Transform_vbool_aligned( + const _VbIt _First, const _VbIt _Last, _OutIt _Dest, const _Mapped_fn _Mapped_func) { + auto _First_ptr = _First._Myptr; + const auto _Last_ptr = _Last._Myptr; + auto _Dest_ptr = const_cast<_Vbase*>(_Dest._Myptr); + + for (; _First_ptr != _Last_ptr; ++_First_ptr, ++_Dest_ptr) { + *_Dest_ptr = _Mapped_func(*_First_ptr); + } + + if (_Last._Myoff != 0) { + const _Vbase _Mask = (_Vbase{1} << _Last._Myoff) - 1; + *_Dest_ptr = (*_Dest_ptr & ~_Mask) | (_Mapped_func(*_First_ptr) & _Mask); + _Dest._Myoff = _Last._Myoff; + } + + return _Dest; +} + +template +_CONSTEXPR20 _OutIt _Transform_vbool_aligned( + const _VbIt _First1, const _VbIt _Last1, const _VbIt _First2, _OutIt _Dest, const _Mapped_fn _Mapped_func) { + auto _First1_ptr = _First1._Myptr; + auto _First2_ptr = _First2._Myptr; + const auto _Last1_ptr = _Last1._Myptr; + auto _Dest_ptr = const_cast<_Vbase*>(_Dest._Myptr); + + for (; _First1_ptr != _Last1_ptr; ++_First1_ptr, ++_First2_ptr, ++_Dest_ptr) { + *_Dest_ptr = _Mapped_func(*_First1_ptr, *_First2_ptr); + } + + if 
(_Last1._Myoff != 0) { + const _Vbase _Mask = (_Vbase{1} << _Last1._Myoff) - 1; + *_Dest_ptr = (*_Dest_ptr & ~_Mask) | (_Mapped_func(*_First1_ptr, *_First2_ptr) & _Mask); + _Dest._Myoff = _Last1._Myoff; + } + + _Dest._Myptr = _Dest_ptr; + return _Dest; +} + #undef _ASAN_VECTOR_MODIFY #undef _ASAN_VECTOR_REMOVE #undef _ASAN_VECTOR_CREATE From fe63be7e47fec211d075acb7deeef1361e9867f1 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 21 Oct 2025 06:00:31 -0700 Subject: [PATCH 07/14] Include `<functional>` for `logical_meow`. --- benchmarks/src/vector_bool_transform.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarks/src/vector_bool_transform.cpp b/benchmarks/src/vector_bool_transform.cpp index eac202f20bb..4f54882c18c 100644 --- a/benchmarks/src/vector_bool_transform.cpp +++ b/benchmarks/src/vector_bool_transform.cpp @@ -5,6 +5,7 @@ // #include #include +#include #include #include From b2739524b1c8a82ebc021050758629116381769f Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 21 Oct 2025 06:21:19 -0700 Subject: [PATCH 08/14] Use plain `type`.
--- stl/inc/functional | 16 ++++++++-------- stl/inc/xutility | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/stl/inc/functional b/stl/inc/functional index f9e19165178..ddfad5ed3a0 100644 --- a/stl/inc/functional +++ b/stl/inc/functional @@ -270,42 +270,42 @@ constexpr bool _Is_vbool_functor_arg = is_same_v<_Ty, void> || is_integral_v<_Ty template struct _Map_vb_functor> { - using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_and<>, void>; + using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_and<>, void>; }; template struct _Map_vb_functor> { - using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_and<>, void>; + using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_and<>, void>; }; template struct _Map_vb_functor> { - using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_or<>, void>; + using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_or<>, void>; }; template struct _Map_vb_functor> { - using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_or<>, void>; + using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_or<>, void>; }; template struct _Map_vb_functor> { - using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_xor<>, void>; + using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_xor<>, void>; }; template struct _Map_vb_functor> { - using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_xor<>, void>; + using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_xor<>, void>; }; template struct _Map_vb_functor> { - using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, _Bit_xnor, void>; + using type = conditional_t<_Is_vbool_functor_arg<_Ty>, _Bit_xnor, void>; }; template struct _Map_vb_functor> { - using _Type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_not<>, void>; + using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_not<>, void>; }; #if _HAS_DEPRECATED_NEGATORS diff --git a/stl/inc/xutility b/stl/inc/xutility index 3c4994ffd85..d48a510f169 100644 --- 
a/stl/inc/xutility +++ b/stl/inc/xutility @@ -4884,11 +4884,11 @@ _OutCtgIt _Copy_memmove_n(_CtgIt _First, const size_t _Object_count, _OutCtgIt _ template struct _Map_vb_functor { - using _Type = void; + using type = void; }; template -using _Map_vb_functor_t = typename _Map_vb_functor<_Fn>::_Type; +using _Map_vb_functor_t = typename _Map_vb_functor<_Fn>::type; template constexpr bool _Is_vb_iterator = false; From 956aef83d133f48456fd74561b179973e8a15928 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 21 Oct 2025 06:38:04 -0700 Subject: [PATCH 09/14] Make `_Bit_xnor` fully transparent. --- stl/inc/functional | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/stl/inc/functional b/stl/inc/functional index ddfad5ed3a0..2e74d4088e4 100644 --- a/stl/inc/functional +++ b/stl/inc/functional @@ -260,9 +260,12 @@ struct bit_not { struct _Bit_xnor { template - _NODISCARD constexpr auto operator()(_Ty1&& _Left, _Ty2&& _Right) const -> decltype(~(_Left ^ _Right)) { - return ~(_Left ^ _Right); + _NODISCARD constexpr auto operator()(_Ty1&& _Left, _Ty2&& _Right) const // + -> decltype(~(_STD forward<_Ty1>(_Left) ^ _STD forward<_Ty2>(_Right))) { + return ~(_STD forward<_Ty1>(_Left) ^ _STD forward<_Ty2>(_Right)); } + + using is_transparent = int; }; template From 057a04f0e57babcc37156208e61adee11b5224a5 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 21 Oct 2025 06:43:09 -0700 Subject: [PATCH 10/14] Comment why `bit_not` isn't optimized. 
--- stl/inc/functional | 3 +++ 1 file changed, 3 insertions(+) diff --git a/stl/inc/functional b/stl/inc/functional index 2e74d4088e4..0ec049912b0 100644 --- a/stl/inc/functional +++ b/stl/inc/functional @@ -311,6 +311,9 @@ struct _Map_vb_functor> { using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_not<>, void>; }; +// bit_not isn't mapped to itself because it emits MSVC warning C4804 "'~': unsafe use of type 'bool' in operation" +// and Clang -Wbool-operation "bitwise negation of a boolean expression; did you mean logical negation?". + #if _HAS_DEPRECATED_NEGATORS _STL_DISABLE_DEPRECATED_WARNING _EXPORT_STD template From 2ee901e8b8a7fd5b4f88f27a0eae43aa17675253 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 21 Oct 2025 06:46:24 -0700 Subject: [PATCH 11/14] Drop const on value params in declarations. --- stl/inc/algorithm | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 1b18a2400ff..5c639f9ac2d 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -4064,12 +4064,11 @@ _CONSTEXPR20 void iter_swap(_FwdIt1 _Left, _FwdIt2 _Right) { // swap *_Left and } template -_CONSTEXPR20 _OutIt _Transform_vbool_aligned( - const _VbIt _First, const _VbIt _Last, _OutIt _Dest, const _Mapped_fn _Mapped_func); +_CONSTEXPR20 _OutIt _Transform_vbool_aligned(_VbIt _First, _VbIt _Last, _OutIt _Dest, _Mapped_fn _Mapped_func); template _CONSTEXPR20 _OutIt _Transform_vbool_aligned( - const _VbIt _First1, const _VbIt _Last1, const _VbIt _First2, _OutIt _Dest, const _Mapped_fn _Mapped_func); + _VbIt _First1, _VbIt _Last1, _VbIt _First2, _OutIt _Dest, _Mapped_fn _Mapped_func); _EXPORT_STD template _CONSTEXPR20 _OutIt transform(const _InIt _First, const _InIt _Last, _OutIt _Dest, _Fn _Func) { From c9749a6f4ff88c03a1b66d1c7333121e23475e97 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 21 Oct 2025 07:06:54 -0700 Subject: [PATCH 12/14] Fix regression by handling varying constness. 
--- stl/inc/algorithm | 4 ++-- stl/inc/vector | 4 ++-- .../GH_000625_vector_bool_optimization/test.cpp | 12 +++++++++--- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 5c639f9ac2d..d5832db3e16 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -4066,9 +4066,9 @@ _CONSTEXPR20 void iter_swap(_FwdIt1 _Left, _FwdIt2 _Right) { // swap *_Left and template _CONSTEXPR20 _OutIt _Transform_vbool_aligned(_VbIt _First, _VbIt _Last, _OutIt _Dest, _Mapped_fn _Mapped_func); -template +template _CONSTEXPR20 _OutIt _Transform_vbool_aligned( - _VbIt _First1, _VbIt _Last1, _VbIt _First2, _OutIt _Dest, _Mapped_fn _Mapped_func); + _VbIt1 _First1, _VbIt1 _Last1, _VbIt2 _First2, _OutIt _Dest, _Mapped_fn _Mapped_func); _EXPORT_STD template _CONSTEXPR20 _OutIt transform(const _InIt _First, const _InIt _Last, _OutIt _Dest, _Fn _Func) { diff --git a/stl/inc/vector b/stl/inc/vector index 3c19e621c49..44e7841a7ed 100644 --- a/stl/inc/vector +++ b/stl/inc/vector @@ -4024,9 +4024,9 @@ _CONSTEXPR20 _OutIt _Transform_vbool_aligned( return _Dest; } -template +template _CONSTEXPR20 _OutIt _Transform_vbool_aligned( - const _VbIt _First1, const _VbIt _Last1, const _VbIt _First2, _OutIt _Dest, const _Mapped_fn _Mapped_func) { + const _VbIt1 _First1, const _VbIt1 _Last1, const _VbIt2 _First2, _OutIt _Dest, const _Mapped_fn _Mapped_func) { auto _First1_ptr = _First1._Myptr; auto _First2_ptr = _First2._Myptr; const auto _Last1_ptr = _Last1._Myptr; diff --git a/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp b/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp index f7683b976b0..f5627947a7f 100644 --- a/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp +++ b/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp @@ -105,16 +105,22 @@ CONSTEXPR20 void test_transform_helper(const size_t length) { vector xnor_actual(length + 3); vector not_actual(length + 3); + // Also test combinations of vector::iterator 
and vector::const_iterator for the inputs. + + // iterator, iterator transform(source1.begin(), source1.begin() + length, source2.begin(), and_actual.begin(), logical_and<>{}); assert(and_actual == and_expected); - transform(source1.begin(), source1.begin() + length, source2.begin(), or_actual.begin(), logical_or<>{}); + // iterator, const_iterator + transform(source1.begin(), source1.begin() + length, source2.cbegin(), or_actual.begin(), logical_or<>{}); assert(or_actual == or_expected); - transform(source1.begin(), source1.begin() + length, source2.begin(), xor_actual.begin(), not_equal_to<>{}); + // const_iterator, iterator + transform(source1.cbegin(), source1.cbegin() + length, source2.begin(), xor_actual.begin(), not_equal_to<>{}); assert(xor_actual == xor_expected); - transform(source1.begin(), source1.begin() + length, source2.begin(), xnor_actual.begin(), equal_to<>{}); + // const_iterator, const_iterator + transform(source1.cbegin(), source1.cbegin() + length, source2.cbegin(), xnor_actual.begin(), equal_to<>{}); assert(xnor_actual == xnor_expected); transform(source1.begin(), source1.begin() + length, not_actual.begin(), logical_not<>{}); From 0993235e1d49bfafa61a1451b172cf925cd92ed6 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 21 Oct 2025 08:05:32 -0700 Subject: [PATCH 13/14] Fix regression in unary transform's return value. 
--- stl/inc/vector | 1 + .../test.cpp | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/stl/inc/vector b/stl/inc/vector index 44e7841a7ed..dafac7c5933 100644 --- a/stl/inc/vector +++ b/stl/inc/vector @@ -4021,6 +4021,7 @@ _CONSTEXPR20 _OutIt _Transform_vbool_aligned( _Dest._Myoff = _Last._Myoff; } + _Dest._Myptr = _Dest_ptr; return _Dest; } diff --git a/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp b/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp index f5627947a7f..80a1dbc2aae 100644 --- a/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp +++ b/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp @@ -108,23 +108,32 @@ CONSTEXPR20 void test_transform_helper(const size_t length) { // Also test combinations of vector::iterator and vector::const_iterator for the inputs. // iterator, iterator - transform(source1.begin(), source1.begin() + length, source2.begin(), and_actual.begin(), logical_and<>{}); + const auto and_ret = + transform(source1.begin(), source1.begin() + length, source2.begin(), and_actual.begin(), logical_and<>{}); assert(and_actual == and_expected); + assert(and_ret == and_actual.begin() + length); // iterator, const_iterator - transform(source1.begin(), source1.begin() + length, source2.cbegin(), or_actual.begin(), logical_or<>{}); + const auto or_ret = + transform(source1.begin(), source1.begin() + length, source2.cbegin(), or_actual.begin(), logical_or<>{}); assert(or_actual == or_expected); + assert(or_ret == or_actual.begin() + length); // const_iterator, iterator - transform(source1.cbegin(), source1.cbegin() + length, source2.begin(), xor_actual.begin(), not_equal_to<>{}); + const auto xor_ret = + transform(source1.cbegin(), source1.cbegin() + length, source2.begin(), xor_actual.begin(), not_equal_to<>{}); assert(xor_actual == xor_expected); + assert(xor_ret == xor_actual.begin() + length); // const_iterator, const_iterator - transform(source1.cbegin(), 
source1.cbegin() + length, source2.cbegin(), xnor_actual.begin(), equal_to<>{}); + const auto xnor_ret = + transform(source1.cbegin(), source1.cbegin() + length, source2.cbegin(), xnor_actual.begin(), equal_to<>{}); assert(xnor_actual == xnor_expected); + assert(xnor_ret == xnor_actual.begin() + length); - transform(source1.begin(), source1.begin() + length, not_actual.begin(), logical_not<>{}); + const auto not_ret = transform(source1.begin(), source1.begin() + length, not_actual.begin(), logical_not<>{}); assert(not_actual == not_expected); + assert(not_ret == not_actual.begin() + length); } CONSTEXPR20 bool test_transform() { From fddfc2c549ecfe496e4f95b3251174a2eb763eb1 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 21 Oct 2025 08:57:04 -0700 Subject: [PATCH 14/14] Extract iterator/const_iterator, add scopes, test bit_meow. --- .../test.cpp | 90 +++++++++++++------ 1 file changed, 63 insertions(+), 27 deletions(-) diff --git a/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp b/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp index 80a1dbc2aae..41e73cff35a 100644 --- a/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp +++ b/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp @@ -106,34 +106,70 @@ CONSTEXPR20 void test_transform_helper(const size_t length) { vector not_actual(length + 3); // Also test combinations of vector::iterator and vector::const_iterator for the inputs. 
+ const auto first1 = source1.begin(); + const auto cfirst1 = source1.cbegin(); + const auto first2 = source2.begin(); + const auto cfirst2 = source2.cbegin(); + const auto last1 = first1 + length; + const auto clast1 = cfirst1 + length; - // iterator, iterator - const auto and_ret = - transform(source1.begin(), source1.begin() + length, source2.begin(), and_actual.begin(), logical_and<>{}); - assert(and_actual == and_expected); - assert(and_ret == and_actual.begin() + length); - - // iterator, const_iterator - const auto or_ret = - transform(source1.begin(), source1.begin() + length, source2.cbegin(), or_actual.begin(), logical_or<>{}); - assert(or_actual == or_expected); - assert(or_ret == or_actual.begin() + length); - - // const_iterator, iterator - const auto xor_ret = - transform(source1.cbegin(), source1.cbegin() + length, source2.begin(), xor_actual.begin(), not_equal_to<>{}); - assert(xor_actual == xor_expected); - assert(xor_ret == xor_actual.begin() + length); - - // const_iterator, const_iterator - const auto xnor_ret = - transform(source1.cbegin(), source1.cbegin() + length, source2.cbegin(), xnor_actual.begin(), equal_to<>{}); - assert(xnor_actual == xnor_expected); - assert(xnor_ret == xnor_actual.begin() + length); - - const auto not_ret = transform(source1.begin(), source1.begin() + length, not_actual.begin(), logical_not<>{}); - assert(not_actual == not_expected); - assert(not_ret == not_actual.begin() + length); + { + auto and_ret = transform(first1, last1, first2, and_actual.begin(), logical_and<>{}); + assert(and_actual == and_expected); + assert(and_ret == and_actual.begin() + length); + + and_actual.assign(and_actual.size(), false); + + and_ret = transform(first1, last1, first2, and_actual.begin(), bit_and<>{}); + assert(and_actual == and_expected); + assert(and_ret == and_actual.begin() + length); + } + + { + auto or_ret = transform(first1, last1, cfirst2, or_actual.begin(), logical_or<>{}); + assert(or_actual == or_expected); + 
assert(or_ret == or_actual.begin() + length); + + or_actual.assign(or_actual.size(), false); + + or_ret = transform(first1, last1, cfirst2, or_actual.begin(), bit_or<>{}); + assert(or_actual == or_expected); + assert(or_ret == or_actual.begin() + length); + } + + { + auto xor_ret = transform(cfirst1, clast1, first2, xor_actual.begin(), not_equal_to<>{}); + assert(xor_actual == xor_expected); + assert(xor_ret == xor_actual.begin() + length); + + xor_actual.assign(xor_actual.size(), false); + + xor_ret = transform(cfirst1, clast1, first2, xor_actual.begin(), bit_xor<>{}); + assert(xor_actual == xor_expected); + assert(xor_ret == xor_actual.begin() + length); + } + + { + const auto xnor_ret = transform(cfirst1, clast1, cfirst2, xnor_actual.begin(), equal_to<>{}); + assert(xnor_actual == xnor_expected); + assert(xnor_ret == xnor_actual.begin() + length); + + // bit_xnor doesn't exist in the Standard + } + + { + auto not_ret = transform(first1, last1, not_actual.begin(), logical_not<>{}); + assert(not_actual == not_expected); + assert(not_ret == not_actual.begin() + length); + + not_actual.assign(not_actual.size(), false); + + // bit_not emits MSVC and Clang warnings, so it isn't optimized. + // Continue using logical_not to test vector::const_iterator: + not_ret = transform(cfirst1, clast1, not_actual.begin(), logical_not<>{}); + assert(not_actual == not_expected); + assert(not_ret == not_actual.begin() + length); + } } CONSTEXPR20 bool test_transform() {