Skip to content

Implement lane-wise modulo. #86

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 44 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
295d602
new literals
jamierpond Apr 16, 2024
dbf8115
PR feedback
jamierpond Apr 19, 2024
2327807
tidy tests
jamierpond Apr 19, 2024
7c10d29
tidy reused code for booleanswaer
jamierpond Apr 19, 2024
0b8b214
update name
jamierpond Apr 19, 2024
e3a802d
style
jamierpond Apr 19, 2024
7ed6e88
test style
jamierpond Apr 19, 2024
03edd02
fix
jamierpond Apr 19, 2024
0648bb1
undef util
jamierpond Apr 19, 2024
0d0a833
simplify again
jamierpond Apr 19, 2024
33183cd
fmt
jamierpond Apr 19, 2024
a7d744d
Add to Array
jamierpond Apr 19, 2024
06af01c
rename
jamierpond Apr 20, 2024
ffc1120
nailed it
jamierpond Apr 20, 2024
5b837e6
cleanup
jamierpond Apr 20, 2024
c97318f
array tests?
jamierpond Apr 20, 2024
3dd0296
indentaion
jamierpond Apr 20, 2024
da9ccb9
update indent
jamierpond Apr 20, 2024
da64b7d
from array
jamierpond Apr 20, 2024
11ccd9e
more updates
jamierpond May 13, 2024
aab033a
wip
jamierpond May 15, 2024
ba6a5ba
rm unused
jamierpond May 15, 2024
f3d9f42
respect 80 chars
jamierpond May 15, 2024
783f189
undo clang formatting
jamierpond May 15, 2024
f9e28b1
format boolean swar
jamierpond May 15, 2024
d636521
improvementws
jamierpond May 26, 2024
d880691
indentation
jamierpond May 26, 2024
bbad583
make sure we understand equality
jamierpond May 26, 2024
f6a04f1
works
jamierpond May 26, 2024
e39d298
implement modulo
jamierpond May 26, 2024
0ca3600
modulo works!
jamierpond May 26, 2024
f7ffe70
snifae
jamierpond May 26, 2024
53e56f5
better example
jamierpond May 26, 2024
b62355a
Update SWAR.h
thecppzoo May 26, 2024
bf93d56
Update SWAR.h
thecppzoo May 26, 2024
79d3847
Attempt to sidestep MSVC bug
thecppzoo May 26, 2024
a585ad9
camelCawe
jamierpond May 27, 2024
711bb08
Merge branch 'jp/swar-to-array' into jp/ml-basics
jamierpond May 27, 2024
f1452cb
indentatino
jamierpond May 27, 2024
f45632a
auto
jamierpond May 27, 2024
79148d3
add utils
jamierpond May 27, 2024
08a73e1
tidy up
jamierpond May 27, 2024
0228573
weren't using that anyway
jamierpond May 27, 2024
0829a7e
tidy up tests
jamierpond May 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions inc/zoo/meta/BitmaskMaker.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ struct BitmaskMaker {
static_assert(0xF0F0 == BitmaskMaker<uint16_t, 0xF0, 8>::value);
static_assert(0xEDFEDFED == BitmaskMaker<uint32_t, 0xFED, 12>::value);



}} // zoo::meta

#endif
84 changes: 76 additions & 8 deletions inc/zoo/swar/SWAR.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "zoo/meta/log.h"

#include <array>
#include <type_traits>

#ifdef _MSC_VER
Expand All @@ -12,6 +13,16 @@

namespace zoo { namespace swar {

template <int NBits, typename T>
struct SWAR;

/// Empty tag type used to select the "literal" constructors of
/// SWAR<NumBits, BaseType> and to drive their deduction guides.
template <int NumBits, typename BaseType>
struct Literals_t {
    // Null pointer-to-member of the matching SWAR instantiation.
    // NOTE(review): presumably exists only to tie this tag to that SWAR
    // type; nothing visible here reads it -- confirm before removing.
    static constexpr void (SWAR<NumBits, BaseType>::*value)() = nullptr;
};

/// Ready-made instance of the Literals_t tag for a given lane width and
/// base type; pass it as the first constructor argument to request
/// construction from a list of per-lane values.
template <int NumBits, typename BaseType>
constexpr Literals_t<NumBits, BaseType> Literals{};

using u64 = uint64_t;
using u32 = uint32_t;
using u16 = uint16_t;
Expand Down Expand Up @@ -52,6 +63,7 @@ constexpr std::make_unsigned_t<T> lsbIndex(T v) noexcept {
template<int NBits_, typename T = uint64_t>
struct SWAR {
using type = std::make_unsigned_t<T>;
constexpr static auto Literal = Literals<NBits_, T>;
constexpr static inline type
NBits = NBits_,
BitWidth = sizeof(T) * 8,
Expand All @@ -62,13 +74,53 @@ struct SWAR {
AllOnes = ~std::make_unsigned_t<T>{0} >> PaddingBitsCount, // Also constructed in RobinHood utils: possible bug?
LeastSignificantBit = meta::BitmaskMaker<T, std::make_unsigned_t<T>{1}, NBits>::value,
MostSignificantBit = LeastSignificantBit << (NBits - 1),
LeastSignificantLaneMask =
sizeof(T) * 8 == NBits ? // needed to avoid shifting all bits
~T(0) :
~(~T(0) << NBits),
LeastSignificantLaneMask = []() {
if constexpr (NBits < sizeof(T) * 8) {
return (T(1) << NBits) - 1;
} else {
return ~T(0);
}
}(),
// Use LowerBits in favor of ~MostSignificantBit to not pollute
// "don't care" bits when non-power-of-two bit lane sizes are supported
LowerBits = MostSignificantBit - LeastSignificantBit;
LowerBits = MostSignificantBit - LeastSignificantBit,
MaxUnsignedLaneValue = LeastSignificantLaneMask;

/// Packs the values in [first, last) into a single T, one value per lane.
///
/// Each step shifts the accumulated value up by one lane and ORs the next
/// element into the lowest lane, so the first element ends up in the most
/// significant occupied lane and the last element in lane 0.
/// Elements are not masked: a value wider than NBits spills into the
/// neighbouring lane.
///
/// \param first iterator to the first value to pack
/// \param last  past-the-end iterator
/// \return the packed bits as a T (not wrapped in a SWAR)
template <typename InputIt>
constexpr static auto from_range(InputIt first, InputIt last) noexcept {
    auto packed = T{0};
    for (; first != last; ++first) {
        if constexpr (NBits < sizeof(T) * 8) {
            packed = (packed << NBits) | *first;
        } else {
            // A single lane spans the whole of T: shifting by the full
            // width is undefined behaviour (and rejected in constant
            // evaluation), so the element simply becomes the value.
            packed = *first;
        }
    }
    return packed;
}

/// Builds a SWAR from a built-in array holding exactly `Lanes` values.
/// Packing is delegated to from_range, so values[0] lands in the most
/// significant lane and values[Lanes - 1] in lane 0.
template <typename U>
constexpr static auto from_array(const U (&values)[Lanes]) noexcept {
    const U *const head = values;
    return SWAR{from_range(head, head + Lanes)};
}

template <typename U>
constexpr static auto from_array(const std::array<T, Lanes> &values) noexcept {
using std::begin; using std::end;
return SWAR{from_range(begin(values), end(values))};
}

/// Constructs from a std::array of per-lane values. The elements are packed
/// by from_range, so array[0] occupies the most significant lane and
/// array[Lanes - 1] occupies lane 0.
/// NOTE(review): this constructor is not `explicit`, so a std::array converts
/// implicitly to a SWAR -- confirm that this is intended.
constexpr SWAR(const std::array<T, Lanes> &array) : m_v{from_range(array.begin(), array.end())} {}

/// "Literal" constructor: takes the Literals tag plus a built-in array of
/// per-lane values. The enable_if removes this overload unless the array has
/// exactly `Lanes` elements. Packing is done by from_array.
/// NOTE(review): from_array returns a SWAR while m_v is a T, so this relies
/// on a SWAR-to-T conversion declared outside the visible hunk -- confirm.
template <typename Arg, std::size_t N, typename = std::enable_if_t<N == Lanes, int>>
constexpr
SWAR(Literals_t<NBits, T>, const Arg (&values)[N]) : m_v{from_array(values)} {}

/// Unpacks the lanes into a std::array, mirroring the order used by
/// from_range: lane 0 is written to the last slot and the highest lane to
/// slot 0.
/// NOTE(review): at(i) is defined outside the visible hunk; presumably it
/// yields the value stored in lane i -- confirm.
constexpr std::array<T, Lanes> to_array() const noexcept {
    std::array<T, Lanes> unpacked = {};
    for (int lane = 0; lane < Lanes; ++lane) {
        unpacked[Lanes - lane - 1] = at(lane);
    }
    return unpacked;
}

SWAR() = default;
constexpr explicit SWAR(T v): m_v(v) {}
Expand Down Expand Up @@ -161,6 +213,12 @@ struct SWAR {
T m_v;
};

/// Deduction guide: `SWAR{Literals<NBits, T>, {a, b, ...}}` with a built-in
/// array of exactly Lanes elements deduces SWAR<NBits, T> from the tag.
template <int NBits, typename T, typename Arg>
SWAR(Literals_t<NBits, T>, const Arg (&values)[SWAR<NBits, T>::Lanes]) -> SWAR<NBits, T>;

/// Deduction guide for the tag plus a std::array of Lanes elements.
/// NOTE(review): no matching (Literals_t, std::array) constructor is visible
/// in this hunk -- confirm that one exists.
template <int NBits, typename T>
SWAR(Literals_t<NBits, T>, const std::array<T, SWAR<NBits, T>::Lanes>&) -> SWAR<NBits, T>;

/// Defining operator== on base SWAR types is entirely too error prone. Force a verbose invocation.
template<int NBits, typename T = uint64_t>
constexpr auto horizontalEquality(SWAR<NBits, T> left, SWAR<NBits, T> right) {
Expand Down Expand Up @@ -231,6 +289,10 @@ template<int NBits, typename T>
struct BooleanSWAR: SWAR<NBits, T> {
using Base = SWAR<NBits, T>;

/// "Literal" constructor from an array of bools, one per lane.
/// The base-class literal constructor packs every bool into the least
/// significant bit of its lane; the bits are then moved up to each lane's
/// most significant bit, which is where BooleanSWAR keeps its booleans.
template <std::size_t N>
constexpr BooleanSWAR(Literals_t<NBits, T>, const bool (&values)[N])
    : Base(Literals<NBits, T>, values)
{
    this->m_v = this->m_v << (NBits - 1);
}

// Booleanness is stored in the MSBs
static constexpr auto MaskMSB =
broadcast<NBits, T>(Base(T(1) << (NBits -1)));
Expand All @@ -240,7 +302,7 @@ struct BooleanSWAR: SWAR<NBits, T> {
static constexpr auto MaskNonLSB = ~MaskLSB;
static constexpr auto MaskNonMSB = ~MaskMSB;
constexpr explicit BooleanSWAR(T v): Base(v) {}

/// Returns a copy of this BooleanSWAR with the boolean of lane `bit` set to
/// false; every other lane is left untouched.
///
/// The lane's boolean lives in its most significant bit, so that bit is
/// masked out. AND-NOT is used rather than XOR so that a lane which is
/// already false stays false (XOR would flip it to true); for a lane that is
/// true both give the same result. The result is constructed explicitly
/// because the constructor from T is `explicit`.
///
/// \param bit index of the lane to clear (0 is the least significant lane)
constexpr BooleanSWAR clear(int bit) const noexcept {
    constexpr auto LaneFlag = T(1) << (NBits - 1);
    return BooleanSWAR(
        static_cast<T>(this->m_v & ~(LaneFlag << (NBits * bit))));
}
Expand All @@ -256,7 +318,7 @@ struct BooleanSWAR: SWAR<NBits, T> {
/// Lane-wise boolean negation: XORs this value with a SWAR that has only the
/// most significant bit of every lane set, flipping each lane's boolean and
/// leaving the remaining bits as they were.
constexpr auto operator ~() const noexcept {
return BooleanSWAR(Base{Base::MostSignificantBit} ^ *this);
}

/// Logical NOT per lane: XORs with MaskMSB (the broadcast of a lane holding
/// only its top bit), so exactly the boolean bit of every lane is flipped.
constexpr auto operator not() const noexcept {
return BooleanSWAR(MaskMSB ^ *this);
}
Expand Down Expand Up @@ -305,6 +367,12 @@ struct BooleanSWAR: SWAR<NBits, T> {
convertToBooleanSWAR(SWAR<NB, TT> arg) noexcept;
};

/// Deduction guide: the Literals tag followed by a built-in array of bools
/// with exactly Lanes elements deduces BooleanSWAR<NBits, T> from the tag.
template <int NBits, typename T>
BooleanSWAR(
Literals_t<NBits, T>,
const bool (&values)[BooleanSWAR<NBits, T>::Lanes])
-> BooleanSWAR<NBits, T>;

template<int NBits, typename T>
constexpr BooleanSWAR<NBits, T>
convertToBooleanSWAR(SWAR<NBits, T> arg) noexcept {
Expand Down Expand Up @@ -381,7 +449,7 @@ greaterEqual(SWAR<NBits, T> left, SWAR<NBits, T> right) noexcept {
using S = swar::SWAR<NBits, T>;
const auto h = S::MostSignificantBit, x = left.value(), y = right.value(); // x=left, y= right is x < y
const auto z = (x|h) - (y&~h);
// bitwise ternary median!
// bitwise ternary median!
const auto t = h & ~median(x, ~y, z);
return ~BooleanSWAR<NBits, T>{static_cast<T>(t)}; // ~(x<y) === x >= y
}
Expand Down
64 changes: 64 additions & 0 deletions inc/zoo/swar/math.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#pragma once
#include "SWAR.h"

namespace zoo::math {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You know, should you have used maths, I would have let it slide. But since you didn't, if you try to use the British maths I will reject the renaming on the basis that there is no need to make a change like that
;-)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wouldn't have let 'maths' slide.


/// Tells whether `x` is a power of two (1, 2, 4, ...).
///
/// Zero and negative values are never powers of two. Requiring `x > 0`
/// before evaluating `x - 1` also keeps the most negative signed value from
/// overflowing, which would be undefined behaviour and, in practice, made
/// that value look like a power of two.
///
/// \tparam IntegerType any integral type (enforced through enable_if)
/// \param x value to test
/// \return true when exactly one bit of a positive `x` is set
template <typename IntegerType = size_t>
constexpr static
std::enable_if_t<std::is_integral_v<IntegerType>, bool>
isPowerOfTwo(IntegerType x) noexcept {
    return x > 0 && (x & (x - 1)) == 0;
}

/// Compile-time flavour of isPowerOfTwo: the value under test is supplied as
/// the template argument `X` and forwarded to the run-time overload, which
/// makes the check usable inside enable_if conditions.
///
/// `X` cannot be deduced, so callers always spell out both arguments, e.g.
/// `isPowerOfTwo<size_t, 8>()`.
template <typename IntegerType = size_t, IntegerType X>
constexpr static
std::enable_if_t<std::is_integral_v<IntegerType>, bool>
isPowerOfTwo() noexcept {
    return isPowerOfTwo<IntegerType>(X);
}


/// Computes `x % N` for a modulus `N` known at compile time to be a power of
/// two, by keeping only the low bits of `x`.
///
/// The overload is removed from consideration (enable_if) unless
/// IntegerType is integral and N is a power of two.
///
/// \tparam N modulus; must be a power of two
/// \param x value to reduce
/// \return the low log2(N) bits of `x`, as a size_t
template <size_t N, typename IntegerType = size_t>
constexpr static
std::enable_if_t<
    std::is_integral_v<IntegerType> &&
    isPowerOfTwo<size_t, N>(), size_t>
moduloPowerOfTwo(IntegerType x) noexcept {
    constexpr size_t LowBitsMask = N - 1;
    return x & LowBitsMask;
}

}

namespace zoo::swar {
/// Subtracts one from every lane of `x` with a single whole-word
/// subtraction of S::LeastSignificantBit.
///
/// "Unsafe" because nothing stops a borrow: a lane holding zero wraps around
/// and takes one extra from the lane above it.
///
/// \tparam S SWAR-like type exposing LeastSignificantBit and value()
/// \param x lanes to decrement
/// \return a new S holding the decremented bits
template <typename S>
constexpr static auto subtractOneUnsafe(S x) noexcept {
    return S{x.value() - S::LeastSignificantBit};
}
// todo subtract K unsafe using BitmaskMaker
// todo subtract K "saturated" using BitmaskMaker

/// Lane-wise power-of-two test: the result is true for every lane of `x`
/// whose value is 1, 2, 4, ... and false otherwise (including zero).
///
/// Two details matter for correctness:
///  - the non-zero check compares against one per lane; comparing
///    "greater or equal" against zero is true for every unsigned lane and
///    would let zero pass as a power of two;
///  - one is subtracted only from the non-zero lanes, so no lane ever
///    borrows from its neighbour and corrupts that neighbour's result.
///
/// \tparam S SWAR type
/// \param x lanes to test
/// \return the conjunction of the per-lane "non-zero" and
///         "x & (x - 1) == 0" booleans
template <typename S>
constexpr static auto isPowerOfTwo(S x) noexcept {
    constexpr auto One = S::LeastSignificantBit;
    // True in the lanes holding a value >= 1.
    const auto nonZero = greaterEqual(x, S{One});
    // The boolean of each lane sits in its most significant bit; move it
    // down to the lane's least significant bit to get a per-lane 0 or 1.
    const auto decrement = (nonZero.value() >> (S::NBits - 1)) & One;
    const auto xMinusOne = S{x.value() - decrement};
    const auto noSharedBits =
        equals(S{xMinusOne.value() & x.value()}, S{0});
    return nonZero & noSharedBits;
}

template <size_t N, typename S>
constexpr static
std::enable_if_t<zoo::math::isPowerOfTwo<size_t, N>(), S>
moduloPowerOfTwo(const S x) noexcept {
constexpr auto N_minus_1 = N - 1;
constexpr auto N_in_lanes = zoo::meta::BitmaskMaker<typename S::type, N_minus_1, S::NBits>::value;
auto y = x.value() & N_in_lanes;
return S{y};
}

Comment on lines +54 to +61
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The way this is formulated is perhaps not useful:
For integral types, when the compiler can prove the modulus is a power of two, the compiler will, for sure, use bitwise conjunction rather than the assembler for the modulo instruction.
There remains the question of what happens when the compiler doesn't know whether the modulus is a power of two; that would only be known at runtime, and programmers have explicitly accounted for that case for ages, so I don't think this way of going about it will give them an incentive to use this primitive.
This is useful for "weird" types that represent integers, like multi-precision integers, I don't know, that do not satisfy the "Integral" property required by isPowerOfTwo.
There is one aspect that I really like about this code: the structural guarantee that you won't pass a non-power-of-two as a template argument. I can see myself writing some code with lane sizes that are not powers of two, and in some remote part of the code an assumption that the lane size is a power of two gets broken, invoking bitwise conjunction, for "hilarity".


} // namespace zoo::swar

2 changes: 1 addition & 1 deletion test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ else()
set(
ZOO_TEST_SOURCES
${CATCH2_MAIN_SOURCE} ${TYPE_ERASURE_SOURCES} ${ALGORITHM_SOURCES}
${SWAR_SOURCES}
${SWAR_SOURCES}
${MISCELLANEA_SOURCES}
)

Expand Down
Loading