Skip to content

Commit

Permalink
lipol_ps becomes lipol_sse<DefaultBlock, firsttime> (surge-synthesize…
Browse files Browse the repository at this point in the history
…r#31)

And we port the behavior and methods from surge in with tests.
  • Loading branch information
baconpaul authored Apr 22, 2023
1 parent bf7fcaa commit e95e3ed
Show file tree
Hide file tree
Showing 2 changed files with 186 additions and 21 deletions.
153 changes: 139 additions & 14 deletions include/sst/basic-blocks/dsp/BlockInterpolators.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#ifndef SST_BASIC_BLOCKS_DSP_BLOCK_INTERPOLATORS_H
#define SST_BASIC_BLOCKS_DSP_BLOCK_INTERPOLATORS_H

#include <cassert>

namespace sst::basic_blocks::dsp
{
template <class T, int defaultBlockSize, bool first_run_checks = true> struct lipol
Expand Down Expand Up @@ -66,16 +68,20 @@ template <class T, int defaultBlockSize, bool first_run_checks = true> struct li
bool first_run{true};
};

template <int blockSize, bool first_run_checks = true> struct lipol_sse
template <int maxBlockSize, bool first_run_checks = true> struct lipol_sse
{
static_assert(! (maxBlockSize & (maxBlockSize - 1)));

lipol_sse()
{
float zbq alignas(16)[4]{0.f, 0.25f, 0.5f, 0.75f};
float zbq alignas(16)[4]{0.25f, 0.5f, 0.75f, 1.00f};
zeroUpByQuarters = _mm_load_ps(zbq);
one = _mm_set1_ps(1.f);
zero = _mm_setzero_ps();
}
void set_target(float f)
{
current = target;
target = f;
if constexpr (first_run_checks)
{
Expand All @@ -88,36 +94,98 @@ template <int blockSize, bool first_run_checks = true> struct lipol_sse
updateLine();
}

/*
 * Smoothed target update: the new target is 0.25 * f + 0.75 * (old target).
 * The old target is recorded as `current` before updateLine() is called.
 */
void set_target_smoothed(float f)
{
    constexpr float incomingWeight = 0.25;
    constexpr float retainedWeight = 1 - incomingWeight;

    const float previousTarget = target;
    current = previousTarget;

    const float incomingPart = incomingWeight * f;
    const float retainedPart = retainedWeight * previousTarget;
    target = incomingPart + retainedPart;

    updateLine();
}

/* Re-applies the existing target through set_target_instant(target). */
inline void instantize() { set_target_instant(target);}
/* Alternate name for set_target_instant: forwards f unchanged. */
void set_target_instantize(float f)
{
set_target_instant(f);
}
/*
 * Set both the current value and the target to f, then call updateLine().
 */
void set_target_instant(float f)
{
    current = f;
    target = f;
    updateLine();
}
/* Returns the stored target value. */
float get_target() const { return target; }

/*
* Out = in * linearly-interpolated-target
* Out = in * linearly-interpolated-target.
*
* Surge passed the block size (in quads) explicitly at each call site. Checking that
* value is a useful test that the port is correct, so for now these methods take an
* optional bsQuad argument and assert that it matches the configured register count.
*/
void multiply_block_to(float *__restrict in, float *__restrict out)
void multiply_block_to(float *__restrict in, float *__restrict out, int bsQuad=-1) const
{
assert(bsQuad == -1 || bsQuad == numRegisters);
for (int i = 0; i < numRegisters; ++i)
{
auto iv = _mm_load_ps(in + (i << 2));
auto ov = _mm_mul_ps(iv, line[i]);
_mm_store_ps(out + (i << 2), ov);
}
}

/*
 * In-place multiply: each group of four floats in `in` is scaled by the
 * matching register of `line`. Uses aligned SSE loads/stores, so `in` must
 * be 16-byte aligned. bsQuad, when not -1, must equal numRegisters (asserted).
 */
void multiply_block(float *in, int bsQuad = -1) const
{
    assert(bsQuad == -1 || bsQuad == numRegisters);
    for (int reg = 0; reg < numRegisters; ++reg)
    {
        float *const quad = in + (reg << 2);
        const __m128 samples = _mm_load_ps(quad);
        const __m128 scaled = _mm_mul_ps(samples, line[reg]);
        _mm_store_ps(quad, scaled);
    }
}

/*
 * Applies multiply_block to in1 and then to in2, passing the same bsQuad
 * to both calls.
 */
void multiply_2_blocks(float *__restrict in1, float *__restrict in2, int bsQuad = -1) const
{
multiply_block(in1, bsQuad);
multiply_block(in2, bsQuad);
}

void multiply_2_blocks_to(float *__restrict inL, float *__restrict inR, float *__restrict outL,
float *__restrict outR)
float *__restrict outR, int bsQuad = -1) const
{
multiply_block_to(inL, outL);
multiply_block_to(inR, outR);
multiply_block_to(inL, outL, bsQuad);
multiply_block_to(inR, outR, bsQuad);
}

/*
* MAC means "multiply-accumulate"
*/
/*
 * dst += src * line, four floats at a time. Uses aligned SSE loads/stores,
 * so src and dst must be 16-byte aligned. bsQuad, when not -1, must equal
 * numRegisters (asserted).
 */
void MAC_block_to(float *__restrict src, float *__restrict dst, int bsQuad = -1) const
{
    assert(bsQuad == -1 || bsQuad == numRegisters);
    for (int reg = 0; reg < numRegisters; ++reg)
    {
        const int offset = reg << 2;
        const __m128 input = _mm_load_ps(src + offset);
        const __m128 accum = _mm_load_ps(dst + offset);
        const __m128 scaled = _mm_mul_ps(input, line[reg]);
        _mm_store_ps(dst + offset, _mm_add_ps(scaled, accum));
    }
}
/*
 * Two-channel form of MAC_block_to: accumulates src1 into dst1, then src2
 * into dst2, passing the same bsQuad to both calls.
 */
void MAC_2_blocks_to(float *__restrict src1, float *__restrict src2,
float *__restrict dst1, float *__restrict dst2, int bsQuad = -1) const
{
MAC_block_to(src1, dst1, bsQuad);
MAC_block_to(src2, dst2, bsQuad);
}

/*
* out = a * (1-t) + b * t
*/
void fade_blocks(float *__restrict inA, float *__restrict inB, float *__restrict out)
void fade_blocks(float *__restrict inA, float *__restrict inB, float *__restrict out) const
{
for (int i = 0; i < numRegisters; ++i)
{
Expand All @@ -130,14 +198,68 @@ template <int blockSize, bool first_run_checks = true> struct lipol_sse
}
}

void store_block(float *__restrict out)
/*
 * Variant of fade_blocks that also accepts a block size in quads: asserts
 * that bsQuad is -1 or equals numRegisters, then forwards
 * (src1, src2, dst) to fade_blocks.
 */
void fade_block_to(float *__restrict src1, float *__restrict src2, float *__restrict dst, int bsQuad = -1) const
{
assert(bsQuad == -1 || bsQuad == numRegisters);
fade_blocks(src1, src2, dst);
}
/*
 * Two-channel form of fade_block_to: fades (src11, src12) into dst1, then
 * (src21, src22) into dst2, passing the same bsQuad to both calls.
 */
void fade_2_blocks_to(float *__restrict src11, float *__restrict src12,
float *__restrict src21, float *__restrict src22,
float *__restrict dst1, float *__restrict dst2, int bsQuad = -1) const
{
fade_block_to(src11, src12, dst1, bsQuad);
fade_block_to(src21, src22, dst2, bsQuad);
}

/*
 * Copy the interpolation line itself into `out`, four floats per register.
 * Uses aligned SSE stores, so `out` must be 16-byte aligned. bsQuad, when
 * not -1, must equal numRegisters (asserted).
 */
void store_block(float *__restrict out, int bsQuad = -1) const
{
    assert( bsQuad == -1 || bsQuad == numRegisters);
    float *dest = out;
    for (int reg = 0; reg < numRegisters; ++reg, dest += 4)
    {
        _mm_store_ps(dest, line[reg]);
    }
}

/*
* trixpan blocks:
* a = max(line, 0)
* b = min(line, 0)
* tL = (1-a) * L - b * R
* tR = a * L + (1+b) * R
*/
void trixpan_blocks(float *__restrict L, float *__restrict R, float *__restrict dL,
                    float *__restrict dR, int bsQuad = -1) const
{
    assert(bsQuad == -1 || bsQuad == numRegisters);

    for (int reg = 0; reg < numRegisters; ++reg)
    {
        const int offset = reg << 2;

        // Split the line into its non-negative and non-positive parts.
        const __m128 positivePart = _mm_max_ps(zero, line[reg]);
        const __m128 negativePart = _mm_min_ps(zero, line[reg]);

        const __m128 left = _mm_load_ps(L + offset);
        const __m128 right = _mm_load_ps(R + offset);

        // dL = (1 - a) * L - b * R
        const __m128 leftKept = _mm_mul_ps(_mm_sub_ps(one, positivePart), left);
        const __m128 rightIntoLeft = _mm_mul_ps(negativePart, right);
        const __m128 outLeft = _mm_sub_ps(leftKept, rightIntoLeft);

        // dR = a * L + (1 + b) * R
        const __m128 leftIntoRight = _mm_mul_ps(positivePart, left);
        const __m128 rightKept = _mm_mul_ps(_mm_add_ps(one, negativePart), right);
        const __m128 outRight = _mm_add_ps(leftIntoRight, rightKept);

        _mm_store_ps(dL + offset, outLeft);
        _mm_store_ps(dR + offset, outRight);
    }
}

/*
 * Choose the run-time block size. bs must be a power of two, at least 4 and
 * at most maxBlockSize (all asserted). Updates blockSize, numRegisters and
 * the two cached reciprocals; it does not call updateLine() itself.
 */
void set_blocksize(size_t bs)
{
    assert( ! ( bs & (bs-1)));
    assert(bs <= maxBlockSize);
    assert(bs >= 4);

    blockSize = static_cast<int>(bs);
    numRegisters = static_cast<int>(bs >> 2);

    const int quads = blockSize >> 2;
    blockSizeInv = 1.f / blockSize;
    registerSizeInv = 1.f / quads;
}

int blockSize{maxBlockSize};

private:
void updateLine()
{
Expand All @@ -152,12 +274,15 @@ template <int blockSize, bool first_run_checks = true> struct lipol_sse
current = target;
}

static constexpr int numRegisters{blockSize >> 2};
static constexpr float blockSizeInv{1.f / blockSize};
static constexpr float registerSizeInv{1.f / (blockSize >> 2)};
__m128 line[numRegisters];
static constexpr int maxRegisters{maxBlockSize >> 2};

int numRegisters{maxBlockSize >> 2};
float blockSizeInv{1.f / blockSize};
float registerSizeInv{1.f / (blockSize >> 2)};

__m128 line[maxRegisters];
__m128 zeroUpByQuarters;
__m128 one;
__m128 one, zero;
float target{0.f}, current{0.f};
bool first_run{true};
};
Expand Down
54 changes: 47 additions & 7 deletions tests/dsp_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ TEST_CASE("lipol_sse basic", "[dsp]")
lip.store_block(where);
for (int i = 0; i < bs; i++)
{
REQUIRE(where[i] == Approx(prev + (t - prev) / bs * i).margin(1e-5));
REQUIRE(where[i] == Approx(prev + (t - prev) / bs * (i+1)).margin(1e-5));
}
prev = t;
}
Expand All @@ -51,7 +51,7 @@ TEST_CASE("lipol_sse basic", "[dsp]")
lip.store_block(where);
for (int i = 0; i < bs; i++)
{
REQUIRE(where[i] == Approx(prev + (t - prev) / bs * i).margin(1e-5));
REQUIRE(where[i] == Approx(prev + (t - prev) / bs * (i+1)).margin(1e-5));
}
prev = t;
}
Expand All @@ -70,7 +70,7 @@ TEST_CASE("lipol_sse basic", "[dsp]")
lip.store_block(where);
for (int i = 0; i < bs; i++)
{
REQUIRE(where[i] == Approx(prev + (t - prev) / bs * i).margin(1e-5));
REQUIRE(where[i] == Approx(prev + (t - prev) / bs * (i+1)).margin(1e-5));
}
prev = t;
}
Expand All @@ -90,7 +90,7 @@ TEST_CASE("lipol_sse basic", "[dsp]")
lip.store_block(where);
for (int i = 0; i < bs; i++)
{
REQUIRE(where[i] == Approx(prev + (t - prev) / bs * i).margin(1e-5));
REQUIRE(where[i] == Approx(prev + (t - prev) / bs * (i+1)).margin(1e-5));
}
prev = t;
}
Expand All @@ -110,7 +110,7 @@ TEST_CASE("lipol_sse basic", "[dsp]")
lip.store_block(where);
for (int i = 0; i < bs; i++)
{
REQUIRE(where[i] == Approx(prev + (t - prev) / bs * i).margin(1e-5));
REQUIRE(where[i] == Approx(prev + (t - prev) / bs * (i+1)).margin(1e-5));
}
prev = t;
}
Expand All @@ -134,7 +134,7 @@ TEST_CASE("lipol_sse multiply_block", "[dsp]")
lip.multiply_block_to(f, r);
for (int i = 0; i < bs; i++)
{
auto x = (0.2 + (0.6 - 0.2) / bs * i) * f[i];
auto x = (0.2 + (0.6 - 0.2) / bs * (i+1)) * f[i];
REQUIRE(x == Approx(r[i]).margin(1e-5));
}
}
Expand All @@ -158,7 +158,7 @@ TEST_CASE("lipol_sse fade_block", "[dsp]")
lip.fade_blocks(f, g, r);
for (int i = 0; i < bs; i++)
{
auto cx = (0.2 + (0.6 - 0.2) / bs * i);
auto cx = (0.2 + (0.6 - 0.2) / bs * (i+1));
auto rx = f[i] * (1 - cx) + g[i] * cx;
REQUIRE(rx == Approx(r[i]).margin(1e-5));
}
Expand Down Expand Up @@ -643,3 +643,43 @@ TEST_CASE("Sinc Delay Line", "[dsp]")
}
#endif
}



/*
 * Checks the Surge-style lipol_ps usage of lipol_sse<64, false>:
 *  - after set_target + instantize the stored block is constant;
 *  - after each set_target the stored block is a straight line whose last
 *    sample equals the new target and whose first sample is one step past
 *    the previous target.
 */
TEST_CASE("lipol_ps class", "[dsp]")
{
    using lipol_ps = sst::basic_blocks::dsp::lipol_sse<64, false>;
    lipol_ps mypol;
    float prevtarget = -1.0f;
    mypol.set_target(prevtarget);
    mypol.instantize();

    constexpr size_t nfloat = 64;
    constexpr size_t nfloat_quad = 16;
    float storeTarget alignas(16)[nfloat];

    // Use REQUIRE rather than assert so the precondition is still checked
    // when the tests are built with NDEBUG.
    REQUIRE(static_cast<size_t>(mypol.blockSize) == nfloat);
    mypol.store_block(storeTarget);

    for (size_t i = 0; i < nfloat; ++i)
        REQUIRE(storeTarget[i] == prevtarget); // should be constant in the first instance

    for (int i = 0; i < 10; ++i)
    {
        const float target = static_cast<float>((i * i) / 100.0);
        mypol.set_target(target);

        mypol.store_block(storeTarget, static_cast<int>(nfloat_quad));

        // The ramp ends exactly on the new target.
        REQUIRE(storeTarget[nfloat - 1] == Approx(target));

        // Every step between neighbouring samples matches the first step.
        const float dy = storeTarget[1] - storeTarget[0];
        for (size_t j = 1; j < nfloat; ++j)
        {
            REQUIRE(storeTarget[j] - storeTarget[j - 1] == Approx(dy).epsilon(1e-3));
        }

        // The first sample is one step past the previous target.
        REQUIRE(prevtarget + dy == Approx(storeTarget[0]));

        prevtarget = target;
    }
}

0 comments on commit e95e3ed

Please sign in to comment.