Skip to content

Commit

Permalink
Poly octave SIMD optimizations (#358)
Browse files Browse the repository at this point in the history
* Setting up Poly Octave filterbank as a struct of arrays

* SSE and AVX implementations

* Fixing NEON up2 implementation

* Fixing up2 processing on Intel

* Apply clang-format

* Missing include

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
  • Loading branch information
jatinchowdhury18 and github-actions[bot] committed May 18, 2024
1 parent cb19dc5 commit 6ef1720
Show file tree
Hide file tree
Showing 10 changed files with 761 additions and 238 deletions.
9 changes: 6 additions & 3 deletions modules/cmake/RuntimeSIMDLib.cmake
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
include(CheckCXXCompilerFlag)
function(make_lib_simd_runtime name file)
function(make_lib_simd_runtime name)
set(multiValueArgs SOURCES)
cmake_parse_arguments(ARG "" "" "${multiValueArgs}" ${ARGN})

add_library(${name}_sse_or_arm STATIC)
target_sources(${name}_sse_or_arm PRIVATE ${file})
target_sources(${name}_sse_or_arm PRIVATE ${ARG_SOURCES})

add_library(${name}_avx STATIC)
target_sources(${name}_avx PRIVATE ${file})
target_sources(${name}_avx PRIVATE ${ARG_SOURCES})
target_compile_definitions(${name}_avx PRIVATE BYOD_COMPILING_WITH_AVX=1)
if(WIN32)
CHECK_CXX_COMPILER_FLAG("/arch:AVX" COMPILER_OPT_ARCH_AVX_SUPPORTED)
Expand Down
22 changes: 15 additions & 7 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,13 @@ if (NOT(${JAI_COMPILER} STREQUAL "JAI_COMPILER-NOTFOUND"))
target_compile_definitions(BYOD PRIVATE BYOD_BUILDING_JAI_MODULES=1)
endif()

# AVX/SSE files for accelerated neural nets
make_lib_simd_runtime(rnn_accelerated processors/drive/neural_utils/RNNAccelerated.cpp)
foreach(target IN ITEMS rnn_accelerated_sse_or_arm rnn_accelerated_avx)
# AVX/SSE files for accelerated neural nets and other DSP
make_lib_simd_runtime(dsp_accelerated
SOURCES
processors/drive/neural_utils/RNNAccelerated.cpp
processors/other/poly_octave/PolyOctaveV2FilterBankImpl.cpp
)
foreach(target IN ITEMS dsp_accelerated_sse_or_arm dsp_accelerated_avx)
target_link_libraries(${target}
PRIVATE
math_approx
Expand All @@ -182,17 +186,21 @@ foreach(target IN ITEMS rnn_accelerated_sse_or_arm rnn_accelerated_avx)
${CMAKE_CURRENT_SOURCE_DIR}/../modules/RTNeural
${CMAKE_CURRENT_SOURCE_DIR}/../modules/RTNeural/modules/xsimd/include
)
target_compile_definitions(${target} PRIVATE RTNEURAL_USE_XSIMD=1)
target_compile_definitions(${target}
PRIVATE
RTNEURAL_USE_XSIMD=1
_USE_MATH_DEFINES=1
)
set_target_properties(${target} PROPERTIES
POSITION_INDEPENDENT_CODE TRUE
VISIBILITY_INLINES_HIDDEN TRUE
C_VISIBILITY_PRESET hidden
CXX_VISIBILITY_PRESET hidden
)
endforeach()
target_compile_definitions(rnn_accelerated_sse_or_arm PRIVATE RTNEURAL_DEFAULT_ALIGNMENT=16 RTNEURAL_NAMESPACE=RTNeural_sse_arm)
target_compile_definitions(rnn_accelerated_avx PRIVATE RTNEURAL_DEFAULT_ALIGNMENT=32 RTNEURAL_NAMESPACE=RTNeural_avx)
target_link_libraries(BYOD PRIVATE rnn_accelerated)
target_compile_definitions(dsp_accelerated_sse_or_arm PRIVATE RTNEURAL_DEFAULT_ALIGNMENT=16 RTNEURAL_NAMESPACE=RTNeural_sse_arm)
target_compile_definitions(dsp_accelerated_avx PRIVATE RTNEURAL_DEFAULT_ALIGNMENT=32 RTNEURAL_NAMESPACE=RTNeural_avx)
target_link_libraries(BYOD PRIVATE dsp_accelerated)

# special flags for MSVC
if (MSVC)
Expand Down
61 changes: 47 additions & 14 deletions src/processors/other/poly_octave/PolyOctave.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "PolyOctave.h"
#include "PolyOctaveFilterBandHelpers.h"
#include "PolyOctaveV1FilterBankImpl.h"
#include "PolyOctaveV2FilterBankImpl.h"
#include "processors/ParameterHelpers.h"

namespace PolyOctaveTags
Expand Down Expand Up @@ -42,6 +43,14 @@ PolyOctave::PolyOctave (UndoManager* um)
uiOptions.powerColour = Colour { 0xffe70510 };
uiOptions.info.description = "A \"polyphonic octave generator\" effect.";
uiOptions.info.authors = StringArray { "Jatin Chowdhury" };

#if JUCE_INTEL
if (juce::SystemStats::hasAVX() && juce::SystemStats::hasFMA3())
{
juce::Logger::writeToLog ("Using Poly Octave with AVX SIMD instructions!");
use_avx = true;
}
#endif
}

ParamLayout PolyOctave::createParameterLayout()
Expand Down Expand Up @@ -94,9 +103,9 @@ void PolyOctave::prepare (double sampleRate, int samplesPerBlock)
}

mixOutBuffer.setSize (2, samplesPerBlock);
up1OutBuffer.setSize (2, 4 * samplesPerBlock + 8); // padding for SIMD
up2OutBuffer.setSize (2, 4 * samplesPerBlock + 8); // padding for SIMD
down1OutBuffer.setSize (2, 4 * samplesPerBlock + 8); // padding for SIMD
up1OutBuffer.setSize (2, 8 * samplesPerBlock + 32); // padding for SIMD
up2OutBuffer.setSize (2, 8 * samplesPerBlock + 32); // padding for SIMD
down1OutBuffer.setSize (2, samplesPerBlock);
}

void PolyOctave::processAudio (AudioBuffer<float>& buffer)
Expand Down Expand Up @@ -125,23 +134,47 @@ void PolyOctave::processAudio (AudioBuffer<float>& buffer)
chowdsp::BufferMath::applyGainSmoothedBuffer (down1OutBuffer, downOctaveGain);

// "up1" processing
for (auto [ch, data_in, data_out] : chowdsp::buffer_iters::zip_channels (std::as_const (buffer), up1OutBuffer))
for (const auto& [ch, data_in, data_out] : chowdsp::buffer_iters::zip_channels (std::as_const (buffer), up1OutBuffer))
{
poly_octave_v2::process<1> (octaveUpFilterBank[ch],
data_in.data(),
data_out.data(),
numSamples);
#if JUCE_INTEL
if (use_avx)
{
poly_octave_v2::process_avx<1> (octaveUpFilterBank[ch],
data_in.data(),
data_out.data(),
numSamples);
}
else
#endif
{
poly_octave_v2::process<1> (octaveUpFilterBank[ch],
data_in.data(),
data_out.data(),
numSamples);
}
}
upOctaveGain.process (numSamples);
chowdsp::BufferMath::applyGainSmoothedBuffer (up1OutBuffer, upOctaveGain);

// "up2" processing
for (auto [ch, data_in, data_out] : chowdsp::buffer_iters::zip_channels (std::as_const (buffer), up2OutBuffer))
for (const auto& [ch, data_in, data_out] : chowdsp::buffer_iters::zip_channels (std::as_const (buffer), up2OutBuffer))
{
poly_octave_v2::process<2> (octaveUp2FilterBank[ch],
data_in.data(),
data_out.data(),
numSamples);
#if JUCE_INTEL
if (use_avx)
{
poly_octave_v2::process_avx<2> (octaveUp2FilterBank[ch],
data_in.data(),
data_out.data(),
numSamples);
}
else
#endif
{
poly_octave_v2::process<2> (octaveUp2FilterBank[ch],
data_in.data(),
data_out.data(),
numSamples);
}
}
up2OctaveGain.process (numSamples);
chowdsp::BufferMath::applyGainSmoothedBuffer (up2OutBuffer, up2OctaveGain);
Expand Down
7 changes: 6 additions & 1 deletion src/processors/other/poly_octave/PolyOctave.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#pragma once

#include "DelayPitchShifter.h"
#include "PolyOctaveFilterBankTypes.h"
#include "PolyOctaveV1FilterBank.h"
#include "PolyOctaveV2FilterBank.h"
#include "processors/BaseProcessor.h"

class PolyOctave : public BaseProcessor
Expand Down Expand Up @@ -61,5 +62,9 @@ class PolyOctave : public BaseProcessor
juce::AudioBuffer<float> up2OutBuffer;
juce::AudioBuffer<float> down1OutBuffer;

#if JUCE_INTEL
bool use_avx = false;
#endif

JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (PolyOctave)
};
38 changes: 0 additions & 38 deletions src/processors/other/poly_octave/PolyOctaveFilterBankTypes.h

This file was deleted.

13 changes: 13 additions & 0 deletions src/processors/other/poly_octave/PolyOctaveV1FilterBank.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#pragma once

#include <pch.h>

namespace poly_octave_v1
{
/** SIMD batch of doubles supplied by xsimd; the lane count is float_2::size. */
using float_2 = xsimd::batch<double>;

/**
 * Storage for the V1 poly-octave filter bank: two equally sized arrays of
 * IIR filters, one named for the real part and one for the imaginary part.
 */
struct ComplexERBFilterBank
{
    /** Total number of filter bands held by the bank. */
    static constexpr size_t numFilterBands = 44;

    // The array length is the band count divided by the SIMD lane count
    // (integer division), so each element presumably handles float_2::size
    // bands at once -- NOTE(review): confirm against the processing code.
    std::array<chowdsp::IIRFilter<4, float_2>, numFilterBands / float_2::size> erbFilterReal;
    std::array<chowdsp::IIRFilter<4, float_2>, numFilterBands / float_2::size> erbFilterImag;
};
} // namespace poly_octave_v1
Loading

0 comments on commit 6ef1720

Please sign in to comment.