diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index 064b6290..95944616 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -54,6 +54,15 @@ jobs:
           path: modules/BYOD-add-ons
           token: ${{ secrets.OUR_GITHUB_PAT }}
 
+      - name: Checkout Jai
+        uses: actions/checkout@v2
+        if: github.actor == 'jatinchowdhury18'
+        with:
+          ref: main
+          repository: Chowdhury-DSP/jai-minimal
+          token: ${{ secrets.OUR_GITHUB_PAT }}
+          path: modules/jai
+
       - name: Configure
         shell: bash
         env:
diff --git a/modules/cmake/FindJaiCompiler.cmake b/modules/cmake/FindJaiCompiler.cmake
new file mode 100644
index 00000000..555728e5
--- /dev/null
+++ b/modules/cmake/FindJaiCompiler.cmake
@@ -0,0 +1,18 @@
+if(NOT (IOS OR LINUX OR (CMAKE_SYSTEM_NAME STREQUAL "Linux"))) # LINUX is only predefined by CMake >= 3.25
+    if(WIN32)
+        set(JAI_COMPILER_EXE "jai.exe")
+    elseif(APPLE)
+        set(JAI_COMPILER_EXE "jai-macos")
+    else() # any remaining non-Windows, non-Apple host
+        set(JAI_COMPILER_EXE "jai-linux")
+    endif()
+
+    find_program(JAI_COMPILER # left as JAI_COMPILER-NOTFOUND when no binary is located
+        NAMES ${JAI_COMPILER_EXE}
+        HINTS ${CMAKE_SOURCE_DIR}/modules/jai/bin ${CMAKE_SOURCE_DIR}/../../Research/jai/bin
+    )
+    message(STATUS "Jai compiler: ${JAI_COMPILER}")
+else()
+    message(STATUS "Skipping Jai checks on this platform")
+    set(JAI_COMPILER "JAI_COMPILER-NOTFOUND") # same sentinel find_program() produces, so callers need a single check
+endif()
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d12fc365..9c7cb01f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,6 +3,7 @@ if(NOT (IOS OR BUILD_RELEASE))
     add_subdirectory(headless)
 endif()
 
+# main source files
 target_sources(BYOD PRIVATE
     BYOD.cpp
 
@@ -140,6 +141,7 @@ target_sources(BYOD PRIVATE
     processors/other/cry_baby/CryBabyNDK.cpp
     processors/other/spring_reverb/SpringReverb.cpp
     processors/other/spring_reverb/SpringReverbProcessor.cpp
+    processors/other/krusher/Krusher.cpp
 
     processors/utility/CleanGain.cpp
     processors/utility/FreqBandSplitter.cpp
@@ -155,8 +157,15 @@ target_sources(BYOD PRIVATE
     processors/netlist_helpers/NetlistViewer.cpp
 )
 
-target_precompile_headers(BYOD PRIVATE pch.h)
+# Jai files
+include(${CMAKE_SOURCE_DIR}/modules/cmake/FindJaiCompiler.cmake)
+if(JAI_COMPILER) # false for empty or *-NOTFOUND values, so no unquoted string comparison is needed
+    message(STATUS "Configuring Jai compilation!")
+    add_subdirectory(jai)
+    target_compile_definitions(BYOD PRIVATE BYOD_BUILDING_JAI_MODULES=1)
+endif()
 
+# AVX/SSE files for accelerated neural nets
 make_lib_simd_runtime(rnn_accelerated processors/drive/neural_utils/RNNAccelerated.cpp)
 foreach(target IN ITEMS rnn_accelerated_sse_or_arm rnn_accelerated_avx)
     target_link_libraries(${target} PRIVATE config_flags juce::juce_recommended_lto_flags warning_flags)
@@ -177,7 +186,10 @@ target_compile_definitions(rnn_accelerated_sse_or_arm PRIVATE RTNEURAL_DEFAULT_A
 target_compile_definitions(rnn_accelerated_avx PRIVATE RTNEURAL_DEFAULT_ALIGNMENT=32)
 target_link_libraries(BYOD PRIVATE rnn_accelerated)
 
+# special flags for MSVC
 if (MSVC)
     target_compile_options(BYOD PRIVATE /bigobj)
 endif ()
 
+# pre-compiled header
+target_precompile_headers(BYOD PRIVATE pch.h)
diff --git a/src/jai/.gitignore b/src/jai/.gitignore
new file mode 100644
index 00000000..33a4f18a
--- /dev/null
+++ b/src/jai/.gitignore
@@ -0,0 +1,4 @@
+.build/
+*.lib
+*.a
+*_jai_lib.h
diff --git a/src/jai/CMakeLists.txt b/src/jai/CMakeLists.txt
new file mode 100644
index 00000000..e5f306e4
--- /dev/null
+++ b/src/jai/CMakeLists.txt
@@ -0,0 +1,42 @@
+if(WIN32)
+    set(JAI_LIBRARY_FILE "byod_jai_lib.lib")
+else()
+    set(JAI_LIBRARY_FILE "byod_jai_lib.a")
+endif()
+
+add_custom_command(OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/${JAI_LIBRARY_FILE} # library is expected in this source directory
+    COMMAND ${JAI_COMPILER} build.jai
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    DEPENDS build.jai krusher/bit_reduction.jai krusher/lofi_downsampler.jai
+)
+add_custom_target(jai_library_build DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${JAI_LIBRARY_FILE})
+
+add_library(byod_jai_lib STATIC IMPORTED GLOBAL)
+add_dependencies(byod_jai_lib jai_library_build) # run the Jai build before anything that links the imported library
+
+set_target_properties(byod_jai_lib
+    PROPERTIES
+    IMPORTED_LOCATION ${CMAKE_CURRENT_SOURCE_DIR}/${JAI_LIBRARY_FILE}
+)
+
+target_link_libraries(BYOD PRIVATE byod_jai_lib)
+target_sources(BYOD PRIVATE SharedJaiContext.cpp stb_sprintf.cpp)
+target_compile_definitions(BYOD PRIVATE STB_SPRINTF_IMPLEMENTATION=1) # stb_sprintf.cpp guards its function bodies behind this macro
+
+if((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU"))
+    string(JOIN " " STB_CXX_FLAGS # warning suppressions applied to stb_sprintf.cpp only (see below)
+        "-Wno-language-extension-token"
+        "-Wno-zero-as-null-pointer-constant"
+        "-Wno-cast-align"
+        "-Wno-implicit-fallthrough"
+        "-Wno-conditional-uninitialized"
+        "-Wno-duplicate-decl-specifier"
+        "-Wno-unreachable-code"
+    )
+    set_source_files_properties(stb_sprintf.cpp
+        TARGET_DIRECTORY BYOD
+        PROPERTIES COMPILE_FLAGS "${STB_CXX_FLAGS}"
+    )
+endif()
+
+#TODO: remove stb_sprintf once Jai.Basic no longer depends on it!
diff --git a/src/jai/SharedJaiContext.cpp b/src/jai/SharedJaiContext.cpp
new file mode 100644
index 00000000..00df66e7
--- /dev/null
+++ b/src/jai/SharedJaiContext.cpp
@@ -0,0 +1,12 @@
+#include "SharedJaiContext.h"
+#include "byod_jai_lib.h"
+
+JaiContextWrapper::JaiContextWrapper()
+{
+    internal = __jai_runtime_init (0, nullptr); // NOTE(review): arguments look like argc/argv; none are forwarded
+}
+
+JaiContextWrapper::~JaiContextWrapper()
+{
+    __jai_runtime_fini (internal); // hands back the pointer obtained in the constructor
+}
diff --git a/src/jai/SharedJaiContext.h b/src/jai/SharedJaiContext.h
new file mode 100644
index 00000000..7f4fc089
--- /dev/null
+++ b/src/jai/SharedJaiContext.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <juce_core/juce_core.h>
+
+struct jai_Context;
+struct JaiContextWrapper // owns one Jai runtime context: created in the constructor, finalized in the destructor
+{
+    JaiContextWrapper();
+    ~JaiContextWrapper();
+
+    operator jai_Context*() { return internal; }; // NOLINT
+
+private:
+    jai_Context* internal = nullptr;
+};
+
+using SharedJaiContext = juce::SharedResourcePointer<JaiContextWrapper>;
diff --git a/src/jai/build.jai b/src/jai/build.jai
new file mode 100644
index 00000000..2237c7c4
--- /dev/null
+++ b/src/jai/build.jai
@@ -0,0 +1,44 @@
+#import "Basic";
+#import "Compiler";
+#import "generate_c_header";
+
+#run build();
+
+SRC_FILES :: string.[
+    "krusher/lofi_downsampler.jai",
+    "krusher/bit_reduction.jai"
+];
+
+build :: () {
+    header_info : Header_Info;
+    header_info.jai_type_prefix = "jai_";
+
+    w := compiler_create_workspace();
+
+    target_options := get_build_options(w);
+    target_options.output_executable_name = "byod_jai_lib";
+    target_options.output_type = .STATIC_LIBRARY; // specifies output to be a static library
+    target_options.backend = .LLVM;
+    target_options.text_output_flags = 1;
+    set_optimization(*target_options, .OPTIMIZED);
+
+    set_build_options(target_options, w);
+
+    compiler_begin_intercept(w);
+    for file, _ : SRC_FILES {
+        add_build_file(tprint("%/%", #filepath, file), w);
+    }
+    while true {
+        message := compiler_wait_for_message();
+        handle_message(*header_info, message);
+        if message.kind == {
+            case .COMPLETE;
+                break;
+        }
+    }
+    compiler_end_intercept(w);
+
+    generate_header(*header_info, "byod_jai_lib.h");
+
+    set_build_options_dc(.{do_output=false}); // No executable for this workspace.
+}
diff --git a/src/jai/krusher/bit_reduction.jai b/src/jai/krusher/bit_reduction.jai
new file mode 100644
index 00000000..6829d58c
--- /dev/null
+++ b/src/jai/krusher/bit_reduction.jai
@@ -0,0 +1,159 @@
+Krusher_Bit_Reducer_Filter_State :: struct {
+    p1: s32;
+    p2: s32;
+}
+
+#program_export
+krusher_bit_reduce_process_block :: (buffer: **float,
+                                     num_channels: s32,
+                                     num_samples: s32,
+                                     filter_index: s32,
+                                     bit_depth: s32,
+                                     filter_states: *Krusher_Bit_Reducer_Filter_State) #c_call {
+    small_block_size : s32 : 16; // the audio buffer is processed in place, at most this many samples at a time
+    samples_int : [small_block_size]s16 = ---;
+
+    for channel : 0..num_channels-1 {
+        samples_remaining : s32 = num_samples;
+        while samples_remaining > 0 {
+            samples_to_process := ifx samples_remaining > small_block_size then small_block_size else samples_remaining;
+            defer { samples_remaining -= samples_to_process; }
+
+            samples_float_span : []float32;
+            samples_float_span.data = buffer[channel] + num_samples - samples_remaining;
+            samples_float_span.count = samples_to_process;
+
+            memset(*samples_int, 0, size_of(s16) * small_block_size);
+            samples_int_span : []s16;
+            samples_int_span.data = samples_int.data;
+            samples_int_span.count = samples_to_process; // shorter than 16 for the final partial block
+
+            convert_float_to_int(samples_float_span, samples_int_span);
+
+            if (bit_depth < 12) { // depths of 12 and above skip the encode/decode round trip
+                br_data := bit_reduce_encode(samples_int_span, bit_depth);
+                bit_reduce_decode(br_data, samples_int_span, cast(BR_Filter) filter_index, *filter_states[channel]);
+            }
+
+            convert_int_to_float(samples_int_span, samples_float_span);
+        }
+    }
+}
+
+#scope_file
+BIT_MASKS :: u16.[
+    0, // 0
+    0x0001, // 1
+    0x0003, // 2
+    0x0007, // 3
+    0x000F, // 4
+    0x001F, // 5
+    0x003F, // 6
+    0x007F, // 7
+    0x00FF, // 8
+    0x01FF, // 9
+    0x03FF, // 10
+    0x07FF, // 11
+    0x0FFF, // 12
+    0x1FFF, // 13
+    0x3FFF, // 14
+    0x7FFF, // 15
+];
+
+Bit_Reduction_Block :: struct {
+    shift_amount: u8;
+    data: [16] u16;
+}
+
+BR_Filter :: enum {
+    TYPE_0;
+    TYPE_1;
+    TYPE_2;
+    TYPE_3;
+}
+
+encode_sample :: inline (shift: u8, bit_depth: s32, x: s16) -> u16 #no_context {
+    value_unsigned := cast(u16) (x + (1 << 8)); // offset by 256 before the shift; decode_sample subtracts it again
+    return cast(u16) (value_unsigned >> shift) & BIT_MASKS[bit_depth];
+}
+
+decode_sample :: inline (shift: u8, x: u16) -> s16 #no_context {
+    return cast(s16) (cast(u16) (x << shift) - (1 << 8));
+}
+
+bit_reduce_decode :: (using br_block: Bit_Reduction_Block,
+                      out: []s16,
+                      filter: BR_Filter,
+                      state: *Krusher_Bit_Reducer_Filter_State) #no_context {
+
+    type1_filter :: inline (nibble_2r: s16, using state: *Krusher_Bit_Reducer_Filter_State) -> s16 #no_context {
+        y := cast(s32) nibble_2r + ((p1 * 15) >> 4);
+        p2 = 0;
+        p1 = y;
+        return cast(s16) (y >> 4);
+    }
+
+    type2_filter :: inline (nibble_2r: s16, using state: *Krusher_Bit_Reducer_Filter_State) -> s16 #no_context {
+        y := cast(s32) nibble_2r + ((p1 * 61) >> 5) - ((p2 * 15) >> 4);
+        p2 = p1;
+        p1 = y;
+        return cast(s16) (y >> 5);
+    }
+
+    type3_filter :: inline (nibble_2r: s16, using state: *Krusher_Bit_Reducer_Filter_State) -> s16 #no_context {
+        y := cast(s32) nibble_2r + ((p1 * 115) >> 6) - ((p2 * 13) >> 4);
+        p2 = p1;
+        p1 = y;
+        return cast(s16) (y >> 6);
+    }
+
+    for i : 0..out.count-1 { // decode only what the caller asked for: slots past out.count were never encoded and must not reach `out` or the filter state
+        if #complete filter == {
+            case .TYPE_0; out[i] = decode_sample(shift_amount, data[i]);
+            case .TYPE_1; out[i] = type1_filter (decode_sample(shift_amount, data[i]), state);
+            case .TYPE_2; out[i] = type2_filter (decode_sample(shift_amount, data[i]), state);
+            case .TYPE_3; out[i] = type3_filter (decode_sample(shift_amount, data[i]), state);
+        }
+    }
+}
+
+bit_reduce_encode :: (pcm_data: []s16, bit_depth: s32) -> Bit_Reduction_Block #no_context {
+    shift_best : u8 = 0;
+    err_min : float64 = Math.FLOAT64_MAX;
+
+    for s : cast(u8) 0.. cast(u8) (16 - bit_depth) { // try every shift and keep the one with the smallest squared round-trip error
+        err_square_accum : float64 = 0.0;
+        for pcm_sample, _ : pcm_data {
+            pred := decode_sample(s, encode_sample(s, bit_depth, pcm_sample));
+            err := cast(float64) (pcm_sample - pred);
+            err_square_accum += err * err;
+        }
+
+        if err_square_accum < err_min {
+            err_min = err_square_accum;
+            shift_best = s;
+        }
+    }
+
+    using br_block : Bit_Reduction_Block = ---; // uninitialized: only data[0..pcm_data.count-1] is written below
+    shift_amount = shift_best;
+    for pcm_sample, i : pcm_data {
+        data[i] = encode_sample(shift_best, bit_depth, pcm_sample);
+    }
+
+    return br_block;
+}
+
+convert_float_to_int :: (data_float: [] float, data_int: [] s16) #no_context {
+    for float_sample, i : data_float {
+        data_int[i] = cast(s16) (float_sample * cast(float32) (1 << 8));
+    }
+}
+
+convert_int_to_float :: (data_int: [] s16, data_float: [] float) #no_context {
+    for _, i : data_float {
+        data_float[i] = cast(float32) data_int[i] / cast(float32) (1 << 8);
+    }
+}
+
+Math :: #import "Math";
\ No newline at end of file
diff --git a/src/jai/krusher/lofi_downsampler.jai b/src/jai/krusher/lofi_downsampler.jai
new file mode 100644
index 00000000..9cc52bc7
--- /dev/null
+++ b/src/jai/krusher/lofi_downsampler.jai
@@ -0,0 +1,65 @@
+Krusher_Lofi_Resample_State :: struct {
+    upsample_overshoot: float64;
+    downsample_overshoot: float64;
+}
+
+#program_export
+krusher_init_lofi_resample :: (using state: *Krusher_Lofi_Resample_State) #c_call { + upsample_overshoot = 0.0; + downsample_overshoot = 0.0; +} + +#program_export +krusher_process_lofi_downsample :: (ctx: *Context, + using state: *Krusher_Lofi_Resample_State, + buffer: **float32, + num_channels: s32, + num_samples: s32, + resample_factor: float64) #c_call +{ + + push_context ctx { + ds_buffer_size := cast(s32) Math.ceil(cast(float64) num_samples / resample_factor); + + // allocating memory here, but it's real-time safe since we're using Temporary_Storage + temp_data : [..] float32; + temp_data.allocator = temp; + array_resize(*temp_data, 2 * ds_buffer_size, true); + + // Eventually we should figure out a cleaner way to have a kind of "audio buffer view" + ds_buffer : [2] *float32; + ds_buffer[0] = temp_data.data; + ds_buffer[1] = temp_data.data + ds_buffer_size; + + krusher_process_lofi_resample(buffer, ds_buffer.data, num_channels, num_samples, ds_buffer_size, resample_factor, *downsample_overshoot); + krusher_process_lofi_resample(ds_buffer.data, buffer, num_channels, ds_buffer_size, num_samples, 1.0 / resample_factor, *upsample_overshoot); + + reset_temporary_storage(); + } +} + +#scope_file +krusher_process_lofi_resample :: (source_buffer: **float32, + dest_buffer: **float32, + num_channels: s32, + num_samples_source: s32, + num_samples_dest: s32, + resample_factor: float64, + overshoot_samples: *float64) +{ + // simple S&H lofi resampler + for channel: 0..num_channels-1 { + source_data := source_buffer[channel]; + dest_data := dest_buffer[channel]; + + for i: 0..num_samples_dest-1 { + grab_index := cast(s32) (cast(float64) i * resample_factor + (<ascii conversion method that uses +doubles with error correction (double-doubles, for ~105 bits of +precision). This conversion is round-trip perfect - that is, an atof +of the values output here will give you the bit-exact double back. 
+ +One difference is that our insignificant digits will be different than +with MSVC or GCC (but they don't match each other either). We also +don't attempt to find the minimum length matching float (pre-MSVC15 +doesn't either). + +If you don't need floats or doubles at all, define STB_SPRINTF_NOFLOAT +and you'll save 4K of code space. + +64-BIT INTS: +============ +This library also supports 64-bit integers and you can use MSVC style or +GCC style indicators (%I64d or %lld). It supports the C99 specifiers +for intmax_t, size_t and ptrdiff_t (%jd %zd %td) as well. + +EXTRAS: +======= +Like some GCCs, for integers and floats, you can use a ' (single quote) +specifier and commas will be inserted on the thousands: "%'d" on 12345 +would print 12,345. + +For integers and floats, you can use a "$" specifier and the number +will be converted to float and then divided to get kilo, mega, giga or +tera and then printed, so "%$d" 1024 is "1.0 k", "%$.2d" 2536000 is +"2.42 m", etc. + +In addition to octal and hexadecimal conversions, you can print +integers in binary: "%b" for 4 would print 100. 
+ +PERFORMANCE vs MSVC 2008 32-/64-bit (GCC is even slower than MSVC): +=================================================================== +"%d" across all 32-bit ints (4.8x/4.0x faster than 32-/64-bit MSVC) +"%24d" across all 32-bit ints (4.5x/4.2x faster) +"%x" across all 32-bit ints (4.5x/3.8x faster) +"%08x" across all 32-bit ints (4.3x/3.8x faster) +"%f" across e-10 to e+10 floats (7.3x/6.0x faster) +"%e" across e-10 to e+10 floats (8.1x/6.0x faster) +"%g" across e-10 to e+10 floats (10.0x/7.1x faster) +"%f" for values near e-300 (7.9x/6.5x faster) +"%f" for values near e+300 (10.0x/9.1x faster) +"%e" for values near e-300 (10.1x/7.0x faster) +"%e" for values near e+300 (9.2x/6.0x faster) +"%.320f" for values near e-300 (12.6x/11.2x faster) +"%a" for random values (8.6x/4.3x faster) +"%I64d" for 64-bits with 32-bit values (4.8x/3.4x faster) +"%I64d" for 64-bits > 32-bit values (4.9x/5.5x faster) +"%s%s%s" for 64 char strings (7.1x/7.3x faster) +"...512 char string..." ( 35.0x/32.5x faster!) 
+*/ + +/* +#ifdef STB_SPRINTF_STATIC +#define STBSP__PUBLICDEC static +#define STBSP__PUBLICDEF static +#else +#ifdef __cplusplus +#define STBSP__PUBLICDEC extern "C" +#define STBSP__PUBLICDEF extern "C" +#else +#define STBSP__PUBLICDEC extern +#define STBSP__PUBLICDEF +#endif +#endif +*/ + +#ifdef WIN32 +#define STBSP__PUBLICDEF extern "C" __declspec(dllexport) extern +#else +#define STBSP__PUBLICDEF extern "C" +#endif + +#include // for va_list() + +#ifndef STB_SPRINTF_MIN +#define STB_SPRINTF_MIN 512 // how many characters per callback +#endif +typedef char* STBSP_SPRINTFCB (char* buf, void* user, int len); + +#ifndef STB_SPRINTF_DECORATE +#define STB_SPRINTF_DECORATE(name) stbsp_##name // define this before including if you want to change the names +#endif + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (vsprintf) (char* buf, char const* fmt, va_list va); +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (vsnprintf) (char* buf, int count, char const* fmt, va_list va); +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (sprintf) (char* buf, char const* fmt, ...); +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (snprintf) (char* buf, int count, char const* fmt, ...); + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (vsprintfcb) (STBSP_SPRINTFCB* callback, void* user, char* buf, char const* fmt, va_list va); +STBSP__PUBLICDEF void STB_SPRINTF_DECORATE (set_separators) (char comma, char period); + +#endif // STB_SPRINTF_H_INCLUDE + +#ifdef STB_SPRINTF_IMPLEMENTATION + +#include // for va_arg() + +#define stbsp__uint32 unsigned int +#define stbsp__int32 signed int + +#ifdef _MSC_VER +#define stbsp__uint64 unsigned __int64 +#define stbsp__int64 signed __int64 +#else +#define stbsp__uint64 unsigned long long +#define stbsp__int64 signed long long +#endif +#define stbsp__uint16 unsigned short + +#ifndef stbsp__uintptr +#if defined(__ppc64__) || defined(__aarch64__) || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) +#define stbsp__uintptr stbsp__uint64 +#else +#define stbsp__uintptr 
stbsp__uint32 +#endif +#endif + +#ifndef STB_SPRINTF_MSVC_MODE // used for MSVC2013 and earlier (MSVC2015 matches GCC) +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#define STB_SPRINTF_MSVC_MODE +#endif +#endif + +#ifdef STB_SPRINTF_NOUNALIGNED // define this before inclusion to force stbsp_sprintf to always use aligned accesses +#define STBSP__UNALIGNED(code) +#else +#define STBSP__UNALIGNED(code) code +#endif + +#ifndef STB_SPRINTF_NOFLOAT +// internal float utility functions +static stbsp__int32 stbsp__real_to_str (char const** start, stbsp__uint32* len, char* out, stbsp__int32* decimal_pos, double value, stbsp__uint32 frac_digits); +static stbsp__int32 stbsp__real_to_parts (stbsp__int64* bits, stbsp__int32* expo, double value); +#define STBSP__SPECIAL 0x7000 +#endif + +static char stbsp__period = '.'; +static char stbsp__comma = ','; +static char stbsp__digitpair[201] = "00010203040506070809101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899"; + +STBSP__PUBLICDEF void STB_SPRINTF_DECORATE (set_separators) (char pcomma, char pperiod) +{ + stbsp__period = pperiod; + stbsp__comma = pcomma; +} + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (vsprintfcb) (STBSP_SPRINTFCB* callback, void* user, char* buf, char const* fmt, va_list va) +{ + static char hex[] = "0123456789abcdefxp"; + static char hexu[] = "0123456789ABCDEFXP"; + char* bf; + char const* f; + int tlen = 0; + + bf = buf; + f = fmt; + for (;;) + { + stbsp__int32 fw, pr, tz; + stbsp__uint32 fl; + +#define STBSP__LEFTJUST 1 +#define STBSP__LEADINGPLUS 2 +#define STBSP__LEADINGSPACE 4 +#define STBSP__LEADING_0X 8 +#define STBSP__LEADINGZERO 16 +#define STBSP__INTMAX 32 +#define STBSP__TRIPLET_COMMA 64 +#define STBSP__NEGATIVE 128 +#define STBSP__METRIC_SUFFIX 256 +#define STBSP__HALFWIDTH 512 + +// macros for the callback buffer stuff +#define stbsp__chk_cb_bufL(bytes) \ + { \ + int len 
= (int) (bf - buf); \ + if ((len + (bytes)) >= STB_SPRINTF_MIN) \ + { \ + tlen += len; \ + if (0 == (bf = buf = callback (buf, user, len))) \ + goto done; \ + } \ + } +#define stbsp__chk_cb_buf(bytes) \ + { \ + if (callback) \ + { \ + stbsp__chk_cb_bufL (bytes); \ + } \ + } +#define stbsp__flush_cb() \ + { \ + stbsp__chk_cb_bufL (STB_SPRINTF_MIN - 1); \ + } //flush if there is even one byte in the buffer +#define stbsp__cb_buf_clamp(cl, v) \ + cl = v; \ + if (callback) \ + { \ + int lg = STB_SPRINTF_MIN - (int) (bf - buf); \ + if (cl > lg) \ + cl = lg; \ + } + + // fast copy everything up to the next % (or end of string) + for (;;) + { + while (((stbsp__uintptr) f) & 3) + { + schk1: + if (f[0] == '%') + goto scandd; + schk2: + if (f[0] == 0) + goto endfmt; + stbsp__chk_cb_buf (1); + *bf++ = f[0]; + ++f; + } + for (;;) + { + stbsp__uint32 v, c; + v = *(stbsp__uint32*) f; + c = (~v) & 0x80808080; + if ((v - 0x26262626) & c) + goto schk1; + if ((v - 0x01010101) & c) + goto schk2; + if (callback) + if ((STB_SPRINTF_MIN - (int) (bf - buf)) < 4) + goto schk1; + *(stbsp__uint32*) bf = v; + bf += 4; + f += 4; + } + } + scandd: + + ++f; + + // ok, we have a percent, read the modifiers first + fw = 0; + pr = -1; + fl = 0; + tz = 0; + + // flags + for (;;) + { + switch (f[0]) + { + // if we have left justify + case '-': + fl |= STBSP__LEFTJUST; + ++f; + continue; + // if we have leading plus + case '+': + fl |= STBSP__LEADINGPLUS; + ++f; + continue; + // if we have leading space + case ' ': + fl |= STBSP__LEADINGSPACE; + ++f; + continue; + // if we have leading 0x + case '#': + fl |= STBSP__LEADING_0X; + ++f; + continue; + // if we have thousand commas + case '\'': + fl |= STBSP__TRIPLET_COMMA; + ++f; + continue; + // if we have kilo marker + case '$': + fl |= STBSP__METRIC_SUFFIX; + ++f; + continue; + // if we have leading zero + case '0': + fl |= STBSP__LEADINGZERO; + ++f; + goto flags_done; + default: + goto flags_done; + } + } + flags_done: + + // get the field width + if 
(f[0] == '*') + { + fw = va_arg (va, stbsp__uint32); + ++f; + } + else + { + while ((f[0] >= '0') && (f[0] <= '9')) + { + fw = fw * 10 + f[0] - '0'; + f++; + } + } + // get the precision + if (f[0] == '.') + { + ++f; + if (f[0] == '*') + { + pr = va_arg (va, stbsp__uint32); + ++f; + } + else + { + pr = 0; + while ((f[0] >= '0') && (f[0] <= '9')) + { + pr = pr * 10 + f[0] - '0'; + f++; + } + } + } + + // handle integer size overrides + switch (f[0]) + { + // are we halfwidth? + case 'h': + fl |= STBSP__HALFWIDTH; + ++f; + break; + // are we 64-bit (unix style) + case 'l': + ++f; + if (f[0] == 'l') + { + fl |= STBSP__INTMAX; + ++f; + } + break; + // are we 64-bit on intmax? (c99) + case 'j': + fl |= STBSP__INTMAX; + ++f; + break; + // are we 64-bit on size_t or ptrdiff_t? (c99) + case 'z': + case 't': + fl |= ((sizeof (char*) == 8) ? STBSP__INTMAX : 0); + ++f; + break; + // are we 64-bit (msft style) + case 'I': + if ((f[1] == '6') && (f[2] == '4')) + { + fl |= STBSP__INTMAX; + f += 3; + } + else if ((f[1] == '3') && (f[2] == '2')) + { + f += 3; + } + else + { + fl |= ((sizeof (void*) == 8) ? 
STBSP__INTMAX : 0); + ++f; + } + break; + default: + break; + } + + // handle each replacement + switch (f[0]) + { +#define STBSP__NUMSZ 512 // big enough for e308 (with commas) or e-307 + char num[STBSP__NUMSZ]; + char lead[8]; + char tail[8]; + char* s; + char const* h; + stbsp__uint32 l, n, cs; + stbsp__uint64 n64; +#ifndef STB_SPRINTF_NOFLOAT + double fv; +#endif + stbsp__int32 dp; + char const* sn; + + case 's': + // get the string + s = va_arg (va, char*); + if (s == 0) + s = (char*) "null"; + // get the length + sn = s; + for (;;) + { + if ((((stbsp__uintptr) sn) & 3) == 0) + break; + lchk: + if (sn[0] == 0) + goto ld; + ++sn; + } + n = 0xffffffff; + if (pr >= 0) + { + n = (stbsp__uint32) (sn - s); + if (n >= (stbsp__uint32) pr) + goto ld; + n = ((stbsp__uint32) (pr - n)) >> 2; + } + while (n) + { + stbsp__uint32 v = *(stbsp__uint32*) sn; + if ((v - 0x01010101) & (~v) & 0x80808080UL) + goto lchk; + sn += 4; + --n; + } + goto lchk; + ld: + + l = (stbsp__uint32) (sn - s); + // clamp to precision + if (l > (stbsp__uint32) pr) + l = pr; + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + // copy the string in + goto scopy; + + case 'c': // char + // get the character + s = num + STBSP__NUMSZ - 1; + *s = (char) va_arg (va, int); + l = 1; + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + goto scopy; + + case 'n': // weird write-bytes specifier + { + int* d = va_arg (va, int*); + *d = tlen + (int) (bf - buf); + } + break; + +#ifdef STB_SPRINTF_NOFLOAT + case 'A': // float + case 'a': // hex float + case 'G': // float + case 'g': // float + case 'E': // float + case 'e': // float + case 'f': // float + va_arg (va, double); // eat it + s = (char*) "No float"; + l = 8; + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + goto scopy; +#else + case 'A': // float + h = hexu; + goto hexfloat; + + case 'a': // hex float + h = hex; + hexfloat: + fv = va_arg (va, double); + if (pr == -1) + pr = 6; // default is 6 + // read the double into a 
string + if (stbsp__real_to_parts ((stbsp__int64*) &n64, &dp, fv)) + fl |= STBSP__NEGATIVE; + + s = num + 64; + + // sign + lead[0] = 0; + if (fl & STBSP__NEGATIVE) + { + lead[0] = 1; + lead[1] = '-'; + } + else if (fl & STBSP__LEADINGSPACE) + { + lead[0] = 1; + lead[1] = ' '; + } + else if (fl & STBSP__LEADINGPLUS) + { + lead[0] = 1; + lead[1] = '+'; + }; + + if (dp == -1023) + dp = (n64) ? -1022 : 0; + else + n64 |= (((stbsp__uint64) 1) << 52); + n64 <<= (64 - 56); + if (pr < 15) + n64 += ((((stbsp__uint64) 8) << 56) >> (pr * 4)); + // add leading chars + +#ifdef STB_SPRINTF_MSVC_MODE + *s++ = '0'; + *s++ = 'x'; +#else + lead[1 + lead[0]] = '0'; + lead[2 + lead[0]] = 'x'; + lead[0] += 2; +#endif + *s++ = h[(n64 >> 60) & 15]; + n64 <<= 4; + if (pr) + *s++ = stbsp__period; + sn = s; + + // print the bits + n = pr; + if (n > 13) + n = 13; + if (pr > (stbsp__int32) n) + tz = pr - n; + pr = 0; + while (n--) + { + *s++ = h[(n64 >> 60) & 15]; + n64 <<= 4; + } + + // print the expo + tail[1] = h[17]; + if (dp < 0) + { + tail[2] = '-'; + dp = -dp; + } + else + tail[2] = '+'; + n = (dp >= 1000) ? 6 : ((dp >= 100) ? 5 : ((dp >= 10) ? 
4 : 3)); + tail[0] = (char) n; + for (;;) + { + tail[n] = '0' + dp % 10; + if (n <= 3) + break; + --n; + dp /= 10; + } + + dp = (int) (s - sn); + l = (int) (s - (num + 64)); + s = num + 64; + cs = 1 + (3 << 24); + goto scopy; + + case 'G': // float + h = hexu; + goto dosmallfloat; + + case 'g': // float + h = hex; + dosmallfloat: + fv = va_arg (va, double); + if (pr == -1) + pr = 6; + else if (pr == 0) + pr = 1; // default is 6 + // read the double into a string + if (stbsp__real_to_str (&sn, &l, num, &dp, fv, (pr - 1) | 0x80000000)) + fl |= STBSP__NEGATIVE; + + // clamp the precision and delete extra zeros after clamp + n = pr; + if (l > (stbsp__uint32) pr) + l = pr; + while ((l > 1) && (pr) && (sn[l - 1] == '0')) + { + --pr; + --l; + } + + // should we use %e + if ((dp <= -4) || (dp > (stbsp__int32) n)) + { + if (pr > (stbsp__int32) l) + pr = l - 1; + else if (pr) + --pr; // when using %e, there is one digit before the decimal + goto doexpfromg; + } + // this is the insane action to get the pr to match %g sematics for %f + if (dp > 0) + { + pr = (dp < (stbsp__int32) l) ? l - dp : 0; + } + else + { + pr = -dp + ((pr > (stbsp__int32) l) ? 
l : pr); + } + goto dofloatfromg; + + case 'E': // float + h = hexu; + goto doexp; + + case 'e': // float + h = hex; + doexp: + fv = va_arg (va, double); + if (pr == -1) + pr = 6; // default is 6 + // read the double into a string + if (stbsp__real_to_str (&sn, &l, num, &dp, fv, pr | 0x80000000)) + fl |= STBSP__NEGATIVE; + doexpfromg: + tail[0] = 0; + lead[0] = 0; + if (fl & STBSP__NEGATIVE) + { + lead[0] = 1; + lead[1] = '-'; + } + else if (fl & STBSP__LEADINGSPACE) + { + lead[0] = 1; + lead[1] = ' '; + } + else if (fl & STBSP__LEADINGPLUS) + { + lead[0] = 1; + lead[1] = '+'; + }; + if (dp == STBSP__SPECIAL) + { + s = (char*) sn; + cs = 0; + pr = 0; + goto scopy; + } + s = num + 64; + // handle leading chars + *s++ = sn[0]; + + if (pr) + *s++ = stbsp__period; + + // handle after decimal + if ((l - 1) > (stbsp__uint32) pr) + l = pr + 1; + for (n = 1; n < l; n++) + *s++ = sn[n]; + // trailing zeros + tz = pr - (l - 1); + pr = 0; + // dump expo + tail[1] = h[0xe]; + dp -= 1; + if (dp < 0) + { + tail[2] = '-'; + dp = -dp; + } + else + tail[2] = '+'; +#ifdef STB_SPRINTF_MSVC_MODE + n = 5; +#else + n = (dp >= 100) ? 
5 : 4; +#endif + tail[0] = (char) n; + for (;;) + { + tail[n] = '0' + dp % 10; + if (n <= 3) + break; + --n; + dp /= 10; + } + cs = 1 + (3 << 24); // how many tens + goto flt_lead; + + case 'f': // float + fv = va_arg (va, double); + doafloat: + // do kilos + if (fl & STBSP__METRIC_SUFFIX) + { + while (fl < 0x4000000) + { + if ((fv < 1024.0) && (fv > -1024.0)) + break; + fv /= 1024.0; + fl += 0x1000000; + } + } + if (pr == -1) + pr = 6; // default is 6 + // read the double into a string + if (stbsp__real_to_str (&sn, &l, num, &dp, fv, pr)) + fl |= STBSP__NEGATIVE; + dofloatfromg: + tail[0] = 0; + // sign + lead[0] = 0; + if (fl & STBSP__NEGATIVE) + { + lead[0] = 1; + lead[1] = '-'; + } + else if (fl & STBSP__LEADINGSPACE) + { + lead[0] = 1; + lead[1] = ' '; + } + else if (fl & STBSP__LEADINGPLUS) + { + lead[0] = 1; + lead[1] = '+'; + }; + if (dp == STBSP__SPECIAL) + { + s = (char*) sn; + cs = 0; + pr = 0; + goto scopy; + } + s = num + 64; + + // handle the three decimal varieties + if (dp <= 0) + { + stbsp__int32 i; + // handle 0.000*000xxxx + *s++ = '0'; + if (pr) + *s++ = stbsp__period; + n = -dp; + if ((stbsp__int32) n > pr) + n = pr; + i = n; + while (i) + { + if ((((stbsp__uintptr) s) & 3) == 0) + break; + *s++ = '0'; + --i; + } + while (i >= 4) + { + *(stbsp__uint32*) s = 0x30303030; + s += 4; + i -= 4; + } + while (i) + { + *s++ = '0'; + --i; + } + if ((stbsp__int32) (l + n) > pr) + l = pr - n; + i = l; + while (i) + { + *s++ = *sn++; + --i; + } + tz = pr - (n + l); + cs = 1 + (3 << 24); // how many tens did we write (for commas below) + } + else + { + cs = (fl & STBSP__TRIPLET_COMMA) ? 
((600 - (stbsp__uint32) dp) % 3) : 0; + if ((stbsp__uint32) dp >= l) + { + // handle xxxx000*000.0 + n = 0; + for (;;) + { + if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) + { + cs = 0; + *s++ = stbsp__comma; + } + else + { + *s++ = sn[n]; + ++n; + if (n >= l) + break; + } + } + if (n < (stbsp__uint32) dp) + { + n = dp - n; + if ((fl & STBSP__TRIPLET_COMMA) == 0) + { + while (n) + { + if ((((stbsp__uintptr) s) & 3) == 0) + break; + *s++ = '0'; + --n; + } + while (n >= 4) + { + *(stbsp__uint32*) s = 0x30303030; + s += 4; + n -= 4; + } + } + while (n) + { + if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) + { + cs = 0; + *s++ = stbsp__comma; + } + else + { + *s++ = '0'; + --n; + } + } + } + cs = (int) (s - (num + 64)) + (3 << 24); // cs is how many tens + if (pr) + { + *s++ = stbsp__period; + tz = pr; + } + } + else + { + // handle xxxxx.xxxx000*000 + n = 0; + for (;;) + { + if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) + { + cs = 0; + *s++ = stbsp__comma; + } + else + { + *s++ = sn[n]; + ++n; + if (n >= (stbsp__uint32) dp) + break; + } + } + cs = (int) (s - (num + 64)) + (3 << 24); // cs is how many tens + if (pr) + *s++ = stbsp__period; + if ((l - dp) > (stbsp__uint32) pr) + l = pr + dp; + while (n < l) + { + *s++ = sn[n]; + ++n; + } + tz = pr - (l - dp); + } + } + pr = 0; + + // handle k,m,g,t + if (fl & STBSP__METRIC_SUFFIX) + { + tail[0] = 1; + tail[1] = ' '; + { + if (fl >> 24) + { + tail[2] = "_kmgt"[fl >> 24]; + tail[0] = 2; + } + } + }; + + flt_lead: + // get the length that we copied + l = (stbsp__uint32) (s - (num + 64)); + s = num + 64; + goto scopy; +#endif + + case 'B': // upper binary + h = hexu; + goto binary; + + case 'b': // lower binary + h = hex; + binary: + lead[0] = 0; + if (fl & STBSP__LEADING_0X) + { + lead[0] = 2; + lead[1] = '0'; + lead[2] = h[0xb]; + } + l = (8 << 4) | (1 << 8); + goto radixnum; + + case 'o': // octal + h = hexu; + lead[0] = 0; + if (fl & STBSP__LEADING_0X) + { + lead[0] = 1; + lead[1] = '0'; + } + l = (3 << 4) | (3 << 
8); + goto radixnum; + + case 'p': // pointer + fl |= (sizeof (void*) == 8) ? STBSP__INTMAX : 0; + pr = sizeof (void*) * 2; + fl &= ~STBSP__LEADINGZERO; // 'p' only prints the pointer with zeros + // drop through to X + + case 'X': // upper binary + h = hexu; + goto dohexb; + + case 'x': // lower binary + h = hex; + dohexb: + l = (4 << 4) | (4 << 8); + lead[0] = 0; + if (fl & STBSP__LEADING_0X) + { + lead[0] = 2; + lead[1] = '0'; + lead[2] = h[16]; + } + radixnum: + // get the number + if (fl & STBSP__INTMAX) + n64 = va_arg (va, stbsp__uint64); + else + n64 = va_arg (va, stbsp__uint32); + + s = num + STBSP__NUMSZ; + dp = 0; + // clear tail, and clear leading if value is zero + tail[0] = 0; + if (n64 == 0) + { + lead[0] = 0; + if (pr == 0) + { + l = 0; + cs = (((l >> 4) & 15)) << 24; + goto scopy; + } + } + // convert to string + for (;;) + { + *--s = h[n64 & ((1 << (l >> 8)) - 1)]; + n64 >>= (l >> 8); + if (! ((n64) || ((stbsp__int32) ((num + STBSP__NUMSZ) - s) < pr))) + break; + if (fl & STBSP__TRIPLET_COMMA) + { + ++l; + if ((l & 15) == ((l >> 4) & 15)) + { + l &= ~15; + *--s = stbsp__comma; + } + } + }; + // get the tens and the comma pos + cs = (stbsp__uint32) ((num + STBSP__NUMSZ) - s) + ((((l >> 4) & 15)) << 24); + // get the length that we copied + l = (stbsp__uint32) ((num + STBSP__NUMSZ) - s); + // copy it + goto scopy; + + case 'u': // unsigned + case 'i': + case 'd': // integer + // get the integer and abs it + if (fl & STBSP__INTMAX) + { + stbsp__int64 i64 = va_arg (va, stbsp__int64); + n64 = (stbsp__uint64) i64; + if ((f[0] != 'u') && (i64 < 0)) + { + n64 = (stbsp__uint64) -i64; + fl |= STBSP__NEGATIVE; + } + } + else + { + stbsp__int32 i = va_arg (va, stbsp__int32); + n64 = (stbsp__uint32) i; + if ((f[0] != 'u') && (i < 0)) + { + n64 = (stbsp__uint32) -i; + fl |= STBSP__NEGATIVE; + } + } + +#ifndef STB_SPRINTF_NOFLOAT + if (fl & STBSP__METRIC_SUFFIX) + { + if (n64 < 1024) + pr = 0; + else if (pr == -1) + pr = 1; + fv = (double) (stbsp__int64) n64; + 
goto doafloat; + } +#endif + + // convert to string + s = num + STBSP__NUMSZ; + l = 0; + + for (;;) + { + // do in 32-bit chunks (avoid lots of 64-bit divides even with constant denominators) + char* o = s - 8; + if (n64 >= 100000000) + { + n = (stbsp__uint32) (n64 % 100000000); + n64 /= 100000000; + } + else + { + n = (stbsp__uint32) n64; + n64 = 0; + } + if ((fl & STBSP__TRIPLET_COMMA) == 0) + { + while (n) + { + s -= 2; + *(stbsp__uint16*) s = *(stbsp__uint16*) &stbsp__digitpair[(n % 100) * 2]; + n /= 100; + } + } + while (n) + { + if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) + { + l = 0; + *--s = stbsp__comma; + --o; + } + else + { + *--s = (char) (n % 10) + '0'; + n /= 10; + } + } + if (n64 == 0) + { + if ((s[0] == '0') && (s != (num + STBSP__NUMSZ))) + ++s; + break; + } + while (s != o) + if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) + { + l = 0; + *--s = stbsp__comma; + --o; + } + else + { + *--s = '0'; + } + } + + tail[0] = 0; + // sign + lead[0] = 0; + if (fl & STBSP__NEGATIVE) + { + lead[0] = 1; + lead[1] = '-'; + } + else if (fl & STBSP__LEADINGSPACE) + { + lead[0] = 1; + lead[1] = ' '; + } + else if (fl & STBSP__LEADINGPLUS) + { + lead[0] = 1; + lead[1] = '+'; + }; + + // get the length that we copied + l = (stbsp__uint32) ((num + STBSP__NUMSZ) - s); + if (l == 0) + { + *--s = '0'; + l = 1; + } + cs = l + (3 << 24); + if (pr < 0) + pr = 0; + + scopy: + // get fw=leading/trailing space, pr=leading zeros + if (pr < (stbsp__int32) l) + pr = l; + n = pr + lead[0] + tail[0] + tz; + if (fw < (stbsp__int32) n) + fw = n; + fw -= n; + pr -= l; + + // handle right justify and leading zeros + if ((fl & STBSP__LEFTJUST) == 0) + { + if (fl & STBSP__LEADINGZERO) // if leading zeros, everything is in pr + { + pr = (fw > pr) ? 
fw : pr; + fw = 0; + } + else + { + fl &= ~STBSP__TRIPLET_COMMA; // if no leading zeros, then no commas + } + } + + // copy the spaces and/or zeros + if (fw + pr) + { + stbsp__int32 i; + stbsp__uint32 c; + + // copy leading spaces (or when doing %8.4d stuff) + if ((fl & STBSP__LEFTJUST) == 0) + while (fw > 0) + { + stbsp__cb_buf_clamp (i, fw); + fw -= i; + while (i) + { + if ((((stbsp__uintptr) bf) & 3) == 0) + break; + *bf++ = ' '; + --i; + } + while (i >= 4) + { + *(stbsp__uint32*) bf = 0x20202020; + bf += 4; + i -= 4; + } + while (i) + { + *bf++ = ' '; + --i; + } + stbsp__chk_cb_buf (1); + } + + // copy leader + sn = lead + 1; + while (lead[0]) + { + stbsp__cb_buf_clamp (i, lead[0]); + lead[0] -= (char) i; + while (i) + { + *bf++ = *sn++; + --i; + } + stbsp__chk_cb_buf (1); + } + + // copy leading zeros + c = cs >> 24; + cs &= 0xffffff; + cs = (fl & STBSP__TRIPLET_COMMA) ? ((stbsp__uint32) (c - ((pr + cs) % (c + 1)))) : 0; + while (pr > 0) + { + stbsp__cb_buf_clamp (i, pr); + pr -= i; + if ((fl & STBSP__TRIPLET_COMMA) == 0) + { + while (i) + { + if ((((stbsp__uintptr) bf) & 3) == 0) + break; + *bf++ = '0'; + --i; + } + while (i >= 4) + { + *(stbsp__uint32*) bf = 0x30303030; + bf += 4; + i -= 4; + } + } + while (i) + { + if ((fl & STBSP__TRIPLET_COMMA) && (cs++ == c)) + { + cs = 0; + *bf++ = stbsp__comma; + } + else + *bf++ = '0'; + --i; + } + stbsp__chk_cb_buf (1); + } + } + + // copy leader if there is still one + sn = lead + 1; + while (lead[0]) + { + stbsp__int32 i; + stbsp__cb_buf_clamp (i, lead[0]); + lead[0] -= (char) i; + while (i) + { + *bf++ = *sn++; + --i; + } + stbsp__chk_cb_buf (1); + } + + // copy the string + n = l; + while (n) + { + stbsp__int32 i; + stbsp__cb_buf_clamp (i, n); + n -= i; + STBSP__UNALIGNED (while (i >= 4) { *(stbsp__uint32*)bf=*(stbsp__uint32*)s; bf+=4; s+=4; i-=4; }) + while (i) + { + *bf++ = *s++; + --i; + } + stbsp__chk_cb_buf (1); + } + + // copy trailing zeros + while (tz) + { + stbsp__int32 i; + stbsp__cb_buf_clamp (i, tz); 
+ tz -= i; + while (i) + { + if ((((stbsp__uintptr) bf) & 3) == 0) + break; + *bf++ = '0'; + --i; + } + while (i >= 4) + { + *(stbsp__uint32*) bf = 0x30303030; + bf += 4; + i -= 4; + } + while (i) + { + *bf++ = '0'; + --i; + } + stbsp__chk_cb_buf (1); + } + + // copy tail if there is one + sn = tail + 1; + while (tail[0]) + { + stbsp__int32 i; + stbsp__cb_buf_clamp (i, tail[0]); + tail[0] -= (char) i; + while (i) + { + *bf++ = *sn++; + --i; + } + stbsp__chk_cb_buf (1); + } + + // handle the left justify + if (fl & STBSP__LEFTJUST) + if (fw > 0) + { + while (fw) + { + stbsp__int32 i; + stbsp__cb_buf_clamp (i, fw); + fw -= i; + while (i) + { + if ((((stbsp__uintptr) bf) & 3) == 0) + break; + *bf++ = ' '; + --i; + } + while (i >= 4) + { + *(stbsp__uint32*) bf = 0x20202020; + bf += 4; + i -= 4; + } + while (i--) + *bf++ = ' '; + stbsp__chk_cb_buf (1); + } + } + break; + + default: // unknown, just copy code + s = num + STBSP__NUMSZ - 1; + *s = f[0]; + l = 1; + fw = pr = fl = 0; + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + goto scopy; + } + ++f; + } +endfmt: + + if (! callback) + *bf = 0; + else + stbsp__flush_cb(); + +done: + return tlen + (int) (bf - buf); +} + +// cleanup +#undef STBSP__LEFTJUST +#undef STBSP__LEADINGPLUS +#undef STBSP__LEADINGSPACE +#undef STBSP__LEADING_0X +#undef STBSP__LEADINGZERO +#undef STBSP__INTMAX +#undef STBSP__TRIPLET_COMMA +#undef STBSP__NEGATIVE +#undef STBSP__METRIC_SUFFIX +#undef STBSP__NUMSZ +#undef stbsp__chk_cb_bufL +#undef stbsp__chk_cb_buf +#undef stbsp__flush_cb +#undef stbsp__cb_buf_clamp + +// ============================================================================ +// wrapper functions + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (sprintf) (char* buf, char const* fmt, ...) 
+{ + // Forward the variadic arguments to vsprintfcb with no callback and return its result. + int result; + va_list va; + va_start (va, fmt); + result = STB_SPRINTF_DECORATE (vsprintfcb) (0, 0, buf, fmt, va); + va_end (va); // every va_start must be paired with va_end before the function returns + return result; +} + +typedef struct stbsp__context +{ + char* buf; + int count; + char tmp[STB_SPRINTF_MIN]; +} stbsp__context; + +static char* stbsp__clamp_callback (char* buf, void* user, int len) +{ + stbsp__context* c = (stbsp__context*) user; + + if (len > c->count) + len = c->count; + + if (len) + { + if (buf != c->buf) + { + char *s, *d, *se; + d = c->buf; + s = buf; + se = buf + len; + do + { + *d++ = *s++; + } while (s < se); + } + c->buf += len; + c->count -= len; + } + + if (c->count <= 0) + return 0; + return (c->count >= STB_SPRINTF_MIN) ? c->buf : c->tmp; // go direct into buffer if you can +} + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (vsnprintf) (char* buf, int count, char const* fmt, va_list va) +{ + stbsp__context c; + int l; + + if (count == 0) + return 0; + + c.buf = buf; + c.count = count; + + STB_SPRINTF_DECORATE (vsprintfcb) + (stbsp__clamp_callback, &c, stbsp__clamp_callback (0, &c, 0), fmt, va); + + // zero-terminate + l = (int) (c.buf - buf); + if (l >= count) // should never be greater, only equal (or less) than count + l = count - 1; + buf[l] = 0; + + return l; +} + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (snprintf) (char* buf, int count, char const* fmt, ...) 
+{ + // Forward the variadic arguments to vsnprintf and return its result. + int result; + va_list va; + va_start (va, fmt); + + result = STB_SPRINTF_DECORATE (vsnprintf) (buf, count, fmt, va); + va_end (va); // every va_start must be paired with va_end before the function returns + + return result; +} + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (vsprintf) (char* buf, char const* fmt, va_list va) +{ + return STB_SPRINTF_DECORATE (vsprintfcb) (0, 0, buf, fmt, va); +} + +// ======================================================================= +// low level float utility functions + +#ifndef STB_SPRINTF_NOFLOAT + +// copies d to bits w/ strict aliasing (this compiles to nothing on /Ox) +#define STBSP__COPYFP(dest, src) \ + { \ + int cn; \ + for (cn = 0; cn < 8; cn++) \ + ((char*) &dest)[cn] = ((char*) &src)[cn]; \ + } + +// get float info +static stbsp__int32 stbsp__real_to_parts (stbsp__int64* bits, stbsp__int32* expo, double value) +{ + double d; + stbsp__int64 b = 0; + + // load value and round at the frac_digits + d = value; + + STBSP__COPYFP (b, d); + + *bits = b & ((((stbsp__uint64) 1) << 52) - 1); + *expo = (stbsp__int32) (((b >> 52) & 2047) - 1023); + + return (stbsp__int32) (b >> 63); +} + +static double const stbsp__bot[23] = { 1e+000, 1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010, 1e+011, 1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020, 1e+021, 1e+022 }; +static double const stbsp__negbot[22] = { 1e-001, 1e-002, 1e-003, 1e-004, 1e-005, 1e-006, 1e-007, 1e-008, 1e-009, 1e-010, 1e-011, 1e-012, 1e-013, 1e-014, 1e-015, 1e-016, 1e-017, 1e-018, 1e-019, 1e-020, 1e-021, 1e-022 }; +static double const stbsp__negboterr[22] = { -5.551115123125783e-018, -2.0816681711721684e-019, -2.0816681711721686e-020, -4.7921736023859299e-021, -8.1803053914031305e-022, 4.5251888174113741e-023, 4.5251888174113739e-024, -2.0922560830128471e-025, -6.2281591457779853e-026, -3.6432197315497743e-027, 6.0503030718060191e-028, 2.0113352370744385e-029, -3.0373745563400371e-030, 1.1806906454401013e-032, -7.7705399876661076e-032, 2.0902213275965398e-033, -7.1542424054621921e-034, -7.1542424054621926e-035, 
2.4754073164739869e-036, 5.4846728545790429e-037, 9.2462547772103625e-038, -4.8596774326570872e-039 }; +static double const stbsp__top[13] = { 1e+023, 1e+046, 1e+069, 1e+092, 1e+115, 1e+138, 1e+161, 1e+184, 1e+207, 1e+230, 1e+253, 1e+276, 1e+299 }; +static double const stbsp__negtop[13] = { 1e-023, 1e-046, 1e-069, 1e-092, 1e-115, 1e-138, 1e-161, 1e-184, 1e-207, 1e-230, 1e-253, 1e-276, 1e-299 }; +static double const stbsp__toperr[13] = { 8388608, 6.8601809640529717e+028, -7.253143638152921e+052, -4.3377296974619174e+075, -1.5559416129466825e+098, -3.2841562489204913e+121, -3.7745893248228135e+144, -1.7356668416969134e+167, -3.8893577551088374e+190, -9.9566444326005119e+213, 6.3641293062232429e+236, -5.2069140800249813e+259, -5.2504760255204387e+282 }; +static double const stbsp__negtoperr[13] = { 3.9565301985100693e-040, -2.299904345391321e-063, 3.6506201437945798e-086, 1.1875228833981544e-109, -5.0644902316928607e-132, -6.7156837247865426e-155, -2.812077463003139e-178, -5.7778912386589953e-201, 7.4997100559334532e-224, -4.6439668915134491e-247, -6.3691100762962136e-270, -9.436808465446358e-293, 8.0970921678014997e-317 }; + +#if defined(_MSC_VER) && (_MSC_VER <= 1200) +static stbsp__uint64 const stbsp__powten[20] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000, 100000000000, 1000000000000, 10000000000000, 100000000000000, 1000000000000000, 10000000000000000, 100000000000000000, 1000000000000000000, 10000000000000000000U }; +#define stbsp__tento19th ((stbsp__uint64) 1000000000000000000) +#else +static stbsp__uint64 const stbsp__powten[20] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000ULL, 100000000000ULL, 1000000000000ULL, 10000000000000ULL, 100000000000000ULL, 1000000000000000ULL, 10000000000000000ULL, 100000000000000000ULL, 1000000000000000000ULL, 10000000000000000000ULL }; +#define stbsp__tento19th (1000000000000000000ULL) +#endif + +#define stbsp__ddmulthi(oh, ol, xh, yh) \ + { 
\ + double ahi = 0, alo, bhi = 0, blo; \ + stbsp__int64 bt; \ + oh = xh * yh; \ + STBSP__COPYFP (bt, xh); \ + bt &= ((~(stbsp__uint64) 0) << 27); \ + STBSP__COPYFP (ahi, bt); \ + alo = xh - ahi; \ + STBSP__COPYFP (bt, yh); \ + bt &= ((~(stbsp__uint64) 0) << 27); \ + STBSP__COPYFP (bhi, bt); \ + blo = yh - bhi; \ + ol = ((ahi * bhi - oh) + ahi * blo + alo * bhi) + alo * blo; \ + } + +#define stbsp__ddtoS64(ob, xh, xl) \ + { \ + double ahi = 0, alo, vh, t; \ + ob = (stbsp__int64) xh; /* was 'ph': must read the macro parameter, not a caller-side local */ \ + vh = (double) ob; \ + ahi = (xh - vh); \ + t = (ahi - xh); \ + alo = (xh - (ahi - t)) - (vh + t); \ + ob += (stbsp__int64) (ahi + alo + xl); \ + } + +#define stbsp__ddrenorm(oh, ol) \ + { \ + double s; \ + s = oh + ol; \ + ol = ol - (s - oh); \ + oh = s; \ + } + +#define stbsp__ddmultlo(oh, ol, xh, xl, yh, yl) \ + ol = ol + (xh * yl + xl * yh); + +#define stbsp__ddmultlos(oh, ol, xh, yl) \ + ol = ol + (xh * yl); + +static void stbsp__raise_to_power10 (double* ohi, double* olo, double d, stbsp__int32 power) // power can be -323 to +350 +{ + double ph, pl; + if ((power >= 0) && (power <= 22)) + { + stbsp__ddmulthi (ph, pl, d, stbsp__bot[power]); + } + else + { + stbsp__int32 e, et, eb; + double p2h, p2l; + + e = power; + if (power < 0) + e = -e; + et = (e * 0x2c9) >> 14; /* %23 */ + if (et > 13) + et = 13; + eb = e - (et * 23); + + ph = d; + pl = 0.0; + if (power < 0) + { + if (eb) + { + --eb; + stbsp__ddmulthi (ph, pl, d, stbsp__negbot[eb]); + stbsp__ddmultlos (ph, pl, d, stbsp__negboterr[eb]); + } + if (et) + { + stbsp__ddrenorm (ph, pl); + --et; + stbsp__ddmulthi (p2h, p2l, ph, stbsp__negtop[et]); + stbsp__ddmultlo (p2h, p2l, ph, pl, stbsp__negtop[et], stbsp__negtoperr[et]); + ph = p2h; + pl = p2l; + } + } + else + { + if (eb) + { + e = eb; + if (eb > 22) + eb = 22; + e -= eb; + stbsp__ddmulthi (ph, pl, d, stbsp__bot[eb]); + if (e) + { + stbsp__ddrenorm (ph, pl); + stbsp__ddmulthi (p2h, p2l, ph, stbsp__bot[e]); + stbsp__ddmultlos (p2h, p2l, stbsp__bot[e], pl); + ph = p2h; + pl = 
p2l; + } + } + if (et) + { + stbsp__ddrenorm (ph, pl); + --et; + stbsp__ddmulthi (p2h, p2l, ph, stbsp__top[et]); + stbsp__ddmultlo (p2h, p2l, ph, pl, stbsp__top[et], stbsp__toperr[et]); + ph = p2h; + pl = p2l; + } + } + } + stbsp__ddrenorm (ph, pl); + *ohi = ph; + *olo = pl; +} + +// given a float value, returns the significant bits in bits, and the position of the +// decimal point in decimal_pos. +/-INF and NAN are specified by special values +// returned in the decimal_pos parameter. +// frac_digits is absolute normally, but if you want from first significant digits (got %g and %e), or in 0x80000000 +static stbsp__int32 stbsp__real_to_str (char const** start, stbsp__uint32* len, char* out, stbsp__int32* decimal_pos, double value, stbsp__uint32 frac_digits) +{ + double d; + stbsp__int64 bits = 0; + stbsp__int32 expo, e, ng, tens; + + d = value; + STBSP__COPYFP (bits, d); + expo = (stbsp__int32) ((bits >> 52) & 2047); + ng = (stbsp__int32) (bits >> 63); + if (ng) + d = -d; + + if (expo == 2047) // is nan or inf? + { + *start = (bits & ((((stbsp__uint64) 1) << 52) - 1)) ? "NaN" : "Inf"; + *decimal_pos = STBSP__SPECIAL; + *len = 3; + return ng; + } + + if (expo == 0) // is zero or denormal + { + if ((bits << 1) == 0) // do zero + { + *decimal_pos = 1; + *start = out; + out[0] = '0'; + *len = 1; + return ng; + } + // find the right expo for denormals + { + stbsp__int64 v = ((stbsp__uint64) 1) << 51; + while ((bits & v) == 0) + { + --expo; + v >>= 1; + } + } + } + + // find the decimal exponent as well as the decimal bits of the value + { + double ph, pl; + + // log10 estimate - very specifically tweaked to hit or undershoot by no more than 1 of log10 of all expos 1..2046 + tens = expo - 1023; + tens = (tens < 0) ? 
((tens * 617) / 2048) : (((tens * 1233) / 4096) + 1); + + // move the significant bits into position and stick them into an int + stbsp__raise_to_power10 (&ph, &pl, d, 18 - tens); + + // get as much precision from double-double as possible + stbsp__ddtoS64 (bits, ph, pl); + + // check if we undershot + if (((stbsp__uint64) bits) >= stbsp__tento19th) + ++tens; + } + + // now do the rounding in integer land + frac_digits = (frac_digits & 0x80000000) ? ((frac_digits & 0x7ffffff) + 1) : (tens + frac_digits); + if ((frac_digits < 24)) + { + stbsp__uint32 dg = 1; + if ((stbsp__uint64) bits >= stbsp__powten[9]) + dg = 10; + while ((stbsp__uint64) bits >= stbsp__powten[dg]) + { + ++dg; + if (dg == 20) + goto noround; + } + if (frac_digits < dg) + { + stbsp__uint64 r; + // add 0.5 at the right position and round + e = dg - frac_digits; + if ((stbsp__uint32) e >= 24) + goto noround; + r = stbsp__powten[e]; + bits = bits + (r / 2); + if ((stbsp__uint64) bits >= stbsp__powten[dg]) + ++tens; + bits /= r; + } + noround:; + } + + // kill long trailing runs of zeros + if (bits) + { + stbsp__uint32 n; + for (;;) + { + if (bits <= 0xffffffff) + break; + if (bits % 1000) + goto donez; + bits /= 1000; + } + n = (stbsp__uint32) bits; + while ((n % 1000) == 0) + n /= 1000; + bits = n; + donez:; + } + + // convert to string + out += 64; + e = 0; + for (;;) + { + stbsp__uint32 n; + char* o = out - 8; + // do the conversion in chunks of U32s (avoid most 64-bit divides, worth it, constant denominators be damned) + if (bits >= 100000000) + { + n = (stbsp__uint32) (bits % 100000000); + bits /= 100000000; + } + else + { + n = (stbsp__uint32) bits; + bits = 0; + } + while (n) + { + out -= 2; + *(stbsp__uint16*) out = *(stbsp__uint16*) &stbsp__digitpair[(n % 100) * 2]; + n /= 100; + e += 2; + } + if (bits == 0) + { + if ((e) && (out[0] == '0')) + { + ++out; + --e; + } + break; + } + while (out != o) + { + *--out = '0'; + ++e; + } + } + + *decimal_pos = tens; + *start = out; + *len = e; + 
return ng; +} + +#undef stbsp__ddmulthi +#undef stbsp__ddrenorm +#undef stbsp__ddmultlo +#undef stbsp__ddmultlos +#undef STBSP__SPECIAL +#undef STBSP__COPYFP + +#endif // STB_SPRINTF_NOFLOAT + +// clean up +#undef stbsp__uint16 +#undef stbsp__uint32 +#undef stbsp__int32 +#undef stbsp__uint64 +#undef stbsp__int64 +#undef STBSP__UNALIGNED + +#endif // STB_SPRINTF_IMPLEMENTATION diff --git a/src/pch.h b/src/pch.h index e3b0383b..85677e88 100644 --- a/src/pch.h +++ b/src/pch.h @@ -31,6 +31,10 @@ JUCE_BEGIN_IGNORE_WARNINGS_GCC_LIKE ("-Wzero-as-null-pointer-constant", #include JUCE_END_IGNORE_WARNINGS_GCC_LIKE +#if BYOD_BUILDING_JAI_MODULES +#include "jai/SharedJaiContext.h" +#endif + // global definitions using Parameters = std::vector>; using ParamLayout = AudioProcessorValueTreeState::ParameterLayout; diff --git a/src/processors/ProcessorStore.cpp b/src/processors/ProcessorStore.cpp index b956e665..f9cb9558 100644 --- a/src/processors/ProcessorStore.cpp +++ b/src/processors/ProcessorStore.cpp @@ -58,6 +58,7 @@ #include "other/ShimmerReverb.h" #include "other/SmoothReverb.h" #include "other/cry_baby/CryBaby.h" +#include "other/krusher/Krusher.h" #include "other/spring_reverb/SpringReverbProcessor.h" #include "utility/CleanGain.h" @@ -151,6 +152,7 @@ ProcessorStore::StoreMap ProcessorStore::store = { { "Shimmer Reverb", { &processorFactory, { ProcessorType::Other, 1, 1 } } }, { "Smooth Reverb", { &processorFactory, { ProcessorType::Other, 1, 1 } } }, { "Spring Reverb", { &processorFactory, { ProcessorType::Other, 1, 1 } } }, + { "Krusher", { &processorFactory, { ProcessorType::Other, 1, 1 } } }, #if BYOD_ENABLE_ADD_ON_MODULES BYOD_STORE_MAP_ADD_ON_MODULES diff --git a/src/processors/other/krusher/Krusher.cpp b/src/processors/other/krusher/Krusher.cpp new file mode 100644 index 00000000..1915495a --- /dev/null +++ b/src/processors/other/krusher/Krusher.cpp @@ -0,0 +1,126 @@ +#include "Krusher.h" +#include "processors/ParameterHelpers.h" + +Krusher::Krusher (UndoManager* 
um) + : BaseProcessor ("Krusher", createParameterLayout(), um) +{ + using namespace ParameterHelpers; + loadParameterPointer (sampleRateParam, vts, "sample_rate"); + loadParameterPointer (antialiasParam, vts, "antialias"); + loadParameterPointer (bitDepthParam, vts, "bit_depth"); + loadParameterPointer (brrFilterIndex, vts, "bit_reduction_filter"); + loadParameterPointer (mixParam, vts, "mix"); + + uiOptions.backgroundColour = Colour { Colours::lightpink }; + uiOptions.powerColour = Colour { Colours::red.darker (0.2f) }; + uiOptions.info.description = "A lo-fi effect that reduces the sample rate and bit depth of the signal."; + uiOptions.info.authors = StringArray { "Jatin Chowdhury" }; + + addPopupMenuParameter ("antialias"); +} + +ParamLayout Krusher::createParameterLayout() +{ + using namespace ParameterHelpers; + auto params = createBaseParams(); + + emplace_param (params, + "sample_rate", + "Downsample", + createNormalisableRange (1000.0f, 48000.0f, 8000.0f), + 48000.0f, + &freqValToString, + &stringToFreqVal); + emplace_param (params, "antialias", "Anti-Alias", false); + emplace_param (params, + "bit_depth", + "Bits", + NormalisableRange { 1.0f, 12.0f, 1.0f }, + 12.0f, + &floatValToString, + &stringToFloatVal); + emplace_param (params, + "bit_reduction_filter", + "Smoothq", + StringArray { "Zero-Order", "First-Order", "Second-Order", "Third-Order" }, + 1); + createPercentParameter (params, "mix", "Mix", 1.0f); + + return { params.begin(), params.end() }; +} + +static void setDryWetGain (chowdsp::Gain& dryGain, chowdsp::Gain& wetGain, float mix) +{ + const auto dryGainValue = std::sqrt (1.0f - mix); + const auto wetGainValue = std::sqrt (mix); + + dryGain.setGainLinear (dryGainValue); + wetGain.setGainLinear (wetGainValue); +} + +void Krusher::prepare (double sampleRate, int samplesPerBlock) +{ + hostFs = (float) sampleRate; + + aaFilter.prepare (2); + aiFilter.prepare (2); + + krusher_init_lofi_resample (&resample_state); + + for (auto& state : 
brFilterStates) + state = {}; + + dcBlocker.prepare (2); + dcBlocker.calcCoefs (20.0f, (float) sampleRate); + + wetGain.setRampDurationSeconds (0.05); + dryGain.setRampDurationSeconds (0.05); + setDryWetGain (dryGain, wetGain, *mixParam); + wetGain.prepare ({ sampleRate, (uint32_t) samplesPerBlock, 2 }); + dryGain.prepare ({ sampleRate, (uint32_t) samplesPerBlock, 2 }); + dryBuffer.setMaxSize (2, samplesPerBlock); +} + +void Krusher::processDownsampler (const chowdsp::BufferView& buffer, float targetFs, bool antialias) noexcept +{ + if (targetFs >= hostFs) + return; + + aaFilter.calcCoefs ((float) targetFs * 0.48f, chowdsp::CoefficientCalculators::butterworthQ, hostFs); + aiFilter.calcCoefs ((float) targetFs * 0.48f, chowdsp::CoefficientCalculators::butterworthQ, hostFs); + + if (antialias) + aaFilter.processBlock (buffer); + + krusher_process_lofi_downsample (jai_context.get(), + &resample_state, + const_cast (buffer.getArrayOfWritePointers()), + buffer.getNumChannels(), + buffer.getNumSamples(), + double (hostFs / targetFs)); + + if (antialias) + aiFilter.processBlock (buffer); +} + +void Krusher::processAudio (AudioBuffer& buffer) +{ + dryBuffer.setCurrentSize (buffer.getNumChannels(), buffer.getNumSamples()); + chowdsp::BufferMath::copyBufferData (buffer, dryBuffer); + + krusher_bit_reduce_process_block (const_cast (buffer.getArrayOfWritePointers()), + buffer.getNumChannels(), + buffer.getNumSamples(), + brrFilterIndex->getIndex(), + (int) *bitDepthParam, + brFilterStates.data()); + + processDownsampler (buffer, *sampleRateParam, antialiasParam->get()); + + dcBlocker.processBlock (buffer); + + setDryWetGain (dryGain, wetGain, *mixParam); + wetGain.process (buffer); + dryGain.process (dryBuffer); + chowdsp::BufferMath::addBufferData (dryBuffer, buffer); +} diff --git a/src/processors/other/krusher/Krusher.h b/src/processors/other/krusher/Krusher.h new file mode 100644 index 00000000..12a1e6ef --- /dev/null +++ b/src/processors/other/krusher/Krusher.h @@ -0,0 
+1,54 @@ +#pragma once + +#include "processors/BaseProcessor.h" + +#define KRUSHER_USE_JAI_IMPL ! JUCE_ARM&& BYOD_BUILDING_JAI_MODULES + +#if KRUSHER_USE_JAI_IMPL +#include "jai/byod_jai_lib.h" +#else +#include "krusher_fallback_impl.h" +#endif + +class Krusher : public BaseProcessor +{ +public: + explicit Krusher (UndoManager* um = nullptr); + + ProcessorType getProcessorType() const override { return Other; } + static ParamLayout createParameterLayout(); + + void prepare (double sampleRate, int samplesPerBlock) override; + void processAudio (AudioBuffer& buffer) override; + +private: + void processDownsampler (const chowdsp::BufferView& buffer, float targetFs, bool antialias) noexcept; + + chowdsp::FloatParameter* sampleRateParam = nullptr; + chowdsp::BoolParameter* antialiasParam = nullptr; + chowdsp::FloatParameter* bitDepthParam = nullptr; + chowdsp::ChoiceParameter* brrFilterIndex = nullptr; + chowdsp::FloatParameter* mixParam = nullptr; + + chowdsp::EllipticFilter<8> aaFilter; + chowdsp::EllipticFilter<8> aiFilter; + float hostFs = 48000.0f; + +#if KRUSHER_USE_JAI_IMPL + SharedJaiContext jai_context; + jai_Krusher_Lofi_Resample_State resample_state {}; + std::array brFilterStates {}; +#else + std::unique_ptr jai_context; + Krusher_Lofi_Resample_State resample_state {}; + std::array brFilterStates {}; +#endif + + chowdsp::FirstOrderHPF dcBlocker; + + chowdsp::Gain wetGain; + chowdsp::Gain dryGain; + chowdsp::Buffer dryBuffer; + + JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (Krusher) +}; diff --git a/src/processors/other/krusher/krusher_fallback_impl.h b/src/processors/other/krusher/krusher_fallback_impl.h new file mode 100644 index 00000000..691496ad --- /dev/null +++ b/src/processors/other/krusher/krusher_fallback_impl.h @@ -0,0 +1,240 @@ +#pragma once + +struct Krusher_Lofi_Resample_State +{ + double upsample_overshoot = 0.0; + double downsample_overshoot = 0.0; +}; + +inline void krusher_init_lofi_resample (Krusher_Lofi_Resample_State* state) +{ + 
state->upsample_overshoot = 0.0; + state->downsample_overshoot = 0.0; +} + +namespace krusher_detail +{ +inline void process_lofi_resample (float** source_buffer, + float** dest_buffer, + int num_channels, + int num_samples_source, + int num_samples_dest, + double resample_factor, + double& overshoot_samples) +{ + // simple S&H lofi resampler + for (int channel = 0; channel < num_channels; ++channel) + { + const auto* source_data = source_buffer[channel]; + auto* dest_data = dest_buffer[channel]; + + for (int i = 0; i < num_samples_dest; ++i) + { + const auto grab_index = (int) ((double) i * resample_factor + overshoot_samples); + dest_data[i] = source_data[std::min (grab_index, num_samples_source - 1)]; + } + } + + overshoot_samples = (double) num_samples_dest * resample_factor - std::floor ((double) num_samples_dest * resample_factor); +} +} // namespace krusher_detail + +inline void krusher_process_lofi_downsample ([[maybe_unused]] void* ctx, + Krusher_Lofi_Resample_State* state, + float** buffer, + int num_channels, + int num_samples, + double resample_factor) +{ + const auto ds_buffer_size = (int) std::ceil ((double) num_samples / resample_factor); + + auto* temp_data = (float*) alloca (2 * (size_t) ds_buffer_size * sizeof (float)); + float* ds_buffer[2] = { temp_data, temp_data + ds_buffer_size }; + + krusher_detail::process_lofi_resample (buffer, ds_buffer, num_channels, num_samples, ds_buffer_size, resample_factor, state->downsample_overshoot); + krusher_detail::process_lofi_resample (ds_buffer, buffer, num_channels, ds_buffer_size, num_samples, 1.0 / resample_factor, state->upsample_overshoot); +} + +//============================================== +struct Krusher_Bit_Reducer_Filter_State +{ + int32_t p1 {}; + int32_t p2 {}; +}; + +namespace krusher_detail +{ +static constexpr uint16_t BIT_MASKS[] = { + 0, // 0 + 0x0001, // 1 + 0x0003, // 2 + 0x0007, // 3 + 0x000F, // 4 + 0x001F, // 5 + 0x003F, // 6 + 0x007F, // 7 + 0x00FF, // 8 + 0x01FF, // 9 + 0x03FF, // 
10 + 0x07FF, // 11 + 0x0FFF, // 12 + 0x1FFF, // 13 + 0x3FFF, // 14 + 0x7FFF, // 15 +}; + +struct Bit_Reduction_Block +{ + uint8_t shift_amount {}; + std::array data {}; +}; + +inline uint16_t encode_sample (uint8_t shift, int bit_depth, int16_t x) +{ + const auto value_unsigned = (uint16_t) (x + (1 << 8)); + return (uint16_t) (value_unsigned >> shift) & BIT_MASKS[bit_depth]; +} + +inline int16_t decode_sample (uint8_t shift, uint16_t x) +{ + return (int16_t) ((uint16_t) (x << shift) - (1 << 8)); +} + +inline void bit_reduce_decode (const Bit_Reduction_Block& br_block, + std::span out, + int filter, + Krusher_Bit_Reducer_Filter_State& state) +{ + uint8_t shift_amount = br_block.shift_amount; + + const auto type1_filter = [&state] (int16_t nibble_2r) + { + const auto y = int32_t (nibble_2r) + ((state.p1 * 15) >> 4); + state.p2 = 0; + state.p1 = y; + return (int16_t) (y >> 4); + }; + + const auto type2_filter = [&state] (int16_t nibble_2r) + { + const auto y = int32_t (nibble_2r) + ((state.p1 * 61) >> 5) - ((state.p2 * 15) >> 4); + state.p2 = state.p1; + state.p1 = y; + return (int16_t) (y >> 5); + }; + + const auto type3_filter = [&state] (int16_t nibble_2r) + { + const auto y = int32_t (nibble_2r) + ((state.p1 * 115) >> 6) - ((state.p2 * 13) >> 4); + state.p2 = state.p1; + state.p1 = y; + return (int16_t) (y >> 6); + }; + + for (size_t i = 0; i < 16; ++i) + { + const auto brr_sample = br_block.data[i]; + + switch (filter) + { + case 0: + out[i] = decode_sample (shift_amount, brr_sample); + break; + + case 1: + out[i] = type1_filter (decode_sample (shift_amount, brr_sample)); + break; + + case 2: + out[i] = type2_filter (decode_sample (shift_amount, brr_sample)); + break; + + case 3: + out[i] = type3_filter (decode_sample (shift_amount, brr_sample)); + break; + + default: + jassertfalse; + break; + } + } +} + +inline Bit_Reduction_Block bit_reduce_encode (std::span PCM_data, int bit_depth) +{ + uint8_t shift_best = 0; + double err_min = std::numeric_limits::max(); + 
+ for (uint8_t s = 0; s < uint8_t (16 - bit_depth + 1); ++s) + { + auto err_sq_accum = 0.0; + for (size_t i = 0; i < 16; ++i) + { + const auto pred = decode_sample (s, encode_sample (s, bit_depth, PCM_data[i])); + const auto err = double (PCM_data[i] - pred); + err_sq_accum += err * err; + } + + if (err_sq_accum < err_min) + { + err_min = err_sq_accum; + shift_best = s; + } + } + + Bit_Reduction_Block brr {}; + brr.shift_amount = shift_best; + for (size_t i = 0; i < 16; ++i) + brr.data[i] = encode_sample (shift_best, bit_depth, PCM_data[i]); + + return brr; +} + +inline void convert_float_to_int (std::span dataFloat, std::span dataInt) +{ + for (size_t i = 0; i < dataFloat.size(); ++i) + dataInt[i] = int16_t (dataFloat[i] * float (1 << 8)); +} + +inline void convert_int_to_float (std::span dataInt, std::span dataFloat) +{ + for (size_t i = 0; i < dataFloat.size(); ++i) + dataFloat[i] = ((float) dataInt[i]) / float (1 << 8); +} +} // namespace krusher_detail + +inline void krusher_bit_reduce_process_block (float** buffer, + int32_t num_channels, + int32_t num_samples, + int32_t filter_index, + int32_t bit_depth, + Krusher_Bit_Reducer_Filter_State* filter_states) +{ + static constexpr size_t small_block_size = 16; + std::array samples_int {}; + + for (int channel = 0; channel < num_channels; ++channel) + { + auto samples_remaining = num_samples; + while (samples_remaining > 0) + { + const auto samples_to_process = std::min (samples_remaining, (int) small_block_size); + + std::span samples_float_span { buffer[channel] + num_samples - samples_remaining, (size_t) samples_to_process }; + std::fill (samples_int.begin(), samples_int.end(), 0); + std::span samples_int_span { samples_int.data(), (size_t) samples_to_process }; + + krusher_detail::convert_float_to_int (samples_float_span, samples_int_span); + + if (bit_depth < 12) + { + const auto br_data = krusher_detail::bit_reduce_encode (samples_int_span, bit_depth); + krusher_detail::bit_reduce_decode (br_data, 
samples_int_span, filter_index, filter_states[channel]); + } + + krusher_detail::convert_int_to_float (samples_int_span, samples_float_span); + + samples_remaining -= samples_to_process; + } + } +}