diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index 064b6290..95944616 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -54,6 +54,15 @@ jobs:
         path: modules/BYOD-add-ons
         token: ${{ secrets.OUR_GITHUB_PAT }}
 
+    - name: Checkout Jai
+      if: github.actor == 'jatinchowdhury18' # step is skipped for every other actor
+      uses: actions/checkout@v2
+      with:
+        repository: Chowdhury-DSP/jai-minimal
+        ref: main
+        path: modules/jai # modules/cmake/FindJaiCompiler.cmake searches modules/jai/bin
+        token: ${{ secrets.OUR_GITHUB_PAT }}
+
     - name: Configure
       shell: bash
       env:
diff --git a/modules/cmake/FindJaiCompiler.cmake b/modules/cmake/FindJaiCompiler.cmake
new file mode 100644
index 00000000..555728e5
--- /dev/null
+++ b/modules/cmake/FindJaiCompiler.cmake
@@ -0,0 +1,26 @@
+# Looks for the Jai compiler and stores the result in JAI_COMPILER.
+#
+# On platforms where the Jai build is skipped, JAI_COMPILER is set to
+# "JAI_COMPILER-NOTFOUND", the same value find_program() leaves behind when
+# nothing is found, so callers only have one failure value to test for.
+if(IOS OR LINUX)
+    message(STATUS "Skipping Jai checks on this platform")
+    set(JAI_COMPILER "JAI_COMPILER-NOTFOUND")
+else()
+    # The compiler binary is named differently on each host platform.
+    if(APPLE)
+        set(JAI_COMPILER_EXE "jai-macos")
+    elseif(WIN32)
+        set(JAI_COMPILER_EXE "jai.exe")
+    else()
+        set(JAI_COMPILER_EXE "jai-linux")
+    endif()
+
+    find_program(JAI_COMPILER
+        NAMES ${JAI_COMPILER_EXE}
+        HINTS
+            ${CMAKE_SOURCE_DIR}/modules/jai/bin
+            ${CMAKE_SOURCE_DIR}/../../Research/jai/bin
+    )
+    message(STATUS "Jai compiler: ${JAI_COMPILER}")
+endif()
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d12fc365..9c7cb01f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,6 +3,7 @@ if(NOT (IOS OR BUILD_RELEASE))
     add_subdirectory(headless)
 endif()
 
+# main source files
 target_sources(BYOD PRIVATE
     BYOD.cpp
 
@@ -140,6 +141,7 @@ target_sources(BYOD PRIVATE
     processors/other/cry_baby/CryBabyNDK.cpp
     processors/other/spring_reverb/SpringReverb.cpp
     processors/other/spring_reverb/SpringReverbProcessor.cpp
+    processors/other/krusher/Krusher.cpp
 
     processors/utility/CleanGain.cpp
     processors/utility/FreqBandSplitter.cpp
@@ -155,8 +157,15 @@ target_sources(BYOD PRIVATE
     processors/netlist_helpers/NetlistViewer.cpp
 )
 
-target_precompile_headers(BYOD PRIVATE pch.h)
+# Jai files: only configured when FindJaiCompiler.cmake located a compiler
+include(${CMAKE_SOURCE_DIR}/modules/cmake/FindJaiCompiler.cmake)
+if (JAI_COMPILER) # false for an empty value and for any *-NOTFOUND value
+    message(STATUS "Configuring Jai compilation!")
+    add_subdirectory(jai)
+    target_compile_definitions(BYOD PRIVATE BYOD_BUILDING_JAI_MODULES=1)
+endif()
 
+# AVX/SSE files for accelerated neural nets
 make_lib_simd_runtime(rnn_accelerated processors/drive/neural_utils/RNNAccelerated.cpp)
 foreach(target IN ITEMS rnn_accelerated_sse_or_arm rnn_accelerated_avx)
     target_link_libraries(${target} PRIVATE config_flags juce::juce_recommended_lto_flags warning_flags)
@@ -177,7 +186,10 @@ target_compile_definitions(rnn_accelerated_sse_or_arm PRIVATE RTNEURAL_DEFAULT_A
 target_compile_definitions(rnn_accelerated_avx PRIVATE RTNEURAL_DEFAULT_ALIGNMENT=32)
 target_link_libraries(BYOD PRIVATE rnn_accelerated)
 
+# special flags for MSVC
 if (MSVC)
     target_compile_options(BYOD PRIVATE /bigobj)
 endif ()
 
+# pre-compiled header
+target_precompile_headers(BYOD PRIVATE pch.h)
diff --git a/src/jai/.gitignore b/src/jai/.gitignore
new file mode 100644
index 00000000..33a4f18a
--- /dev/null
+++ b/src/jai/.gitignore
@@ -0,0 +1,4 @@
+.build/
+*.lib
+*.a
+*_jai_lib.h
diff --git a/src/jai/CMakeLists.txt b/src/jai/CMakeLists.txt
new file mode 100644
index 00000000..e5f306e4
--- /dev/null
+++ b/src/jai/CMakeLists.txt
@@ -0,0 +1,59 @@
+# Builds the Jai sources in this directory into a static library and links it
+# into BYOD. Relies on JAI_COMPILER holding the path to the Jai compiler and on
+# the BYOD target already being defined.
+
+if(WIN32)
+    set(JAI_LIBRARY_FILE "byod_jai_lib.lib")
+else()
+    set(JAI_LIBRARY_FILE "byod_jai_lib.a")
+endif()
+set(JAI_LIBRARY_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${JAI_LIBRARY_FILE}")
+
+# build.jai is run from this directory. Besides the library it writes
+# byod_jai_lib.h (included by SharedJaiContext.cpp), which is listed as a
+# byproduct so the build tool knows which rule creates it.
+add_custom_command(
+    OUTPUT "${JAI_LIBRARY_PATH}"
+    BYPRODUCTS "${CMAKE_CURRENT_SOURCE_DIR}/byod_jai_lib.h"
+    COMMAND "${JAI_COMPILER}" build.jai
+    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
+    DEPENDS
+        "${CMAKE_CURRENT_SOURCE_DIR}/build.jai"
+        "${CMAKE_CURRENT_SOURCE_DIR}/krusher/bit_reduction.jai"
+        "${CMAKE_CURRENT_SOURCE_DIR}/krusher/lofi_downsampler.jai"
+    COMMENT "Compiling Jai sources into ${JAI_LIBRARY_FILE}"
+    VERBATIM
+)
+add_custom_target(jai_library_build DEPENDS "${JAI_LIBRARY_PATH}")
+
+# Imported target wrapping the file produced above; the dependency makes the
+# custom target build before anything that links byod_jai_lib.
+add_library(byod_jai_lib STATIC IMPORTED GLOBAL)
+add_dependencies(byod_jai_lib jai_library_build)
+set_target_properties(byod_jai_lib
+    PROPERTIES
+    IMPORTED_LOCATION "${JAI_LIBRARY_PATH}"
+)
+
+target_link_libraries(BYOD PRIVATE byod_jai_lib)
+target_sources(BYOD PRIVATE SharedJaiContext.cpp stb_sprintf.cpp)
+target_compile_definitions(BYOD PRIVATE STB_SPRINTF_IMPLEMENTATION=1)
+
+# stb_sprintf.cpp is third-party code, so its warnings are silenced per file.
+if((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU"))
+    string(JOIN " " STB_CXX_FLAGS
+        "-Wno-language-extension-token"
+        "-Wno-zero-as-null-pointer-constant"
+        "-Wno-cast-align"
+        "-Wno-implicit-fallthrough"
+        "-Wno-conditional-uninitialized"
+        "-Wno-duplicate-decl-specifier"
+        "-Wno-unreachable-code"
+    )
+    set_source_files_properties(stb_sprintf.cpp
+        TARGET_DIRECTORY BYOD
+        PROPERTIES COMPILE_FLAGS "${STB_CXX_FLAGS}"
+    )
+endif()
+
+#TODO: remove stb_sprintf once Jai.Basic no longer depends on it!
diff --git a/src/jai/SharedJaiContext.cpp b/src/jai/SharedJaiContext.cpp
new file mode 100644
index 00000000..00df66e7
--- /dev/null
+++ b/src/jai/SharedJaiContext.cpp
@@ -0,0 +1,15 @@
+#include "SharedJaiContext.h"
+#include "byod_jai_lib.h"
+
+// Starts the Jai runtime and keeps the context it returns.
+// NOTE(review): (0, nullptr) looks like an empty argc/argv pair - confirm against byod_jai_lib.h.
+JaiContextWrapper::JaiContextWrapper()
+    : internal (__jai_runtime_init (0, nullptr))
+{
+}
+
+// Hands the stored context back to __jai_runtime_fini().
+JaiContextWrapper::~JaiContextWrapper()
+{
+    __jai_runtime_fini (internal);
+}
diff --git a/src/jai/SharedJaiContext.h b/src/jai/SharedJaiContext.h
new file mode 100644
index 00000000..7f4fc089
--- /dev/null
+++ b/src/jai/SharedJaiContext.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <juce_core/juce_core.h>
+
+struct jai_Context;
+
+/**
+ * Owns one Jai runtime context: the constructor obtains it from
+ * __jai_runtime_init() and the destructor passes it to __jai_runtime_fini().
+ */
+struct JaiContextWrapper
+{
+    JaiContextWrapper();
+    ~JaiContextWrapper();
+
+    // The destructor finalises the context, so a copy would finalise it twice.
+    JaiContextWrapper (const JaiContextWrapper&) = delete;
+    JaiContextWrapper& operator= (const JaiContextWrapper&) = delete;
+
+    /** Implicit conversion so the wrapper can be passed where a jai_Context* is expected. */
+    operator jai_Context*() { return internal; } // NOLINT
+
+private:
+    jai_Context* internal = nullptr;
+};
+
+/** JUCE shared-resource handle to a JaiContextWrapper instance. */
+using SharedJaiContext = juce::SharedResourcePointer<JaiContextWrapper>;
diff --git a/src/jai/build.jai b/src/jai/build.jai
new file mode 100644
index 00000000..2237c7c4
--- /dev/null
+++ b/src/jai/build.jai
@@ -0,0 +1,44 @@
+#import "Basic";
+#import "Compiler";
+#import "generate_c_header";
+
+#run build();
+
+SRC_FILES :: string.[ // Jai files handed to add_build_file(), each prefixed with #filepath in build()
+    "krusher/lofi_downsampler.jai",
+    "krusher/bit_reduction.jai"
+];
+
+build :: () {
+    // Compiles every file in SRC_FILES into the static library byod_jai_lib and
+    // then writes the C header byod_jai_lib.h from the collected header info.
+    workspace := compiler_create_workspace();
+
+    options := get_build_options(workspace);
+    options.output_type = .STATIC_LIBRARY; // specifies output to be a static library
+    options.output_executable_name = "byod_jai_lib";
+    options.text_output_flags = 1;
+    options.backend = .LLVM;
+    set_optimization(*options, .OPTIMIZED);
+    set_build_options(options, workspace);
+
+    // header_info is fed every compiler message below and later passed to
+    // generate_header(); "jai_" is the prefix configured for Jai types.
+    header_info : Header_Info;
+    header_info.jai_type_prefix = "jai_";
+
+    // Intercept the workspace's messages and forward each one to
+    // handle_message() until the compiler reports .COMPLETE.
+    compiler_begin_intercept(workspace);
+    for SRC_FILES  add_build_file(tprint("%/%", #filepath, it), workspace);
+    while true {
+        message := compiler_wait_for_message();
+        handle_message(*header_info, message);
+        if message.kind == .COMPLETE  break;
+    }
+    compiler_end_intercept(workspace);
+
+    generate_header(*header_info, "byod_jai_lib.h");
+
+    set_build_options_dc(.{do_output=false});  // No executable for this workspace.
+}
diff --git a/src/jai/krusher/bit_reduction.jai b/src/jai/krusher/bit_reduction.jai
new file mode 100644
index 00000000..6829d58c
--- /dev/null
+++ b/src/jai/krusher/bit_reduction.jai
@@ -0,0 +1,159 @@
+Krusher_Bit_Reducer_Filter_State :: struct {
+    p1: s32; // most recent unscaled filter value (y) stored by the filters in bit_reduce_decode
+    p2: s32; // the p1 value from one sample earlier (type1_filter resets it to 0)
+}
+
+#program_export
+krusher_bit_reduce_process_block :: (buffer: **float, // per-channel sample pointers, overwritten in place
+                                     num_channels: s32,
+                                     num_samples: s32, // samples per channel
+                                     filter_index: s32, // cast to BR_Filter to choose the decode filter
+                                     bit_depth: s32, // the encode/decode step only runs for values below 12
+                                     filter_states: *Krusher_Bit_Reducer_Filter_State) #c_call { // indexed by channel
+    small_block_size : s32 : 16; // each channel is processed in chunks of up to 16 samples
+    samples_int : [small_block_size]s16 = ---;
+
+    for channel : 0..num_channels-1 {
+        samples_remaining : s32 = num_samples;
+        while samples_remaining > 0 {
+            samples_to_process := ifx samples_remaining > small_block_size then small_block_size else samples_remaining;
+            defer { samples_remaining -= samples_to_process; } // advance once this chunk is done
+
+            samples_float_span : []float32; // view of the current chunk inside the caller's buffer
+            samples_float_span.data = buffer[channel] + num_samples - samples_remaining;
+            samples_float_span.count = samples_to_process;
+
+            memset(*samples_int, 0, size_of(s16) * small_block_size);
+            samples_int_span : []s16; // view of the scratch array, same length as the float chunk
+            samples_int_span.data = samples_int.data;
+            samples_int_span.count = samples_to_process;
+
+            convert_float_to_int(samples_float_span, samples_int_span);
+            // at bit depths of 12 and above the chunk only goes through the float -> s16 -> float round trip
+            if (bit_depth < 12) {
+                br_data := bit_reduce_encode(samples_int_span, bit_depth);
+                bit_reduce_decode(br_data, samples_int_span, cast(BR_Filter) filter_index, *filter_states[channel]);
+            }
+            // write the (possibly bit-reduced) chunk back into the caller's buffer
+            convert_int_to_float(samples_int_span, samples_float_span);
+        }
+    }
+}
+
+#scope_file
+BIT_MASKS :: u16.[ // BIT_MASKS[n] has the n lowest bits set; indexed by bit_depth in encode_sample
+    0, // 0
+    0x0001, // 1
+    0x0003, // 2
+    0x0007, // 3
+    0x000F, // 4
+    0x001F, // 5
+    0x003F, // 6
+    0x007F, // 7
+    0x00FF, // 8
+    0x01FF, // 9
+    0x03FF, // 10
+    0x07FF, // 11
+    0x0FFF, // 12
+    0x1FFF, // 13
+    0x3FFF, // 14
+    0x7FFF, // 15
+];
+
+Bit_Reduction_Block :: struct {
+    shift_amount: u8; // shift chosen by bit_reduce_encode and passed to decode_sample
+    data: [16] u16; // encoded samples produced by encode_sample
+}
+
+BR_Filter :: enum { // selects the post-decode filter in bit_reduce_decode
+    TYPE_0; // decoded samples are used as they are
+    TYPE_1; // type1_filter
+    TYPE_2; // type2_filter
+    TYPE_3; // type3_filter
+}
+
+encode_sample :: inline (shift: u8, bit_depth: s32, x: s16) -> u16 #no_context { // reduces one sample to bit_depth bits
+    value_unsigned := cast(u16) (x + (1 << 8)); // add an offset of 2^8 before the unsigned cast
+    return cast(u16) (value_unsigned >> shift) & BIT_MASKS[bit_depth]; // drop `shift` low bits, keep bit_depth bits
+}
+
+decode_sample :: inline (shift: u8, x: u16) -> s16 #no_context { // reverses the shift and offset applied by encode_sample
+    return cast(s16) (cast(u16) (x << shift) - (1 << 8)); // shift back up, then subtract the 2^8 offset
+}
+
+bit_reduce_decode :: (using br_block: Bit_Reduction_Block,
+                      out: []s16,
+                      filter: BR_Filter,
+                      state: *Krusher_Bit_Reducer_Filter_State) #no_context {
+    // Decodes the block into `out`; TYPE_1..TYPE_3 also run a recursive filter that keeps its history in `state`.
+    type1_filter :: inline (nibble_2r: s16, using state: *Krusher_Bit_Reducer_Filter_State) -> s16 #no_context {
+        y := cast(s32) nibble_2r + ((p1 * 15) >> 4);
+        p2 = 0;
+        p1 = y;
+        return cast(s16) (y >> 4);
+    }
+    type2_filter :: inline (nibble_2r: s16, using state: *Krusher_Bit_Reducer_Filter_State) -> s16 #no_context {
+        y := cast(s32) nibble_2r + ((p1 * 61) >> 5) -  ((p2 * 15) >> 4);
+        p2 = p1;
+        p1 = y;
+        return cast(s16) (y >> 5);
+    }
+    type3_filter :: inline (nibble_2r: s16, using state: *Krusher_Bit_Reducer_Filter_State) -> s16 #no_context {
+        y := cast(s32) nibble_2r + ((p1 * 115) >> 6) -  ((p2 * 13) >> 4);
+        p2 = p1;
+        p1 = y;
+        return cast(s16) (y >> 6);
+    }
+
+    // Only out.count entries belong to this chunk: the rest of `data` must not reach `out` or the filter state.
+    num_valid := ifx out.count < data.count then out.count else data.count;
+    for i : 0..num_valid-1 {
+        if #complete filter == {
+            case .TYPE_0; out[i] = decode_sample(shift_amount, data[i]);
+            case .TYPE_1; out[i] = type1_filter (decode_sample(shift_amount, data[i]), state);
+            case .TYPE_2; out[i] = type2_filter (decode_sample(shift_amount, data[i]), state);
+            case .TYPE_3; out[i] = type3_filter (decode_sample(shift_amount, data[i]), state);
+        }
+    }
+}
+
+bit_reduce_encode :: (pcm_data: []s16, bit_depth: s32) -> Bit_Reduction_Block #no_context { // picks a shift for the chunk and returns its encoded block
+    shift_best : u8 = 0;
+    err_min : float64 = Math.FLOAT64_MAX;
+
+    for s : cast(u8) 0.. cast(u8) (16 - bit_depth) { // try every shift and keep the one with the smallest squared error
+        err_square_accum : float64 = 0.0;
+        for pcm_sample, _ : pcm_data {
+            pred := decode_sample(s, encode_sample(s, bit_depth, pcm_sample)); // value after an encode/decode round trip
+            err := cast(float64) (pcm_sample - pred);
+            err_square_accum += err * err;
+        }
+
+        if err_square_accum < err_min {
+            err_min = err_square_accum;
+            shift_best = s;
+        }
+    }
+
+    using br_block : Bit_Reduction_Block = ---; // uninitialised: only data[0 .. pcm_data.count-1] is written below
+    shift_amount = shift_best;
+    for pcm_sample, i : pcm_data {
+        data[i] = encode_sample(shift_best, bit_depth, pcm_sample);
+    }
+
+    return br_block;
+}
+
+convert_float_to_int :: (data_float: [] float, data_int: [] s16) #no_context {
+    // Multiplies every float sample by 2^8 and casts the result to s16.
+    scale :: cast(float32) (1 << 8);
+    for float_sample, i : data_float  data_int[i] = cast(s16) (float_sample * scale);
+}
+
+convert_int_to_float :: (data_int: [] s16, data_float: [] float) #no_context {
+    // Writes data_int[i] / 2^8 into every element of data_float.
+    scale :: cast(float32) (1 << 8);
+    for i : 0..data_float.count-1  data_float[i] = cast(float32) data_int[i] / scale;
+}
+
+Math :: #import "Math";
\ No newline at end of file
diff --git a/src/jai/krusher/lofi_downsampler.jai b/src/jai/krusher/lofi_downsampler.jai
new file mode 100644
index 00000000..9cc52bc7
--- /dev/null
+++ b/src/jai/krusher/lofi_downsampler.jai
@@ -0,0 +1,65 @@
+Krusher_Lofi_Resample_State :: struct {
+    upsample_overshoot: float64; // fractional read offset carried between calls for the upsampling pass
+    downsample_overshoot: float64; // fractional read offset carried between calls for the downsampling pass
+}
+
+#program_export
+krusher_init_lofi_resample :: (using state: *Krusher_Lofi_Resample_State) #c_call {
+    // Reset both carried-over offsets to zero.
+    <<state = Krusher_Lofi_Resample_State.{upsample_overshoot = 0.0, downsample_overshoot = 0.0};
+}
+
+#program_export
+krusher_process_lofi_downsample :: (ctx: *Context,
+                                    using state: *Krusher_Lofi_Resample_State,
+                                    buffer: **float32,
+                                    num_channels: s32,
+                                    num_samples: s32,
+                                    resample_factor: float64) #c_call
+{
+    // Resamples `buffer` by resample_factor into scratch storage and back again, in place, using the caller's context.
+    push_context ctx {
+        ds_buffer_size := cast(s32) Math.ceil(cast(float64) num_samples / resample_factor);
+
+        // allocating memory here, but it's real-time safe since we're using Temporary_Storage
+        temp_data : [..] float32;
+        temp_data.allocator = temp;
+        array_resize(*temp_data, num_channels * ds_buffer_size, true);
+
+        ds_buffer : [..] *float32; // one pointer per channel into temp_data, so any channel count fits
+        ds_buffer.allocator = temp;
+        array_resize(*ds_buffer, num_channels, true);
+        for channel : 0..num_channels-1  ds_buffer[channel] = temp_data.data + channel * ds_buffer_size;
+
+        krusher_process_lofi_resample(buffer, ds_buffer.data, num_channels, num_samples, ds_buffer_size, resample_factor, *downsample_overshoot);
+        krusher_process_lofi_resample(ds_buffer.data, buffer, num_channels, ds_buffer_size, num_samples, 1.0 / resample_factor, *upsample_overshoot);
+
+        reset_temporary_storage();
+    }
+}
+
+#scope_file
+krusher_process_lofi_resample :: (source_buffer: **float32,
+                                  dest_buffer: **float32,
+                                  num_channels: s32,
+                                  num_samples_source: s32,
+                                  num_samples_dest: s32,
+                                  resample_factor: float64,
+                                  overshoot_samples: *float64)
+{
+    // simple S&H lofi resampler: each output sample copies the source sample at its scaled position
+    last_source_index := num_samples_source - 1;
+    start_offset := <<overshoot_samples;
+    for channel: 0..num_channels-1 {
+        for i: 0..num_samples_dest-1 {
+            position := cast(float64) i * resample_factor + start_offset;
+            dest_buffer[channel][i] = source_buffer[channel][min(cast(s32) position, last_source_index)];
+        }
+    }
+    // store the fractional part of the scaled destination length for the next call
+    consumed := cast(float64) num_samples_dest * resample_factor;
+    <<overshoot_samples = consumed - Math.floor(consumed);
+}
+
+#import "Basic";
+Math :: #import "Math";
diff --git a/src/jai/stb_sprintf.cpp b/src/jai/stb_sprintf.cpp
new file mode 100644
index 00000000..9e6d1f3e
--- /dev/null
+++ b/src/jai/stb_sprintf.cpp
@@ -0,0 +1,1866 @@
+// stb_sprintf - v1.01 - public domain snprintf() implementation
+// originally by Jeff Roberts / RAD Game Tools, 2015/10/20
+// http://github.com/nothings/stb
+//
+// allowed types:  sc uidBboXx p AaGgEef n
+// lengths      :  h ll j z t I64 I32 I
+
+#ifndef STB_SPRINTF_H_INCLUDE
+#define STB_SPRINTF_H_INCLUDE
+
+/*
+Single file sprintf replacement.
+
+Originally written by Jeff Roberts at RAD Game Tools - 2015/10/20.
+Hereby placed in public domain.
+
+This is a full sprintf replacement that supports everything that
+the C runtime sprintfs support, including float/double, 64-bit integers,
+hex floats, field parameters (%*.*d stuff), length reads backs, etc.
+
+Why would you need this if sprintf already exists?  Well, first off,
+it's *much* faster (see below). It's also much smaller than the CRT
+versions code-space-wise. We've also added some simple improvements
+that are super handy (commas in thousands, callbacks at buffer full,
+for example). Finally, the format strings for MSVC and GCC differ
+for 64-bit integers (among other small things), so this lets you use
+the same format strings in cross platform code.
+
+It uses the standard single file trick of being both the header file
+and the source itself. If you just include it normally, you just get
+the header file function definitions. To get the code, you include
+it from a C or C++ file and define STB_SPRINTF_IMPLEMENTATION first.
+
+It only uses va_args macros from the C runtime to do its work. It
+does cast doubles to S64s and shifts and divides U64s, which does
+drag in CRT code on most platforms.
+
+It compiles to roughly 8K with float support, and 4K without.
+As a comparison, when using MSVC static libs, calling sprintf drags
+in 16K.
+
+API:
+====
+int stbsp_sprintf( char * buf, char const * fmt, ... )
+int stbsp_snprintf( char * buf, int count, char const * fmt, ... )
+  Convert an arg list into a buffer.  stbsp_snprintf always returns
+  a zero-terminated string (unlike regular snprintf).
+
+int stbsp_vsprintf( char * buf, char const * fmt, va_list va )
+int stbsp_vsnprintf( char * buf, int count, char const * fmt, va_list va )
+  Convert a va_list arg list into a buffer.  stbsp_vsnprintf always returns
+  a zero-terminated string (unlike regular snprintf).
+
+int stbsp_vsprintfcb( STBSP_SPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va )
+    typedef char * STBSP_SPRINTFCB( char const * buf, void * user, int len );
+  Convert into a buffer, calling back every STB_SPRINTF_MIN chars.
+  Your callback can then copy the chars out, print them or whatever.
+  This function is actually the workhorse for everything else.
+  The buffer you pass in must hold at least STB_SPRINTF_MIN characters.
+    // you return the next buffer to use or 0 to stop converting
+
+void stbsp_set_separators( char comma, char period )
+  Set the comma and period characters to use.
+
+FLOATS/DOUBLES:
+===============
+This code uses an internal float->ascii conversion method that uses
+doubles with error correction (double-doubles, for ~105 bits of
+precision).  This conversion is round-trip perfect - that is, an atof
+of the values output here will give you the bit-exact double back.
+
+One difference is that our insignificant digits will be different than
+with MSVC or GCC (but they don't match each other either).  We also
+don't attempt to find the minimum length matching float (pre-MSVC15
+doesn't either).
+
+If you don't need float or doubles at all, define STB_SPRINTF_NOFLOAT
+and you'll save 4K of code space.
+
+64-BIT INTS:
+============
+This library also supports 64-bit integers and you can use MSVC style or
+GCC style indicators (%I64d or %lld).  It supports the C99 specifiers
+for size_t and ptr_diff_t (%jd %zd) as well.
+
+EXTRAS:
+=======
+Like some GCCs, for integers and floats, you can use a ' (single quote)
+specifier and commas will be inserted on the thousands: "%'d" on 12345
+would print 12,345.
+
+For integers and floats, you can use a "$" specifier and the number
+will be converted to float and then divided to get kilo, mega, giga or
+tera and then printed, so "%$d" 1024 is "1.0 k", "%$.2d" 2536000 is
+"2.42 m", etc.
+
+In addition to octal and hexadecimal conversions, you can print
+integers in binary: "%b" for 256 would print 100.
+
+PERFORMANCE vs MSVC 2008 32-/64-bit (GCC is even slower than MSVC):
+===================================================================
+"%d" across all 32-bit ints (4.8x/4.0x faster than 32-/64-bit MSVC)
+"%24d" across all 32-bit ints (4.5x/4.2x faster)
+"%x" across all 32-bit ints (4.5x/3.8x faster)
+"%08x" across all 32-bit ints (4.3x/3.8x faster)
+"%f" across e-10 to e+10 floats (7.3x/6.0x faster)
+"%e" across e-10 to e+10 floats (8.1x/6.0x faster)
+"%g" across e-10 to e+10 floats (10.0x/7.1x faster)
+"%f" for values near e-300 (7.9x/6.5x faster)
+"%f" for values near e+300 (10.0x/9.1x faster)
+"%e" for values near e-300 (10.1x/7.0x faster)
+"%e" for values near e+300 (9.2x/6.0x faster)
+"%.320f" for values near e-300 (12.6x/11.2x faster)
+"%a" for random values (8.6x/4.3x faster)
+"%I64d" for 64-bits with 32-bit values (4.8x/3.4x faster)
+"%I64d" for 64-bits > 32-bit values (4.9x/5.5x faster)
+"%s%s%s" for 64 char strings (7.1x/7.3x faster)
+"...512 char string..." ( 35.0x/32.5x faster!)
+*/
+
+/*
+#ifdef STB_SPRINTF_STATIC
+#define STBSP__PUBLICDEC static
+#define STBSP__PUBLICDEF static
+#else
+#ifdef __cplusplus
+#define STBSP__PUBLICDEC extern "C"
+#define STBSP__PUBLICDEF extern "C"
+#else
+#define STBSP__PUBLICDEC extern
+#define STBSP__PUBLICDEF
+#endif
+#endif
+*/
+
+#ifdef WIN32
+#define STBSP__PUBLICDEF extern "C" __declspec(dllexport) extern
+#else
+#define STBSP__PUBLICDEF extern "C"
+#endif
+
+#include <stdarg.h> // for va_list()
+
+#ifndef STB_SPRINTF_MIN
+#define STB_SPRINTF_MIN 512 // how many characters per callback
+#endif
+typedef char* STBSP_SPRINTFCB (char* buf, void* user, int len);
+
+#ifndef STB_SPRINTF_DECORATE
+#define STB_SPRINTF_DECORATE(name) stbsp_##name // define this before including if you want to change the names
+#endif
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (vsprintf) (char* buf, char const* fmt, va_list va);
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (vsnprintf) (char* buf, int count, char const* fmt, va_list va);
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (sprintf) (char* buf, char const* fmt, ...);
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (snprintf) (char* buf, int count, char const* fmt, ...);
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (vsprintfcb) (STBSP_SPRINTFCB* callback, void* user, char* buf, char const* fmt, va_list va);
+STBSP__PUBLICDEF void STB_SPRINTF_DECORATE (set_separators) (char comma, char period);
+
+#endif // STB_SPRINTF_H_INCLUDE
+
+#ifdef STB_SPRINTF_IMPLEMENTATION
+
+#include <stdlib.h> // for va_arg()
+
+#define stbsp__uint32 unsigned int
+#define stbsp__int32 signed int
+
+#ifdef _MSC_VER
+#define stbsp__uint64 unsigned __int64
+#define stbsp__int64 signed __int64
+#else
+#define stbsp__uint64 unsigned long long
+#define stbsp__int64 signed long long
+#endif
+#define stbsp__uint16 unsigned short
+
+#ifndef stbsp__uintptr
+#if defined(__ppc64__) || defined(__aarch64__) || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64)
+#define stbsp__uintptr stbsp__uint64
+#else
+#define stbsp__uintptr stbsp__uint32
+#endif
+#endif
+
+#ifndef STB_SPRINTF_MSVC_MODE // used for MSVC2013 and earlier (MSVC2015 matches GCC)
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+#define STB_SPRINTF_MSVC_MODE
+#endif
+#endif
+
+#ifdef STB_SPRINTF_NOUNALIGNED // define this before inclusion to force stbsp_sprintf to always use aligned accesses
+#define STBSP__UNALIGNED(code)
+#else
+#define STBSP__UNALIGNED(code) code
+#endif
+
+#ifndef STB_SPRINTF_NOFLOAT
+// internal float utility functions
+static stbsp__int32 stbsp__real_to_str (char const** start, stbsp__uint32* len, char* out, stbsp__int32* decimal_pos, double value, stbsp__uint32 frac_digits);
+static stbsp__int32 stbsp__real_to_parts (stbsp__int64* bits, stbsp__int32* expo, double value);
+#define STBSP__SPECIAL 0x7000
+#endif
+
+static char stbsp__period = '.';
+static char stbsp__comma = ',';
+static char stbsp__digitpair[201] = "00010203040506070809101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899";
+
+STBSP__PUBLICDEF void STB_SPRINTF_DECORATE (set_separators) (char pcomma, char pperiod)
+{
+    stbsp__comma = pcomma; // comma character to use from now on
+    stbsp__period = pperiod; // period character to use from now on
+}
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (vsprintfcb) (STBSP_SPRINTFCB* callback, void* user, char* buf, char const* fmt, va_list va)
+{
+    static char hex[] = "0123456789abcdefxp";
+    static char hexu[] = "0123456789ABCDEFXP";
+    char* bf;
+    char const* f;
+    int tlen = 0;
+
+    bf = buf;
+    f = fmt;
+    for (;;)
+    {
+        stbsp__int32 fw, pr, tz;
+        stbsp__uint32 fl;
+
+#define STBSP__LEFTJUST 1
+#define STBSP__LEADINGPLUS 2
+#define STBSP__LEADINGSPACE 4
+#define STBSP__LEADING_0X 8
+#define STBSP__LEADINGZERO 16
+#define STBSP__INTMAX 32
+#define STBSP__TRIPLET_COMMA 64
+#define STBSP__NEGATIVE 128
+#define STBSP__METRIC_SUFFIX 256
+#define STBSP__HALFWIDTH 512
+
+// macros for the callback buffer stuff
+#define stbsp__chk_cb_bufL(bytes)                            \
+    {                                                        \
+        int len = (int) (bf - buf);                          \
+        if ((len + (bytes)) >= STB_SPRINTF_MIN)              \
+        {                                                    \
+            tlen += len;                                     \
+            if (0 == (bf = buf = callback (buf, user, len))) \
+                goto done;                                   \
+        }                                                    \
+    }
+#define stbsp__chk_cb_buf(bytes)        \
+    {                                   \
+        if (callback)                   \
+        {                               \
+            stbsp__chk_cb_bufL (bytes); \
+        }                               \
+    }
+#define stbsp__flush_cb()                         \
+    {                                             \
+        stbsp__chk_cb_bufL (STB_SPRINTF_MIN - 1); \
+    } //flush if there is even one byte in the buffer
+#define stbsp__cb_buf_clamp(cl, v)                   \
+    cl = v;                                          \
+    if (callback)                                    \
+    {                                                \
+        int lg = STB_SPRINTF_MIN - (int) (bf - buf); \
+        if (cl > lg)                                 \
+            cl = lg;                                 \
+    }
+
+        // fast copy everything up to the next % (or end of string)
+        for (;;)
+        {
+            while (((stbsp__uintptr) f) & 3)
+            {
+            schk1:
+                if (f[0] == '%')
+                    goto scandd;
+            schk2:
+                if (f[0] == 0)
+                    goto endfmt;
+                stbsp__chk_cb_buf (1);
+                *bf++ = f[0];
+                ++f;
+            }
+            for (;;)
+            {
+                stbsp__uint32 v, c;
+                v = *(stbsp__uint32*) f;
+                c = (~v) & 0x80808080;
+                if ((v - 0x26262626) & c)
+                    goto schk1;
+                if ((v - 0x01010101) & c)
+                    goto schk2;
+                if (callback)
+                    if ((STB_SPRINTF_MIN - (int) (bf - buf)) < 4)
+                        goto schk1;
+                *(stbsp__uint32*) bf = v;
+                bf += 4;
+                f += 4;
+            }
+        }
+    scandd:
+
+        ++f;
+
+        // ok, we have a percent, read the modifiers first
+        fw = 0;
+        pr = -1;
+        fl = 0;
+        tz = 0;
+
+        // flags
+        for (;;)
+        {
+            switch (f[0])
+            {
+                // if we have left justify
+                case '-':
+                    fl |= STBSP__LEFTJUST;
+                    ++f;
+                    continue;
+                // if we have leading plus
+                case '+':
+                    fl |= STBSP__LEADINGPLUS;
+                    ++f;
+                    continue;
+                // if we have leading space
+                case ' ':
+                    fl |= STBSP__LEADINGSPACE;
+                    ++f;
+                    continue;
+                // if we have leading 0x
+                case '#':
+                    fl |= STBSP__LEADING_0X;
+                    ++f;
+                    continue;
+                // if we have thousand commas
+                case '\'':
+                    fl |= STBSP__TRIPLET_COMMA;
+                    ++f;
+                    continue;
+                // if we have kilo marker
+                case '$':
+                    fl |= STBSP__METRIC_SUFFIX;
+                    ++f;
+                    continue;
+                // if we have leading zero
+                case '0':
+                    fl |= STBSP__LEADINGZERO;
+                    ++f;
+                    goto flags_done;
+                default:
+                    goto flags_done;
+            }
+        }
+    flags_done:
+
+        // get the field width
+        if (f[0] == '*')
+        {
+            fw = va_arg (va, stbsp__uint32);
+            ++f;
+        }
+        else
+        {
+            while ((f[0] >= '0') && (f[0] <= '9'))
+            {
+                fw = fw * 10 + f[0] - '0';
+                f++;
+            }
+        }
+        // get the precision
+        if (f[0] == '.')
+        {
+            ++f;
+            if (f[0] == '*')
+            {
+                pr = va_arg (va, stbsp__uint32);
+                ++f;
+            }
+            else
+            {
+                pr = 0;
+                while ((f[0] >= '0') && (f[0] <= '9'))
+                {
+                    pr = pr * 10 + f[0] - '0';
+                    f++;
+                }
+            }
+        }
+
+        // handle integer size overrides
+        switch (f[0])
+        {
+            // are we halfwidth?
+            case 'h':
+                fl |= STBSP__HALFWIDTH;
+                ++f;
+                break;
+            // are we 64-bit (unix style)
+            case 'l':
+                ++f;
+                if (f[0] == 'l')
+                {
+                    fl |= STBSP__INTMAX;
+                    ++f;
+                }
+                break;
+            // are we 64-bit on intmax? (c99)
+            case 'j':
+                fl |= STBSP__INTMAX;
+                ++f;
+                break;
+            // are we 64-bit on size_t or ptrdiff_t? (c99)
+            case 'z':
+            case 't':
+                fl |= ((sizeof (char*) == 8) ? STBSP__INTMAX : 0);
+                ++f;
+                break;
+            // are we 64-bit (msft style)
+            case 'I':
+                if ((f[1] == '6') && (f[2] == '4'))
+                {
+                    fl |= STBSP__INTMAX;
+                    f += 3;
+                }
+                else if ((f[1] == '3') && (f[2] == '2'))
+                {
+                    f += 3;
+                }
+                else
+                {
+                    fl |= ((sizeof (void*) == 8) ? STBSP__INTMAX : 0);
+                    ++f;
+                }
+                break;
+            default:
+                break;
+        }
+
+        // handle each replacement
+        switch (f[0])
+        {
+#define STBSP__NUMSZ 512 // big enough for e308 (with commas) or e-307
+            char num[STBSP__NUMSZ];
+            char lead[8];
+            char tail[8];
+            char* s;
+            char const* h;
+            stbsp__uint32 l, n, cs;
+            stbsp__uint64 n64;
+#ifndef STB_SPRINTF_NOFLOAT
+            double fv;
+#endif
+            stbsp__int32 dp;
+            char const* sn;
+
+            case 's':
+                // get the string
+                s = va_arg (va, char*);
+                if (s == 0)
+                    s = (char*) "null";
+                // get the length
+                sn = s;
+                for (;;)
+                {
+                    if ((((stbsp__uintptr) sn) & 3) == 0)
+                        break;
+                lchk:
+                    if (sn[0] == 0)
+                        goto ld;
+                    ++sn;
+                }
+                n = 0xffffffff;
+                if (pr >= 0)
+                {
+                    n = (stbsp__uint32) (sn - s);
+                    if (n >= (stbsp__uint32) pr)
+                        goto ld;
+                    n = ((stbsp__uint32) (pr - n)) >> 2;
+                }
+                while (n)
+                {
+                    stbsp__uint32 v = *(stbsp__uint32*) sn;
+                    if ((v - 0x01010101) & (~v) & 0x80808080UL)
+                        goto lchk;
+                    sn += 4;
+                    --n;
+                }
+                goto lchk;
+            ld:
+
+                l = (stbsp__uint32) (sn - s);
+                // clamp to precision
+                if (l > (stbsp__uint32) pr)
+                    l = pr;
+                lead[0] = 0;
+                tail[0] = 0;
+                pr = 0;
+                dp = 0;
+                cs = 0;
+                // copy the string in
+                goto scopy;
+
+            case 'c': // char
+                // get the character
+                s = num + STBSP__NUMSZ - 1;
+                *s = (char) va_arg (va, int);
+                l = 1;
+                lead[0] = 0;
+                tail[0] = 0;
+                pr = 0;
+                dp = 0;
+                cs = 0;
+                goto scopy;
+
+            case 'n': // weird write-bytes specifier
+            {
+                int* d = va_arg (va, int*);
+                *d = tlen + (int) (bf - buf);
+            }
+            break;
+
+#ifdef STB_SPRINTF_NOFLOAT
+            case 'A': // float
+            case 'a': // hex float
+            case 'G': // float
+            case 'g': // float
+            case 'E': // float
+            case 'e': // float
+            case 'f': // float
+                va_arg (va, double); // eat it
+                s = (char*) "No float";
+                l = 8;
+                lead[0] = 0;
+                tail[0] = 0;
+                pr = 0;
+                dp = 0;
+                cs = 0;
+                goto scopy;
+#else
+            case 'A': // float
+                h = hexu;
+                goto hexfloat;
+
+            case 'a': // hex float
+                h = hex;
+            hexfloat:
+                fv = va_arg (va, double);
+                if (pr == -1)
+                    pr = 6; // default is 6
+                // read the double into a string
+                if (stbsp__real_to_parts ((stbsp__int64*) &n64, &dp, fv))
+                    fl |= STBSP__NEGATIVE;
+
+                s = num + 64;
+
+                // sign
+                lead[0] = 0;
+                if (fl & STBSP__NEGATIVE)
+                {
+                    lead[0] = 1;
+                    lead[1] = '-';
+                }
+                else if (fl & STBSP__LEADINGSPACE)
+                {
+                    lead[0] = 1;
+                    lead[1] = ' ';
+                }
+                else if (fl & STBSP__LEADINGPLUS)
+                {
+                    lead[0] = 1;
+                    lead[1] = '+';
+                };
+
+                if (dp == -1023)
+                    dp = (n64) ? -1022 : 0;
+                else
+                    n64 |= (((stbsp__uint64) 1) << 52);
+                n64 <<= (64 - 56);
+                if (pr < 15)
+                    n64 += ((((stbsp__uint64) 8) << 56) >> (pr * 4));
+                    // add leading chars
+
+#ifdef STB_SPRINTF_MSVC_MODE
+                *s++ = '0';
+                *s++ = 'x';
+#else
+                lead[1 + lead[0]] = '0';
+                lead[2 + lead[0]] = 'x';
+                lead[0] += 2;
+#endif
+                *s++ = h[(n64 >> 60) & 15];
+                n64 <<= 4;
+                if (pr)
+                    *s++ = stbsp__period;
+                sn = s;
+
+                // print the bits
+                n = pr;
+                if (n > 13)
+                    n = 13;
+                if (pr > (stbsp__int32) n)
+                    tz = pr - n;
+                pr = 0;
+                while (n--)
+                {
+                    *s++ = h[(n64 >> 60) & 15];
+                    n64 <<= 4;
+                }
+
+                // print the expo
+                tail[1] = h[17];
+                if (dp < 0)
+                {
+                    tail[2] = '-';
+                    dp = -dp;
+                }
+                else
+                    tail[2] = '+';
+                n = (dp >= 1000) ? 6 : ((dp >= 100) ? 5 : ((dp >= 10) ? 4 : 3));
+                tail[0] = (char) n;
+                for (;;)
+                {
+                    tail[n] = '0' + dp % 10;
+                    if (n <= 3)
+                        break;
+                    --n;
+                    dp /= 10;
+                }
+
+                dp = (int) (s - sn);
+                l = (int) (s - (num + 64));
+                s = num + 64;
+                cs = 1 + (3 << 24);
+                goto scopy;
+
+            case 'G': // float
+                h = hexu;
+                goto dosmallfloat;
+
+            case 'g': // float
+                h = hex;
+            dosmallfloat:
+                fv = va_arg (va, double);
+                if (pr == -1)
+                    pr = 6;
+                else if (pr == 0)
+                    pr = 1; // default is 6
+                // read the double into a string
+                if (stbsp__real_to_str (&sn, &l, num, &dp, fv, (pr - 1) | 0x80000000))
+                    fl |= STBSP__NEGATIVE;
+
+                // clamp the precision and delete extra zeros after clamp
+                n = pr;
+                if (l > (stbsp__uint32) pr)
+                    l = pr;
+                while ((l > 1) && (pr) && (sn[l - 1] == '0'))
+                {
+                    --pr;
+                    --l;
+                }
+
+                // should we use %e
+                if ((dp <= -4) || (dp > (stbsp__int32) n))
+                {
+                    if (pr > (stbsp__int32) l)
+                        pr = l - 1;
+                    else if (pr)
+                        --pr; // when using %e, there is one digit before the decimal
+                    goto doexpfromg;
+                }
+                // this is the insane action to get the pr to match %g semantics for %f
+                if (dp > 0)
+                {
+                    pr = (dp < (stbsp__int32) l) ? l - dp : 0;
+                }
+                else
+                {
+                    pr = -dp + ((pr > (stbsp__int32) l) ? l : pr);
+                }
+                goto dofloatfromg;
+
+            case 'E': // float
+                h = hexu;
+                goto doexp;
+
+            case 'e': // float
+                h = hex;
+            doexp:
+                fv = va_arg (va, double);
+                if (pr == -1)
+                    pr = 6; // default is 6
+                // read the double into a string
+                if (stbsp__real_to_str (&sn, &l, num, &dp, fv, pr | 0x80000000))
+                    fl |= STBSP__NEGATIVE;
+            doexpfromg:
+                tail[0] = 0;
+                lead[0] = 0;
+                if (fl & STBSP__NEGATIVE)
+                {
+                    lead[0] = 1;
+                    lead[1] = '-';
+                }
+                else if (fl & STBSP__LEADINGSPACE)
+                {
+                    lead[0] = 1;
+                    lead[1] = ' ';
+                }
+                else if (fl & STBSP__LEADINGPLUS)
+                {
+                    lead[0] = 1;
+                    lead[1] = '+';
+                };
+                if (dp == STBSP__SPECIAL)
+                {
+                    s = (char*) sn;
+                    cs = 0;
+                    pr = 0;
+                    goto scopy;
+                }
+                s = num + 64;
+                // handle leading chars
+                *s++ = sn[0];
+
+                if (pr)
+                    *s++ = stbsp__period;
+
+                // handle after decimal
+                if ((l - 1) > (stbsp__uint32) pr)
+                    l = pr + 1;
+                for (n = 1; n < l; n++)
+                    *s++ = sn[n];
+                // trailing zeros
+                tz = pr - (l - 1);
+                pr = 0;
+                // dump expo
+                tail[1] = h[0xe];
+                dp -= 1;
+                if (dp < 0)
+                {
+                    tail[2] = '-';
+                    dp = -dp;
+                }
+                else
+                    tail[2] = '+';
+#ifdef STB_SPRINTF_MSVC_MODE
+                n = 5;
+#else
+                n = (dp >= 100) ? 5 : 4;
+#endif
+                tail[0] = (char) n;
+                for (;;)
+                {
+                    tail[n] = '0' + dp % 10;
+                    if (n <= 3)
+                        break;
+                    --n;
+                    dp /= 10;
+                }
+                cs = 1 + (3 << 24); // how many tens
+                goto flt_lead;
+
+            case 'f': // float
+                fv = va_arg (va, double);
+            doafloat:
+                // do kilos
+                if (fl & STBSP__METRIC_SUFFIX)
+                {
+                    while (fl < 0x4000000)
+                    {
+                        if ((fv < 1024.0) && (fv > -1024.0))
+                            break;
+                        fv /= 1024.0;
+                        fl += 0x1000000;
+                    }
+                }
+                if (pr == -1)
+                    pr = 6; // default is 6
+                // read the double into a string
+                if (stbsp__real_to_str (&sn, &l, num, &dp, fv, pr))
+                    fl |= STBSP__NEGATIVE;
+            dofloatfromg:
+                tail[0] = 0;
+                // sign
+                lead[0] = 0;
+                if (fl & STBSP__NEGATIVE)
+                {
+                    lead[0] = 1;
+                    lead[1] = '-';
+                }
+                else if (fl & STBSP__LEADINGSPACE)
+                {
+                    lead[0] = 1;
+                    lead[1] = ' ';
+                }
+                else if (fl & STBSP__LEADINGPLUS)
+                {
+                    lead[0] = 1;
+                    lead[1] = '+';
+                };
+                if (dp == STBSP__SPECIAL)
+                {
+                    s = (char*) sn;
+                    cs = 0;
+                    pr = 0;
+                    goto scopy;
+                }
+                s = num + 64;
+
+                // handle the three decimal varieties
+                if (dp <= 0)
+                {
+                    stbsp__int32 i;
+                    // handle 0.000*000xxxx
+                    *s++ = '0';
+                    if (pr)
+                        *s++ = stbsp__period;
+                    n = -dp;
+                    if ((stbsp__int32) n > pr)
+                        n = pr;
+                    i = n;
+                    while (i)
+                    {
+                        if ((((stbsp__uintptr) s) & 3) == 0)
+                            break;
+                        *s++ = '0';
+                        --i;
+                    }
+                    while (i >= 4)
+                    {
+                        *(stbsp__uint32*) s = 0x30303030;
+                        s += 4;
+                        i -= 4;
+                    }
+                    while (i)
+                    {
+                        *s++ = '0';
+                        --i;
+                    }
+                    if ((stbsp__int32) (l + n) > pr)
+                        l = pr - n;
+                    i = l;
+                    while (i)
+                    {
+                        *s++ = *sn++;
+                        --i;
+                    }
+                    tz = pr - (n + l);
+                    cs = 1 + (3 << 24); // how many tens did we write (for commas below)
+                }
+                else
+                {
+                    cs = (fl & STBSP__TRIPLET_COMMA) ? ((600 - (stbsp__uint32) dp) % 3) : 0;
+                    if ((stbsp__uint32) dp >= l)
+                    {
+                        // handle xxxx000*000.0
+                        n = 0;
+                        for (;;)
+                        {
+                            if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4))
+                            {
+                                cs = 0;
+                                *s++ = stbsp__comma;
+                            }
+                            else
+                            {
+                                *s++ = sn[n];
+                                ++n;
+                                if (n >= l)
+                                    break;
+                            }
+                        }
+                        if (n < (stbsp__uint32) dp)
+                        {
+                            n = dp - n;
+                            if ((fl & STBSP__TRIPLET_COMMA) == 0)
+                            {
+                                while (n)
+                                {
+                                    if ((((stbsp__uintptr) s) & 3) == 0)
+                                        break;
+                                    *s++ = '0';
+                                    --n;
+                                }
+                                while (n >= 4)
+                                {
+                                    *(stbsp__uint32*) s = 0x30303030;
+                                    s += 4;
+                                    n -= 4;
+                                }
+                            }
+                            while (n)
+                            {
+                                if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4))
+                                {
+                                    cs = 0;
+                                    *s++ = stbsp__comma;
+                                }
+                                else
+                                {
+                                    *s++ = '0';
+                                    --n;
+                                }
+                            }
+                        }
+                        cs = (int) (s - (num + 64)) + (3 << 24); // cs is how many tens
+                        if (pr)
+                        {
+                            *s++ = stbsp__period;
+                            tz = pr;
+                        }
+                    }
+                    else
+                    {
+                        // handle xxxxx.xxxx000*000
+                        n = 0;
+                        for (;;)
+                        {
+                            if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4))
+                            {
+                                cs = 0;
+                                *s++ = stbsp__comma;
+                            }
+                            else
+                            {
+                                *s++ = sn[n];
+                                ++n;
+                                if (n >= (stbsp__uint32) dp)
+                                    break;
+                            }
+                        }
+                        cs = (int) (s - (num + 64)) + (3 << 24); // cs is how many tens
+                        if (pr)
+                            *s++ = stbsp__period;
+                        if ((l - dp) > (stbsp__uint32) pr)
+                            l = pr + dp;
+                        while (n < l)
+                        {
+                            *s++ = sn[n];
+                            ++n;
+                        }
+                        tz = pr - (l - dp);
+                    }
+                }
+                pr = 0;
+
+                // handle k,m,g,t
+                if (fl & STBSP__METRIC_SUFFIX)
+                {
+                    tail[0] = 1;
+                    tail[1] = ' ';
+                    {
+                        if (fl >> 24)
+                        {
+                            tail[2] = "_kmgt"[fl >> 24];
+                            tail[0] = 2;
+                        }
+                    }
+                };
+
+            flt_lead:
+                // get the length that we copied
+                l = (stbsp__uint32) (s - (num + 64));
+                s = num + 64;
+                goto scopy;
+#endif
+
+            case 'B': // upper binary
+                h = hexu;
+                goto binary;
+
+            case 'b': // lower binary
+                h = hex;
+            binary:
+                lead[0] = 0;
+                if (fl & STBSP__LEADING_0X)
+                {
+                    lead[0] = 2;
+                    lead[1] = '0';
+                    lead[2] = h[0xb];
+                }
+                l = (8 << 4) | (1 << 8);
+                goto radixnum;
+
+            case 'o': // octal
+                h = hexu;
+                lead[0] = 0;
+                if (fl & STBSP__LEADING_0X)
+                {
+                    lead[0] = 1;
+                    lead[1] = '0';
+                }
+                l = (3 << 4) | (3 << 8);
+                goto radixnum;
+
+            case 'p': // pointer
+                fl |= (sizeof (void*) == 8) ? STBSP__INTMAX : 0;
+                pr = sizeof (void*) * 2;
+                fl &= ~STBSP__LEADINGZERO; // 'p' only prints the pointer with zeros
+                // drop through to X
+
+            case 'X': // upper binary
+                h = hexu;
+                goto dohexb;
+
+            case 'x': // lower binary
+                h = hex;
+            dohexb:
+                l = (4 << 4) | (4 << 8);
+                lead[0] = 0;
+                if (fl & STBSP__LEADING_0X)
+                {
+                    lead[0] = 2;
+                    lead[1] = '0';
+                    lead[2] = h[16];
+                }
+            radixnum:
+                // get the number
+                if (fl & STBSP__INTMAX)
+                    n64 = va_arg (va, stbsp__uint64);
+                else
+                    n64 = va_arg (va, stbsp__uint32);
+
+                s = num + STBSP__NUMSZ;
+                dp = 0;
+                // clear tail, and clear leading if value is zero
+                tail[0] = 0;
+                if (n64 == 0)
+                {
+                    lead[0] = 0;
+                    if (pr == 0)
+                    {
+                        l = 0;
+                        cs = (((l >> 4) & 15)) << 24;
+                        goto scopy;
+                    }
+                }
+                // convert to string
+                for (;;)
+                {
+                    *--s = h[n64 & ((1 << (l >> 8)) - 1)];
+                    n64 >>= (l >> 8);
+                    if (! ((n64) || ((stbsp__int32) ((num + STBSP__NUMSZ) - s) < pr)))
+                        break;
+                    if (fl & STBSP__TRIPLET_COMMA)
+                    {
+                        ++l;
+                        if ((l & 15) == ((l >> 4) & 15))
+                        {
+                            l &= ~15;
+                            *--s = stbsp__comma;
+                        }
+                    }
+                };
+                // get the tens and the comma pos
+                cs = (stbsp__uint32) ((num + STBSP__NUMSZ) - s) + ((((l >> 4) & 15)) << 24);
+                // get the length that we copied
+                l = (stbsp__uint32) ((num + STBSP__NUMSZ) - s);
+                // copy it
+                goto scopy;
+
+            case 'u': // unsigned
+            case 'i':
+            case 'd': // integer
+                // get the integer and abs it
+                if (fl & STBSP__INTMAX)
+                {
+                    stbsp__int64 i64 = va_arg (va, stbsp__int64);
+                    n64 = (stbsp__uint64) i64;
+                    if ((f[0] != 'u') && (i64 < 0))
+                    {
+                        n64 = (stbsp__uint64) -i64;
+                        fl |= STBSP__NEGATIVE;
+                    }
+                }
+                else
+                {
+                    stbsp__int32 i = va_arg (va, stbsp__int32);
+                    n64 = (stbsp__uint32) i;
+                    if ((f[0] != 'u') && (i < 0))
+                    {
+                        n64 = (stbsp__uint32) -i;
+                        fl |= STBSP__NEGATIVE;
+                    }
+                }
+
+#ifndef STB_SPRINTF_NOFLOAT
+                if (fl & STBSP__METRIC_SUFFIX)
+                {
+                    if (n64 < 1024)
+                        pr = 0;
+                    else if (pr == -1)
+                        pr = 1;
+                    fv = (double) (stbsp__int64) n64;
+                    goto doafloat;
+                }
+#endif
+
+                // convert to string
+                s = num + STBSP__NUMSZ;
+                l = 0;
+
+                for (;;)
+                {
+                    // do in 32-bit chunks (avoid lots of 64-bit divides even with constant denominators)
+                    char* o = s - 8;
+                    if (n64 >= 100000000)
+                    {
+                        n = (stbsp__uint32) (n64 % 100000000);
+                        n64 /= 100000000;
+                    }
+                    else
+                    {
+                        n = (stbsp__uint32) n64;
+                        n64 = 0;
+                    }
+                    if ((fl & STBSP__TRIPLET_COMMA) == 0)
+                    {
+                        while (n)
+                        {
+                            s -= 2;
+                            *(stbsp__uint16*) s = *(stbsp__uint16*) &stbsp__digitpair[(n % 100) * 2];
+                            n /= 100;
+                        }
+                    }
+                    while (n)
+                    {
+                        if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3))
+                        {
+                            l = 0;
+                            *--s = stbsp__comma;
+                            --o;
+                        }
+                        else
+                        {
+                            *--s = (char) (n % 10) + '0';
+                            n /= 10;
+                        }
+                    }
+                    if (n64 == 0)
+                    {
+                        if ((s[0] == '0') && (s != (num + STBSP__NUMSZ)))
+                            ++s;
+                        break;
+                    }
+                    while (s != o)
+                        if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3))
+                        {
+                            l = 0;
+                            *--s = stbsp__comma;
+                            --o;
+                        }
+                        else
+                        {
+                            *--s = '0';
+                        }
+                }
+
+                tail[0] = 0;
+                // sign
+                lead[0] = 0;
+                if (fl & STBSP__NEGATIVE)
+                {
+                    lead[0] = 1;
+                    lead[1] = '-';
+                }
+                else if (fl & STBSP__LEADINGSPACE)
+                {
+                    lead[0] = 1;
+                    lead[1] = ' ';
+                }
+                else if (fl & STBSP__LEADINGPLUS)
+                {
+                    lead[0] = 1;
+                    lead[1] = '+';
+                };
+
+                // get the length that we copied
+                l = (stbsp__uint32) ((num + STBSP__NUMSZ) - s);
+                if (l == 0)
+                {
+                    *--s = '0';
+                    l = 1;
+                }
+                cs = l + (3 << 24);
+                if (pr < 0)
+                    pr = 0;
+
+            scopy:
+                // get fw=leading/trailing space, pr=leading zeros
+                if (pr < (stbsp__int32) l)
+                    pr = l;
+                n = pr + lead[0] + tail[0] + tz;
+                if (fw < (stbsp__int32) n)
+                    fw = n;
+                fw -= n;
+                pr -= l;
+
+                // handle right justify and leading zeros
+                if ((fl & STBSP__LEFTJUST) == 0)
+                {
+                    if (fl & STBSP__LEADINGZERO) // if leading zeros, everything is in pr
+                    {
+                        pr = (fw > pr) ? fw : pr;
+                        fw = 0;
+                    }
+                    else
+                    {
+                        fl &= ~STBSP__TRIPLET_COMMA; // if no leading zeros, then no commas
+                    }
+                }
+
+                // copy the spaces and/or zeros
+                if (fw + pr)
+                {
+                    stbsp__int32 i;
+                    stbsp__uint32 c;
+
+                    // copy leading spaces (or when doing %8.4d stuff)
+                    if ((fl & STBSP__LEFTJUST) == 0)
+                        while (fw > 0)
+                        {
+                            stbsp__cb_buf_clamp (i, fw);
+                            fw -= i;
+                            while (i)
+                            {
+                                if ((((stbsp__uintptr) bf) & 3) == 0)
+                                    break;
+                                *bf++ = ' ';
+                                --i;
+                            }
+                            while (i >= 4)
+                            {
+                                *(stbsp__uint32*) bf = 0x20202020;
+                                bf += 4;
+                                i -= 4;
+                            }
+                            while (i)
+                            {
+                                *bf++ = ' ';
+                                --i;
+                            }
+                            stbsp__chk_cb_buf (1);
+                        }
+
+                    // copy leader
+                    sn = lead + 1;
+                    while (lead[0])
+                    {
+                        stbsp__cb_buf_clamp (i, lead[0]);
+                        lead[0] -= (char) i;
+                        while (i)
+                        {
+                            *bf++ = *sn++;
+                            --i;
+                        }
+                        stbsp__chk_cb_buf (1);
+                    }
+
+                    // copy leading zeros
+                    c = cs >> 24;
+                    cs &= 0xffffff;
+                    cs = (fl & STBSP__TRIPLET_COMMA) ? ((stbsp__uint32) (c - ((pr + cs) % (c + 1)))) : 0;
+                    while (pr > 0)
+                    {
+                        stbsp__cb_buf_clamp (i, pr);
+                        pr -= i;
+                        if ((fl & STBSP__TRIPLET_COMMA) == 0)
+                        {
+                            while (i)
+                            {
+                                if ((((stbsp__uintptr) bf) & 3) == 0)
+                                    break;
+                                *bf++ = '0';
+                                --i;
+                            }
+                            while (i >= 4)
+                            {
+                                *(stbsp__uint32*) bf = 0x30303030;
+                                bf += 4;
+                                i -= 4;
+                            }
+                        }
+                        while (i)
+                        {
+                            if ((fl & STBSP__TRIPLET_COMMA) && (cs++ == c))
+                            {
+                                cs = 0;
+                                *bf++ = stbsp__comma;
+                            }
+                            else
+                                *bf++ = '0';
+                            --i;
+                        }
+                        stbsp__chk_cb_buf (1);
+                    }
+                }
+
+                // copy leader if there is still one
+                sn = lead + 1;
+                while (lead[0])
+                {
+                    stbsp__int32 i;
+                    stbsp__cb_buf_clamp (i, lead[0]);
+                    lead[0] -= (char) i;
+                    while (i)
+                    {
+                        *bf++ = *sn++;
+                        --i;
+                    }
+                    stbsp__chk_cb_buf (1);
+                }
+
+                // copy the string
+                n = l;
+                while (n)
+                {
+                    stbsp__int32 i;
+                    stbsp__cb_buf_clamp (i, n);
+                    n -= i;
+                    STBSP__UNALIGNED (while (i >= 4) { *(stbsp__uint32*)bf=*(stbsp__uint32*)s; bf+=4; s+=4; i-=4; })
+                    while (i)
+                    {
+                        *bf++ = *s++;
+                        --i;
+                    }
+                    stbsp__chk_cb_buf (1);
+                }
+
+                // copy trailing zeros
+                while (tz)
+                {
+                    stbsp__int32 i;
+                    stbsp__cb_buf_clamp (i, tz);
+                    tz -= i;
+                    while (i)
+                    {
+                        if ((((stbsp__uintptr) bf) & 3) == 0)
+                            break;
+                        *bf++ = '0';
+                        --i;
+                    }
+                    while (i >= 4)
+                    {
+                        *(stbsp__uint32*) bf = 0x30303030;
+                        bf += 4;
+                        i -= 4;
+                    }
+                    while (i)
+                    {
+                        *bf++ = '0';
+                        --i;
+                    }
+                    stbsp__chk_cb_buf (1);
+                }
+
+                // copy tail if there is one
+                sn = tail + 1;
+                while (tail[0])
+                {
+                    stbsp__int32 i;
+                    stbsp__cb_buf_clamp (i, tail[0]);
+                    tail[0] -= (char) i;
+                    while (i)
+                    {
+                        *bf++ = *sn++;
+                        --i;
+                    }
+                    stbsp__chk_cb_buf (1);
+                }
+
+                // handle the left justify
+                if (fl & STBSP__LEFTJUST)
+                    if (fw > 0)
+                    {
+                        while (fw)
+                        {
+                            stbsp__int32 i;
+                            stbsp__cb_buf_clamp (i, fw);
+                            fw -= i;
+                            while (i)
+                            {
+                                if ((((stbsp__uintptr) bf) & 3) == 0)
+                                    break;
+                                *bf++ = ' ';
+                                --i;
+                            }
+                            while (i >= 4)
+                            {
+                                *(stbsp__uint32*) bf = 0x20202020;
+                                bf += 4;
+                                i -= 4;
+                            }
+                            while (i--)
+                                *bf++ = ' ';
+                            stbsp__chk_cb_buf (1);
+                        }
+                    }
+                break;
+
+            default: // unknown, just copy code
+                s = num + STBSP__NUMSZ - 1;
+                *s = f[0];
+                l = 1;
+                fw = pr = fl = 0;
+                lead[0] = 0;
+                tail[0] = 0;
+                pr = 0;
+                dp = 0;
+                cs = 0;
+                goto scopy;
+        }
+        ++f;
+    }
+endfmt:
+
+    if (! callback)
+        *bf = 0;
+    else
+        stbsp__flush_cb();
+
+done:
+    return tlen + (int) (bf - buf);
+}
+
+// cleanup
+#undef STBSP__LEFTJUST
+#undef STBSP__LEADINGPLUS
+#undef STBSP__LEADINGSPACE
+#undef STBSP__LEADING_0X
+#undef STBSP__LEADINGZERO
+#undef STBSP__INTMAX
+#undef STBSP__TRIPLET_COMMA
+#undef STBSP__NEGATIVE
+#undef STBSP__METRIC_SUFFIX
+#undef STBSP__NUMSZ
+#undef stbsp__chk_cb_bufL
+#undef stbsp__chk_cb_buf
+#undef stbsp__flush_cb
+#undef stbsp__cb_buf_clamp
+
+// ============================================================================
+//   wrapper functions
+
+// Formats `fmt` plus the variadic arguments into `buf` through vsprintfcb with no
+// callback. No size limit is passed along, so `buf` must be large enough for the
+// whole result. Returns the value produced by vsprintfcb.
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (sprintf) (char* buf, char const* fmt, ...)
+{
+    int result;
+    va_list va;
+    va_start (va, fmt);
+    result = STB_SPRINTF_DECORATE (vsprintfcb) (0, 0, buf, fmt, va);
+    va_end (va); // every va_start must be matched by va_end before the function returns
+    return result;
+}
+
+// Bookkeeping shared between vsnprintf and stbsp__clamp_callback.
+typedef struct stbsp__context
+{
+    char* buf; // next write position inside the caller's destination buffer
+    int count; // bytes still available at `buf`
+    char tmp[STB_SPRINTF_MIN]; // scratch handed to the formatter once fewer than STB_SPRINTF_MIN bytes remain
+} stbsp__context;
+
+// Output callback used by vsnprintf. Takes `len` freshly formatted bytes at `buf`,
+// stores as many as still fit in the destination tracked by `user` (a stbsp__context),
+// and returns where the formatter should write next: the destination itself when at
+// least STB_SPRINTF_MIN bytes remain, the context's scratch area when fewer remain,
+// or 0 once no space is left.
+static char* stbsp__clamp_callback (char* buf, void* user, int len)
+{
+    stbsp__context* const ctx = (stbsp__context*) user;
+    int const n = (len > ctx->count) ? ctx->count : len;
+
+    if (n != 0)
+    {
+        // bytes formatted straight into the destination are already in place
+        if (buf != ctx->buf)
+        {
+            int i = 0;
+            do
+            {
+                ctx->buf[i] = buf[i];
+                ++i;
+            } while (i < n);
+        }
+        ctx->buf += n;
+        ctx->count -= n;
+    }
+
+    if (ctx->count <= 0)
+        return 0;
+    if (ctx->count < STB_SPRINTF_MIN)
+        return ctx->tmp;
+    return ctx->buf; // enough room left: let the formatter write directly into the destination
+}
+
+// Bounded formatting into `buf`, which holds `count` bytes. Output is clamped through
+// stbsp__clamp_callback and the result is always zero-terminated when count > 0.
+// Returns the number of characters stored (excluding the terminator), i.e. NOT the
+// length the untruncated output would have had. A zero or negative `count` writes
+// nothing and returns 0.
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (vsnprintf) (char* buf, int count, char const* fmt, va_list va)
+{
+    stbsp__context c;
+    int l;
+
+    // A negative count used to slip past the `== 0` test: the clamp callback then
+    // returned a null write pointer and the terminator landed at buf[count - 1].
+    if (count <= 0)
+        return 0;
+
+    c.buf = buf;
+    c.count = count;
+
+    // the priming callback call (len == 0) only selects the first write target
+    STB_SPRINTF_DECORATE (vsprintfcb) (stbsp__clamp_callback, &c, stbsp__clamp_callback (0, &c, 0), fmt, va);
+
+    // zero-terminate
+    l = (int) (c.buf - buf);
+    if (l >= count) // should never be greater, only equal (or less) than count
+        l = count - 1;
+    buf[l] = 0;
+
+    return l;
+}
+
+// Variadic front end for vsnprintf: formats at most `count` bytes (terminator
+// included) into `buf` and returns what vsnprintf returns.
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (snprintf) (char* buf, int count, char const* fmt, ...)
+{
+    int result;
+    va_list va;
+    va_start (va, fmt);
+
+    result = STB_SPRINTF_DECORATE (vsnprintf) (buf, count, fmt, va);
+    va_end (va); // every va_start must be matched by va_end before the function returns
+
+    return result;
+}
+
+// Unbounded formatting into `buf` from an already-started va_list; forwards to
+// vsprintfcb without a callback and hands back its return value.
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE (vsprintf) (char* buf, char const* fmt, va_list va)
+{
+    int const written = STB_SPRINTF_DECORATE (vsprintfcb) (0, 0, buf, fmt, va);
+    return written;
+}
+
+// =======================================================================
+//   low level float utility functions
+
+#ifndef STB_SPRINTF_NOFLOAT
+
+// copies d to bits w/ strict aliasing (this compiles to nothing on /Ox)
+// Copies the first 8 bytes of `src` into `dest` one char at a time. Both arguments
+// must be addressable objects of at least 8 bytes (the macro takes their address).
+#define STBSP__COPYFP(dest, src)                      \
+    {                                                 \
+        int cn;                                       \
+        for (cn = 0; cn < 8; cn++)                    \
+            ((char*) &dest)[cn] = ((char*) &src)[cn]; \
+    }
+
+// get float info
+// Splits a double into its raw fields: the low 52 bits go to *bits, the 11-bit
+// exponent field minus 1023 goes to *expo, and the return value is the bit pattern
+// shifted right by 63 (non-zero when the sign bit is set).
+static stbsp__int32 stbsp__real_to_parts (stbsp__int64* bits, stbsp__int32* expo, double value)
+{
+    stbsp__int64 raw = 0;
+    stbsp__uint64 const mantissa_mask = (((stbsp__uint64) 1) << 52) - 1;
+
+    // grab the bit pattern of the value
+    STBSP__COPYFP (raw, value);
+
+    *bits = raw & mantissa_mask;
+    *expo = (stbsp__int32) (((raw >> 52) & 2047) - 1023);
+
+    return (stbsp__int32) (raw >> 63);
+}
+
+static double const stbsp__bot[23] = { 1e+000, 1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010, 1e+011, 1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020, 1e+021, 1e+022 };
+static double const stbsp__negbot[22] = { 1e-001, 1e-002, 1e-003, 1e-004, 1e-005, 1e-006, 1e-007, 1e-008, 1e-009, 1e-010, 1e-011, 1e-012, 1e-013, 1e-014, 1e-015, 1e-016, 1e-017, 1e-018, 1e-019, 1e-020, 1e-021, 1e-022 };
+static double const stbsp__negboterr[22] = { -5.551115123125783e-018, -2.0816681711721684e-019, -2.0816681711721686e-020, -4.7921736023859299e-021, -8.1803053914031305e-022, 4.5251888174113741e-023, 4.5251888174113739e-024, -2.0922560830128471e-025, -6.2281591457779853e-026, -3.6432197315497743e-027, 6.0503030718060191e-028, 2.0113352370744385e-029, -3.0373745563400371e-030, 1.1806906454401013e-032, -7.7705399876661076e-032, 2.0902213275965398e-033, -7.1542424054621921e-034, -7.1542424054621926e-035, 2.4754073164739869e-036, 5.4846728545790429e-037, 9.2462547772103625e-038, -4.8596774326570872e-039 };
+static double const stbsp__top[13] = { 1e+023, 1e+046, 1e+069, 1e+092, 1e+115, 1e+138, 1e+161, 1e+184, 1e+207, 1e+230, 1e+253, 1e+276, 1e+299 };
+static double const stbsp__negtop[13] = { 1e-023, 1e-046, 1e-069, 1e-092, 1e-115, 1e-138, 1e-161, 1e-184, 1e-207, 1e-230, 1e-253, 1e-276, 1e-299 };
+static double const stbsp__toperr[13] = { 8388608, 6.8601809640529717e+028, -7.253143638152921e+052, -4.3377296974619174e+075, -1.5559416129466825e+098, -3.2841562489204913e+121, -3.7745893248228135e+144, -1.7356668416969134e+167, -3.8893577551088374e+190, -9.9566444326005119e+213, 6.3641293062232429e+236, -5.2069140800249813e+259, -5.2504760255204387e+282 };
+static double const stbsp__negtoperr[13] = { 3.9565301985100693e-040, -2.299904345391321e-063, 3.6506201437945798e-086, 1.1875228833981544e-109, -5.0644902316928607e-132, -6.7156837247865426e-155, -2.812077463003139e-178, -5.7778912386589953e-201, 7.4997100559334532e-224, -4.6439668915134491e-247, -6.3691100762962136e-270, -9.436808465446358e-293, 8.0970921678014997e-317 };
+
+#if defined(_MSC_VER) && (_MSC_VER <= 1200)
+static stbsp__uint64 const stbsp__powten[20] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000, 100000000000, 1000000000000, 10000000000000, 100000000000000, 1000000000000000, 10000000000000000, 100000000000000000, 1000000000000000000, 10000000000000000000U };
+#define stbsp__tento19th ((stbsp__uint64) 1000000000000000000)
+#else
+static stbsp__uint64 const stbsp__powten[20] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000ULL, 100000000000ULL, 1000000000000ULL, 10000000000000ULL, 100000000000000ULL, 1000000000000000ULL, 10000000000000000ULL, 100000000000000000ULL, 1000000000000000000ULL, 10000000000000000000ULL };
+#define stbsp__tento19th (1000000000000000000ULL)
+#endif
+
+// Double-double product of two doubles: oh = xh * yh, and ol is built from the
+// partial products of the split operands minus oh (presumably the rounding error
+// of oh, as in the classic two-product scheme -- confirm before relying on it).
+// Each operand is split into a high part (low 27 bits of the bit pattern cleared)
+// and the remaining low part. `oh`/`ol` must be assignable; `xh`/`yh` must be
+// addressable because STBSP__COPYFP takes their address.
+#define stbsp__ddmulthi(oh, ol, xh, yh)                              \
+    {                                                                \
+        double ahi = 0, alo, bhi = 0, blo;                           \
+        stbsp__int64 bt;                                             \
+        oh = xh * yh;                                                \
+        STBSP__COPYFP (bt, xh);                                      \
+        bt &= ((~(stbsp__uint64) 0) << 27);                          \
+        STBSP__COPYFP (ahi, bt);                                     \
+        alo = xh - ahi;                                              \
+        STBSP__COPYFP (bt, yh);                                      \
+        bt &= ((~(stbsp__uint64) 0) << 27);                          \
+        STBSP__COPYFP (bhi, bt);                                     \
+        blo = yh - bhi;                                              \
+        ol = ((ahi * bhi - oh) + ahi * blo + alo * bhi) + alo * blo; \
+    }
+
+// Converts the double-double value (xh + xl) to a 64-bit integer in `ob`:
+// truncates the high part first, then adds the integer part of what is left over
+// (high-part remainder plus the low part). Uses only its own parameters; the
+// previous body read a variable literally named `ph` from the expansion site and
+// so only worked when the caller happened to pass `ph` as `xh`.
+#define stbsp__ddtoS64(ob, xh, xl)             \
+    {                                          \
+        double ahi = 0, alo, vh, t;            \
+        ob = (stbsp__int64) xh;                \
+        vh = (double) ob;                      \
+        ahi = (xh - vh);                       \
+        t = (ahi - xh);                        \
+        alo = (xh - (ahi - t)) - (vh + t);     \
+        ob += (stbsp__int64) (ahi + alo + xl); \
+    }
+
+// Renormalises the pair (oh, ol) in place: oh becomes the rounded sum oh + ol and
+// ol becomes what that sum left out.
+#define stbsp__ddrenorm(oh, ol) \
+    {                           \
+        double s;               \
+        s = oh + ol;            \
+        ol = ol - (s - oh);     \
+        oh = s;                 \
+    }
+
+// Adds the cross terms xh * yl + xl * yh to the low word `ol`; `oh` is unused.
+// NOTE: the expansion already ends in ';'.
+#define stbsp__ddmultlo(oh, ol, xh, xl, yh, yl) \
+    ol = ol + (xh * yl + xl * yh);
+
+// Single cross-term variant: adds xh * yl to `ol`; `oh` is unused.
+// NOTE: the expansion already ends in ';'.
+#define stbsp__ddmultlos(oh, ol, xh, yl) \
+    ol = ol + (xh * yl);
+
+// Multiplies `d` by 10^power using double-double arithmetic and stores the
+// renormalised result as a (high, low) pair in *ohi / *olo.
+// Powers 0..22 use a single table multiply. Any other power is split as
+// |power| = et * 23 + eb and applied in two steps: the small tables
+// (stbsp__bot / stbsp__negbot) cover eb, the coarse tables (stbsp__top /
+// stbsp__negtop, spaced 23 decades apart) cover et. The *err tables are fed in
+// as the low words of the table values (presumably their representation error).
+static void stbsp__raise_to_power10 (double* ohi, double* olo, double d, stbsp__int32 power) // power can be -323 to +350
+{
+    double ph, pl;
+    if ((power >= 0) && (power <= 22))
+    {
+        stbsp__ddmulthi (ph, pl, d, stbsp__bot[power]);
+    }
+    else
+    {
+        stbsp__int32 e, et, eb;
+        double p2h, p2l;
+
+        e = power;
+        if (power < 0)
+            e = -e;
+        et = (e * 0x2c9) >> 14; /* approximately e / 23 (0x2c9 / 2^14 ~= 1 / 22.98) */
+        // the coarse tables only have 13 entries; any excess is carried by eb
+        if (et > 13)
+            et = 13;
+        eb = e - (et * 23);
+
+        ph = d;
+        pl = 0.0;
+        if (power < 0)
+        {
+            if (eb)
+            {
+                // stbsp__negbot[k] holds 1e-(k+1), hence the pre-decrement
+                --eb;
+                stbsp__ddmulthi (ph, pl, d, stbsp__negbot[eb]);
+                stbsp__ddmultlos (ph, pl, d, stbsp__negboterr[eb]);
+            }
+            if (et)
+            {
+                stbsp__ddrenorm (ph, pl);
+                // stbsp__negtop[k] holds 1e-(23 * (k+1))
+                --et;
+                stbsp__ddmulthi (p2h, p2l, ph, stbsp__negtop[et]);
+                stbsp__ddmultlo (p2h, p2l, ph, pl, stbsp__negtop[et], stbsp__negtoperr[et]);
+                ph = p2h;
+                pl = p2l;
+            }
+        }
+        else
+        {
+            if (eb)
+            {
+                // eb can exceed 22 when et was clamped: apply 10^22 first, then the rest
+                e = eb;
+                if (eb > 22)
+                    eb = 22;
+                e -= eb;
+                stbsp__ddmulthi (ph, pl, d, stbsp__bot[eb]);
+                if (e)
+                {
+                    stbsp__ddrenorm (ph, pl);
+                    stbsp__ddmulthi (p2h, p2l, ph, stbsp__bot[e]);
+                    stbsp__ddmultlos (p2h, p2l, stbsp__bot[e], pl);
+                    ph = p2h;
+                    pl = p2l;
+                }
+            }
+            if (et)
+            {
+                stbsp__ddrenorm (ph, pl);
+                // stbsp__top[k] holds 1e+(23 * (k+1))
+                --et;
+                stbsp__ddmulthi (p2h, p2l, ph, stbsp__top[et]);
+                stbsp__ddmultlo (p2h, p2l, ph, pl, stbsp__top[et], stbsp__toperr[et]);
+                ph = p2h;
+                pl = p2l;
+            }
+        }
+    }
+    stbsp__ddrenorm (ph, pl);
+    *ohi = ph;
+    *olo = pl;
+}
+
+// given a float value, returns the significant bits in bits, and the position of the
+//   decimal point in decimal_pos.  +/-INF and NAN are specified by special values
+//   returned in the decimal_pos parameter.
+// frac_digits is normally added to the decimal exponent (an absolute digit count); to count from the first significant digit instead (as for %g and %e), OR in 0x80000000
+static stbsp__int32 stbsp__real_to_str (char const** start, stbsp__uint32* len, char* out, stbsp__int32* decimal_pos, double value, stbsp__uint32 frac_digits)
+{
+    // Outputs:
+    //   *start       - first character of the digit string (inside `out`, or a literal for NaN/Inf)
+    //   *len         - number of characters at *start
+    //   *decimal_pos - decimal exponent estimate `tens`, or STBSP__SPECIAL for NaN/Inf
+    //   return value - bit pattern shifted right by 63 (non-zero when the sign bit is set)
+    // Digits are written backwards from out + 64, so `out` must provide at least 64 bytes.
+    double d;
+    stbsp__int64 bits = 0;
+    stbsp__int32 expo, e, ng, tens;
+
+    d = value;
+    STBSP__COPYFP (bits, d);
+    expo = (stbsp__int32) ((bits >> 52) & 2047);
+    ng = (stbsp__int32) (bits >> 63);
+    if (ng)
+        d = -d;
+
+    if (expo == 2047) // is nan or inf?
+    {
+        // non-zero low 52 bits -> NaN, all zero -> Inf
+        *start = (bits & ((((stbsp__uint64) 1) << 52) - 1)) ? "NaN" : "Inf";
+        *decimal_pos = STBSP__SPECIAL;
+        *len = 3;
+        return ng;
+    }
+
+    if (expo == 0) // is zero or denormal
+    {
+        if ((bits << 1) == 0) // do zero
+        {
+            *decimal_pos = 1;
+            *start = out;
+            out[0] = '0';
+            *len = 1;
+            return ng;
+        }
+        // find the right expo for denormals
+        {
+            // lower expo once for every leading zero bit, scanning down from bit 51
+            stbsp__int64 v = ((stbsp__uint64) 1) << 51;
+            while ((bits & v) == 0)
+            {
+                --expo;
+                v >>= 1;
+            }
+        }
+    }
+
+    // find the decimal exponent as well as the decimal bits of the value
+    {
+        double ph, pl;
+
+        // log10 estimate - very specifically tweaked to hit or undershoot by no more than 1 of log10 of all expos 1..2046
+        tens = expo - 1023;
+        tens = (tens < 0) ? ((tens * 617) / 2048) : (((tens * 1233) / 4096) + 1);
+
+        // move the significant bits into position and stick them into an int
+        stbsp__raise_to_power10 (&ph, &pl, d, 18 - tens);
+
+        // get as much precision from the double-double as possible
+        stbsp__ddtoS64 (bits, ph, pl);
+
+        // check if we undershot
+        if (((stbsp__uint64) bits) >= stbsp__tento19th)
+            ++tens;
+    }
+
+    // now do the rounding in integer land
+    // high bit set: keep (low bits + 1) digits; otherwise keep tens + frac_digits digits
+    frac_digits = (frac_digits & 0x80000000) ? ((frac_digits & 0x7ffffff) + 1) : (tens + frac_digits);
+    if ((frac_digits < 24))
+    {
+        // dg = number of decimal digits currently held in bits
+        stbsp__uint32 dg = 1;
+        if ((stbsp__uint64) bits >= stbsp__powten[9])
+            dg = 10;
+        while ((stbsp__uint64) bits >= stbsp__powten[dg])
+        {
+            ++dg;
+            if (dg == 20)
+                goto noround;
+        }
+        if (frac_digits < dg)
+        {
+            stbsp__uint64 r;
+            // add 0.5 at the right position and round
+            e = dg - frac_digits;
+            if ((stbsp__uint32) e >= 24)
+                goto noround;
+            r = stbsp__powten[e];
+            bits = bits + (r / 2);
+            // rounding carried into an extra digit: bump the decimal exponent
+            if ((stbsp__uint64) bits >= stbsp__powten[dg])
+                ++tens;
+            bits /= r;
+        }
+    noround:;
+    }
+
+    // kill long trailing runs of zeros
+    if (bits)
+    {
+        stbsp__uint32 n;
+        // strip groups of three zeros with 64-bit math until the value fits in 32 bits
+        for (;;)
+        {
+            if (bits <= 0xffffffff)
+                break;
+            if (bits % 1000)
+                goto donez;
+            bits /= 1000;
+        }
+        n = (stbsp__uint32) bits;
+        while ((n % 1000) == 0)
+            n /= 1000;
+        bits = n;
+    donez:;
+    }
+
+    // convert to string
+    out += 64;
+    e = 0;
+    for (;;)
+    {
+        stbsp__uint32 n;
+        char* o = out - 8;
+        // do the conversion in chunks of U32s (avoid most 64-bit divides, worth it, constant denominators be damned)
+        if (bits >= 100000000)
+        {
+            n = (stbsp__uint32) (bits % 100000000);
+            bits /= 100000000;
+        }
+        else
+        {
+            n = (stbsp__uint32) bits;
+            bits = 0;
+        }
+        // emit two digits at a time from the stbsp__digitpair lookup table
+        while (n)
+        {
+            out -= 2;
+            *(stbsp__uint16*) out = *(stbsp__uint16*) &stbsp__digitpair[(n % 100) * 2];
+            n /= 100;
+            e += 2;
+        }
+        if (bits == 0)
+        {
+            // the pairwise writes can leave one leading '0'; drop it
+            if ((e) && (out[0] == '0'))
+            {
+                ++out;
+                --e;
+            }
+            break;
+        }
+        // more chunks follow: zero-pad this chunk to its full 8 digits
+        while (out != o)
+        {
+            *--out = '0';
+            ++e;
+        }
+    }
+
+    *decimal_pos = tens;
+    *start = out;
+    *len = e;
+    return ng;
+}
+
+#undef stbsp__ddmulthi
+#undef stbsp__ddrenorm
+#undef stbsp__ddmultlo
+#undef stbsp__ddmultlos
+#undef STBSP__SPECIAL
+#undef STBSP__COPYFP
+
+#endif // STB_SPRINTF_NOFLOAT
+
+// clean up
+#undef stbsp__uint16
+#undef stbsp__uint32
+#undef stbsp__int32
+#undef stbsp__uint64
+#undef stbsp__int64
+#undef STBSP__UNALIGNED
+
+#endif // STB_SPRINTF_IMPLEMENTATION
diff --git a/src/pch.h b/src/pch.h
index e3b0383b..85677e88 100644
--- a/src/pch.h
+++ b/src/pch.h
@@ -31,6 +31,10 @@ JUCE_BEGIN_IGNORE_WARNINGS_GCC_LIKE ("-Wzero-as-null-pointer-constant",
 #include <modules/Eigen/Eigen/Dense>
 JUCE_END_IGNORE_WARNINGS_GCC_LIKE
 
+#if BYOD_BUILDING_JAI_MODULES
+#include "jai/SharedJaiContext.h"
+#endif
+
 // global definitions
 using Parameters = std::vector<std::unique_ptr<juce::RangedAudioParameter>>;
 using ParamLayout = AudioProcessorValueTreeState::ParameterLayout;
diff --git a/src/processors/ProcessorStore.cpp b/src/processors/ProcessorStore.cpp
index b956e665..f9cb9558 100644
--- a/src/processors/ProcessorStore.cpp
+++ b/src/processors/ProcessorStore.cpp
@@ -58,6 +58,7 @@
 #include "other/ShimmerReverb.h"
 #include "other/SmoothReverb.h"
 #include "other/cry_baby/CryBaby.h"
+#include "other/krusher/Krusher.h"
 #include "other/spring_reverb/SpringReverbProcessor.h"
 
 #include "utility/CleanGain.h"
@@ -151,6 +152,7 @@ ProcessorStore::StoreMap ProcessorStore::store = {
     { "Shimmer Reverb", { &processorFactory<ShimmerReverb>, { ProcessorType::Other, 1, 1 } } },
     { "Smooth Reverb", { &processorFactory<SmoothReverb>, { ProcessorType::Other, 1, 1 } } },
     { "Spring Reverb", { &processorFactory<SpringReverbProcessor>, { ProcessorType::Other, 1, 1 } } },
+    { "Krusher", { &processorFactory<Krusher>, { ProcessorType::Other, 1, 1 } } },
 
 #if BYOD_ENABLE_ADD_ON_MODULES
     BYOD_STORE_MAP_ADD_ON_MODULES
diff --git a/src/processors/other/krusher/Krusher.cpp b/src/processors/other/krusher/Krusher.cpp
new file mode 100644
index 00000000..1915495a
--- /dev/null
+++ b/src/processors/other/krusher/Krusher.cpp
@@ -0,0 +1,126 @@
+#include "Krusher.h"
+#include "processors/ParameterHelpers.h"
+
+// Builds the processor with the layout from createParameterLayout(), caches a
+// pointer to each parameter, and fills in the UI options (colours, description,
+// authors).
+Krusher::Krusher (UndoManager* um)
+    : BaseProcessor ("Krusher", createParameterLayout(), um)
+{
+    using namespace ParameterHelpers;
+    // parameter IDs must match the ones registered in createParameterLayout()
+    loadParameterPointer (sampleRateParam, vts, "sample_rate");
+    loadParameterPointer (antialiasParam, vts, "antialias");
+    loadParameterPointer (bitDepthParam, vts, "bit_depth");
+    loadParameterPointer (brrFilterIndex, vts, "bit_reduction_filter");
+    loadParameterPointer (mixParam, vts, "mix");
+
+    uiOptions.backgroundColour = Colour { Colours::lightpink };
+    uiOptions.powerColour = Colour { Colours::red.darker (0.2f) };
+    uiOptions.info.description = "A lo-fi effect that reduces the sample rate and bit depth of the signal.";
+    uiOptions.info.authors = StringArray { "Jatin Chowdhury" };
+
+    // registers "antialias" as a popup-menu parameter (presumably shown in a menu
+    // rather than as a control on the module face -- confirm in BaseProcessor)
+    addPopupMenuParameter ("antialias");
+}
+
+// Declares the processor's parameters on top of the base set:
+//   sample_rate          - float, 1000..48000 (range centred at 8000), default 48000
+//   antialias            - bool, default false
+//   bit_depth            - float, 1..12 in steps of 1, default 12
+//   bit_reduction_filter - choice of four filter orders, default index 1
+//   mix                  - percent parameter, default 1.0
+ParamLayout Krusher::createParameterLayout()
+{
+    using namespace ParameterHelpers;
+    auto params = createBaseParams();
+
+    emplace_param<chowdsp::FloatParameter> (params,
+                                            "sample_rate",
+                                            "Downsample",
+                                            createNormalisableRange (1000.0f, 48000.0f, 8000.0f),
+                                            48000.0f,
+                                            &freqValToString,
+                                            &stringToFreqVal);
+    emplace_param<chowdsp::BoolParameter> (params, "antialias", "Anti-Alias", false);
+    emplace_param<chowdsp::FloatParameter> (params,
+                                            "bit_depth",
+                                            "Bits",
+                                            NormalisableRange { 1.0f, 12.0f, 1.0f },
+                                            12.0f,
+                                            &floatValToString,
+                                            &stringToFloatVal);
+    // NOTE(review): the display name "Smoothq" looks like a typo for "Smooth" -- confirm
+    emplace_param<chowdsp::ChoiceParameter> (params,
+                                             "bit_reduction_filter",
+                                             "Smoothq",
+                                             StringArray { "Zero-Order", "First-Order", "Second-Order", "Third-Order" },
+                                             1);
+    createPercentParameter (params, "mix", "Mix", 1.0f);
+
+    return { params.begin(), params.end() };
+}
+
+// Sets dry/wet gains from `mix`: dry = sqrt (1 - mix), wet = sqrt (mix), so the
+// squared gains always sum to one.
+static void setDryWetGain (chowdsp::Gain<float>& dryGain, chowdsp::Gain<float>& wetGain, float mix)
+{
+    dryGain.setGainLinear (std::sqrt (1.0f - mix));
+    wetGain.setGainLinear (std::sqrt (mix));
+}
+
+// Resets all DSP state for a new stream: remembers the host sample rate, prepares
+// the two resampling filters and the DC blocker for 2 channels, clears the
+// resampler and bit-reducer state, and sizes the dry/wet mixing stage.
+void Krusher::prepare (double sampleRate, int samplesPerBlock)
+{
+    hostFs = (float) sampleRate;
+
+    aaFilter.prepare (2);
+    aiFilter.prepare (2);
+
+    krusher_init_lofi_resample (&resample_state);
+
+    // value-initialise the per-channel bit-reducer filter state
+    for (auto& state : brFilterStates)
+        state = {};
+
+    dcBlocker.prepare (2);
+    dcBlocker.calcCoefs (20.0f, (float) sampleRate); // 20 Hz high-pass
+
+    wetGain.setRampDurationSeconds (0.05);
+    dryGain.setRampDurationSeconds (0.05);
+    // gain targets are set before prepare() (presumably so the ramps start at the
+    // current mix value -- confirm against chowdsp::Gain)
+    setDryWetGain (dryGain, wetGain, *mixParam);
+    wetGain.prepare ({ sampleRate, (uint32_t) samplesPerBlock, 2 });
+    dryGain.prepare ({ sampleRate, (uint32_t) samplesPerBlock, 2 });
+    dryBuffer.setMaxSize (2, samplesPerBlock);
+}
+
+// Runs the lo-fi resampler in place on `buffer`, holding samples so the signal
+// sounds as if it ran at `targetFs`. Does nothing when targetFs is at or above the
+// host rate. With `antialias` set, the block is filtered both before and after the
+// resampler at 0.48 * targetFs.
+void Krusher::processDownsampler (const chowdsp::BufferView<float>& buffer, float targetFs, bool antialias) noexcept
+{
+    if (targetFs >= hostFs)
+        return;
+
+    // both filters track the same cutoff, whether or not they are engaged
+    const auto cutoffHz = (float) targetFs * 0.48f;
+    aaFilter.calcCoefs (cutoffHz, chowdsp::CoefficientCalculators::butterworthQ<float>, hostFs);
+    aiFilter.calcCoefs (cutoffHz, chowdsp::CoefficientCalculators::butterworthQ<float>, hostFs);
+
+    const auto resampleRatio = double (hostFs / targetFs);
+
+    if (antialias)
+        aaFilter.processBlock (buffer);
+
+    krusher_process_lofi_downsample (jai_context.get(),
+                                     &resample_state,
+                                     const_cast<float**> (buffer.getArrayOfWritePointers()),
+                                     buffer.getNumChannels(),
+                                     buffer.getNumSamples(),
+                                     resampleRatio);
+
+    if (antialias)
+        aiFilter.processBlock (buffer);
+}
+
+// Per-block processing: keeps a copy of the input as the dry signal, runs the wet
+// path (bit reduction -> lo-fi resampling -> DC blocker) in place, then sums the
+// gain-scaled dry copy back into the buffer.
+void Krusher::processAudio (AudioBuffer<float>& buffer)
+{
+    const auto numChannels = buffer.getNumChannels();
+    const auto numSamples = buffer.getNumSamples();
+
+    // dry path snapshot
+    dryBuffer.setCurrentSize (numChannels, numSamples);
+    chowdsp::BufferMath::copyBufferData (buffer, dryBuffer);
+
+    // wet path
+    krusher_bit_reduce_process_block (const_cast<float**> (buffer.getArrayOfWritePointers()),
+                                      numChannels,
+                                      numSamples,
+                                      brrFilterIndex->getIndex(),
+                                      (int) *bitDepthParam,
+                                      brFilterStates.data());
+    processDownsampler (buffer, *sampleRateParam, antialiasParam->get());
+    dcBlocker.processBlock (buffer);
+
+    // dry/wet mix
+    setDryWetGain (dryGain, wetGain, *mixParam);
+    wetGain.process (buffer);
+    dryGain.process (dryBuffer);
+    chowdsp::BufferMath::addBufferData (dryBuffer, buffer);
+}
diff --git a/src/processors/other/krusher/Krusher.h b/src/processors/other/krusher/Krusher.h
new file mode 100644
index 00000000..12a1e6ef
--- /dev/null
+++ b/src/processors/other/krusher/Krusher.h
@@ -0,0 +1,54 @@
+#pragma once
+
+#include "processors/BaseProcessor.h"
+
+// True only on non-ARM builds that are building the Jai modules. Parenthesised so
+// the macro expands as a single expression wherever it is used.
+#define KRUSHER_USE_JAI_IMPL (! JUCE_ARM && BYOD_BUILDING_JAI_MODULES)
+
+#if KRUSHER_USE_JAI_IMPL
+#include "jai/byod_jai_lib.h"
+#else
+#include "krusher_fallback_impl.h"
+#endif
+
+// Lo-fi processor that reduces the bit depth and the effective sample rate of the
+// signal, with an optional anti-aliasing filter pair and a dry/wet mix. The DSP
+// kernels come either from the Jai library or from the C++ fallback header,
+// selected by KRUSHER_USE_JAI_IMPL.
+class Krusher : public BaseProcessor
+{
+public:
+    explicit Krusher (UndoManager* um = nullptr);
+
+    ProcessorType getProcessorType() const override { return Other; }
+    static ParamLayout createParameterLayout();
+
+    void prepare (double sampleRate, int samplesPerBlock) override;
+    void processAudio (AudioBuffer<float>& buffer) override;
+
+private:
+    // Resamples `buffer` in place towards `targetFs`; no-op when targetFs >= hostFs.
+    void processDownsampler (const chowdsp::BufferView<float>& buffer, float targetFs, bool antialias) noexcept;
+
+    // parameter handles, loaded in the constructor
+    chowdsp::FloatParameter* sampleRateParam = nullptr;
+    chowdsp::BoolParameter* antialiasParam = nullptr;
+    chowdsp::FloatParameter* bitDepthParam = nullptr;
+    chowdsp::ChoiceParameter* brrFilterIndex = nullptr;
+    chowdsp::FloatParameter* mixParam = nullptr;
+
+    chowdsp::EllipticFilter<8> aaFilter; // applied before the resampler when anti-aliasing is on
+    chowdsp::EllipticFilter<8> aiFilter; // applied after the resampler when anti-aliasing is on
+    float hostFs = 48000.0f; // host sample rate, updated in prepare()
+
+#if KRUSHER_USE_JAI_IMPL
+    SharedJaiContext jai_context;
+    jai_Krusher_Lofi_Resample_State resample_state {};
+    std::array<jai_Krusher_Bit_Reducer_Filter_State, 2> brFilterStates {};
+#else
+    // placeholder so jai_context.get() compiles in both configurations; the
+    // fallback kernel ignores its context argument
+    std::unique_ptr<chowdsp::NullType> jai_context;
+    Krusher_Lofi_Resample_State resample_state {};
+    std::array<Krusher_Bit_Reducer_Filter_State, 2> brFilterStates {};
+#endif
+
+    chowdsp::FirstOrderHPF<float> dcBlocker; // 20 Hz high-pass on the wet path
+
+    chowdsp::Gain<float> wetGain;
+    chowdsp::Gain<float> dryGain;
+    chowdsp::Buffer<float> dryBuffer; // copy of the unprocessed input for the dry path
+
+    JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (Krusher)
+};
diff --git a/src/processors/other/krusher/krusher_fallback_impl.h b/src/processors/other/krusher/krusher_fallback_impl.h
new file mode 100644
index 00000000..691496ad
--- /dev/null
+++ b/src/processors/other/krusher/krusher_fallback_impl.h
@@ -0,0 +1,240 @@
+#pragma once
+
+// Carry-over state of the lo-fi resampler: for each direction, the fractional
+// read-position offset left by the previous block and applied to the next one.
+struct Krusher_Lofi_Resample_State
+{
+    double upsample_overshoot = 0.0;
+    double downsample_overshoot = 0.0;
+};
+
+// Resets both overshoot accumulators of `state` to zero.
+inline void krusher_init_lofi_resample (Krusher_Lofi_Resample_State* state)
+{
+    state->upsample_overshoot = state->downsample_overshoot = 0.0;
+}
+
+namespace krusher_detail
+{
+// Sample-and-hold resampler. For every channel, destination sample n is the source
+// sample at index (int) (n * resample_factor + overshoot_samples), clamped to the
+// last source sample. Afterwards `overshoot_samples` is replaced by the fractional
+// part of num_samples_dest * resample_factor, for use by the next call.
+inline void process_lofi_resample (float** source_buffer,
+                                   float** dest_buffer,
+                                   int num_channels,
+                                   int num_samples_source,
+                                   int num_samples_dest,
+                                   double resample_factor,
+                                   double& overshoot_samples)
+{
+    const int last_source_index = num_samples_source - 1;
+
+    for (int ch = 0; ch < num_channels; ++ch)
+    {
+        const float* const in = source_buffer[ch];
+        float* const out = dest_buffer[ch];
+
+        for (int n = 0; n < num_samples_dest; ++n)
+        {
+            const auto read_pos = (int) ((double) n * resample_factor + overshoot_samples);
+            out[n] = in[last_source_index < read_pos ? last_source_index : read_pos];
+        }
+    }
+
+    // every channel used the incoming overshoot; update it only once all are done
+    const auto span_in_source = (double) num_samples_dest * resample_factor;
+    overshoot_samples = span_in_source - std::floor (span_in_source);
+}
+} // namespace krusher_detail
+
+// Lo-fi sample-rate reduction, in place: the block is sample-and-hold resampled
+// down by `resample_factor` into a stack-allocated scratch buffer and then back up
+// to its original length. `ctx` is unused in this fallback. The scratch storage
+// holds exactly two channels.
+inline void krusher_process_lofi_downsample ([[maybe_unused]] void* ctx,
+                                             Krusher_Lofi_Resample_State* state,
+                                             float** buffer,
+                                             int num_channels,
+                                             int num_samples,
+                                             double resample_factor)
+{
+    const auto reduced_length = (int) std::ceil ((double) num_samples / resample_factor);
+
+    // one stack allocation, split into two channel-sized halves
+    auto* scratch = (float*) alloca (2 * (size_t) reduced_length * sizeof (float));
+    float* reduced[2] = { scratch, scratch + reduced_length };
+
+    // down ...
+    krusher_detail::process_lofi_resample (buffer, reduced, num_channels, num_samples, reduced_length, resample_factor, state->downsample_overshoot);
+    // ... and back up into the caller's buffer
+    krusher_detail::process_lofi_resample (reduced, buffer, num_channels, reduced_length, num_samples, 1.0 / resample_factor, state->upsample_overshoot);
+}
+
+//==============================================
+// Per-channel memory of the bit-reducer decode filters.
+struct Krusher_Bit_Reducer_Filter_State
+{
+    int32_t p1 {}; // previous filter output
+    int32_t p2 {}; // filter output before that (the first-order filter resets it to 0)
+};
+
+namespace krusher_detail
+{
+// BIT_MASKS[n] has the low n bits set; valid indices are 0..15.
+static constexpr uint16_t BIT_MASKS[] = {
+    0, // 0
+    0x0001, // 1
+    0x0003, // 2
+    0x0007, // 3
+    0x000F, // 4
+    0x001F, // 5
+    0x003F, // 6
+    0x007F, // 7
+    0x00FF, // 8
+    0x01FF, // 9
+    0x03FF, // 10
+    0x07FF, // 11
+    0x0FFF, // 12
+    0x1FFF, // 13
+    0x3FFF, // 14
+    0x7FFF, // 15
+};
+
+// One encoded block of 16 samples sharing a single shift amount.
+struct Bit_Reduction_Block
+{
+    uint8_t shift_amount {}; // right-shift applied when encoding, undone when decoding
+    std::array<uint16_t, 16> data {}; // encoded sample values
+};
+
+// Encodes one sample: offsets it by 1 << 8, shifts right by `shift`, and keeps the
+// low `bit_depth` bits.
+inline uint16_t encode_sample (uint8_t shift, int bit_depth, int16_t x)
+{
+    const auto offset_value = (uint16_t) (x + (1 << 8));
+    const auto shifted = (uint16_t) (offset_value >> shift);
+    return (uint16_t) (shifted & BIT_MASKS[bit_depth]);
+}
+
+// Inverse of the encode step: shifts the code left by `shift` (truncated to 16
+// bits) and removes the 1 << 8 offset.
+inline int16_t decode_sample (uint8_t shift, uint16_t x)
+{
+    const auto restored = (uint16_t) (x << shift);
+    return (int16_t) (restored - (1 << 8));
+}
+
+// Decodes the 16 samples of `br_block` into `out[0..15]`, optionally running each
+// decoded value through a recursive fixed-point filter:
+//   filter 0 - none, 1 - first order, 2 - second order, 3 - third entry of the family.
+// `state` carries the filter memory across calls. Any other `filter` value trips
+// jassertfalse for every sample and leaves `out` untouched.
+inline void bit_reduce_decode (const Bit_Reduction_Block& br_block,
+                               std::span<int16_t> out,
+                               int filter,
+                               Krusher_Bit_Reducer_Filter_State& state)
+{
+    const uint8_t shift = br_block.shift_amount;
+
+    const auto first_order = [&state] (int16_t x)
+    {
+        const auto acc = int32_t (x) + ((state.p1 * 15) >> 4);
+        state.p2 = 0;
+        state.p1 = acc;
+        return (int16_t) (acc >> 4);
+    };
+
+    const auto second_order = [&state] (int16_t x)
+    {
+        const auto acc = int32_t (x) + ((state.p1 * 61) >> 5) - ((state.p2 * 15) >> 4);
+        state.p2 = state.p1;
+        state.p1 = acc;
+        return (int16_t) (acc >> 5);
+    };
+
+    const auto third_order = [&state] (int16_t x)
+    {
+        const auto acc = int32_t (x) + ((state.p1 * 115) >> 6) - ((state.p2 * 13) >> 4);
+        state.p2 = state.p1;
+        state.p1 = acc;
+        return (int16_t) (acc >> 6);
+    };
+
+    for (size_t n = 0; n < 16; ++n)
+    {
+        if (filter < 0 || filter > 3)
+        {
+            jassertfalse;
+            continue;
+        }
+
+        const auto decoded = decode_sample (shift, br_block.data[n]);
+
+        if (filter == 0)
+            out[n] = decoded;
+        else if (filter == 1)
+            out[n] = first_order (decoded);
+        else if (filter == 2)
+            out[n] = second_order (decoded);
+        else
+            out[n] = third_order (decoded);
+    }
+}
+
+// Encodes 16 samples from `PCM_data` at `bit_depth` bits. Every shift amount from
+// 0 to 16 - bit_depth is tried, and the one with the smallest summed squared
+// encode/decode round-trip error is used (the lowest shift wins ties).
+inline Bit_Reduction_Block bit_reduce_encode (std::span<const int16_t> PCM_data, int bit_depth)
+{
+    // summed squared round-trip error over the block for one candidate shift
+    const auto round_trip_error = [&PCM_data, bit_depth] (uint8_t shift)
+    {
+        double total = 0.0;
+        for (size_t n = 0; n < 16; ++n)
+        {
+            const auto reconstructed = decode_sample (shift, encode_sample (shift, bit_depth, PCM_data[n]));
+            const auto diff = double (PCM_data[n] - reconstructed);
+            total += diff * diff;
+        }
+        return total;
+    };
+
+    const auto num_shifts = uint8_t (16 - bit_depth + 1);
+    uint8_t best_shift = 0;
+    auto lowest_error = std::numeric_limits<double>::max();
+
+    for (uint8_t candidate = 0; candidate < num_shifts; ++candidate)
+    {
+        const auto error = round_trip_error (candidate);
+        if (error < lowest_error)
+        {
+            lowest_error = error;
+            best_shift = candidate;
+        }
+    }
+
+    Bit_Reduction_Block block {};
+    block.shift_amount = best_shift;
+    for (size_t n = 0; n < 16; ++n)
+        block.data[n] = encode_sample (best_shift, bit_depth, PCM_data[n]);
+
+    return block;
+}
+
+// Scales every float sample by 1 << 8 and truncates it to int16. Writes
+// dataFloat.size() values into `dataInt`.
+inline void convert_float_to_int (std::span<const float> dataFloat, std::span<int16_t> dataInt)
+{
+    const auto scale = float (1 << 8);
+    const auto count = dataFloat.size();
+
+    for (size_t n = 0; n != count; ++n)
+        dataInt[n] = int16_t (dataFloat[n] * scale);
+}
+
+// Inverse scaling: divides every int16 sample by 1 << 8. Writes dataFloat.size()
+// values into `dataFloat`.
+inline void convert_int_to_float (std::span<const int16_t> dataInt, std::span<float> dataFloat)
+{
+    const auto scale = float (1 << 8);
+    const auto count = dataFloat.size();
+
+    for (size_t n = 0; n != count; ++n)
+        dataFloat[n] = ((float) dataInt[n]) / scale;
+}
+} // namespace krusher_detail
+
+// Bit-reduces `buffer` in place, channel by channel, in chunks of up to 16 samples.
+// Each chunk is converted to 16-bit integers, encoded/decoded at `bit_depth` bits
+// with decode filter `filter_index` (skipped entirely when bit_depth >= 12), and
+// converted back to float. `filter_states` must hold one entry per channel.
+inline void krusher_bit_reduce_process_block (float** buffer,
+                                              int32_t num_channels,
+                                              int32_t num_samples,
+                                              int32_t filter_index,
+                                              int32_t bit_depth,
+                                              Krusher_Bit_Reducer_Filter_State* filter_states)
+{
+    static constexpr size_t small_block_size = 16;
+    std::array<int16_t, small_block_size> samples_int {};
+
+    // The codec helpers always read and write all 16 slots. Give them a view over
+    // the whole scratch array, so a short final chunk (zero-padded below) never
+    // indexes past the end of its span.
+    const std::span<int16_t> samples_int_block { samples_int.data(), small_block_size };
+
+    for (int channel = 0; channel < num_channels; ++channel)
+    {
+        auto samples_remaining = num_samples;
+        while (samples_remaining > 0)
+        {
+            const auto samples_to_process = std::min (samples_remaining, (int) small_block_size);
+
+            std::span<float> samples_float_span { buffer[channel] + num_samples - samples_remaining, (size_t) samples_to_process };
+            std::fill (samples_int.begin(), samples_int.end(), 0); // zero padding for a partial chunk
+            std::span<int16_t> samples_int_span { samples_int.data(), (size_t) samples_to_process };
+
+            krusher_detail::convert_float_to_int (samples_float_span, samples_int_span);
+
+            if (bit_depth < 12)
+            {
+                const auto br_data = krusher_detail::bit_reduce_encode (samples_int_block, bit_depth);
+                krusher_detail::bit_reduce_decode (br_data, samples_int_block, filter_index, filter_states[channel]);
+            }
+
+            // only the real samples are copied back; the padding is discarded
+            krusher_detail::convert_int_to_float (samples_int_span, samples_float_span);
+
+            samples_remaining -= samples_to_process;
+        }
+    }
+}