Added fixed-point .32 calculation

ikostoski · Sep 29, 2019 · d94f836 · d94f836
1 parent d900459
commit d94f836
Show file tree

Hide file tree

Showing 2 changed files with 186 additions and 59 deletions.
diff --git a/esp32-i2s-slm.ino b/esp32-i2s-slm.ino
@@ -91,34 +91,34 @@
 
 // TDK/InvenSense ICS-43434
 // Datasheet: https://www.invensense.com/wp-content/uploads/2016/02/DS-000069-ICS-43434-v1.1.pdf
-const float ICS43434_B[] = {0.47733, -0.48649, -0.33646, 0.23462, 0.11102};
-const float ICS43434_A[] = {1.0, -1.9307338, 0.8651946, 0.0644284, 0.0011125};
-IIRFilter ICS43434(ICS43434_B, ICS43434_A);
+const double ICS43434_B[] = {0.47733, -0.48649, -0.33646, 0.23462, 0.11102};
+const double ICS43434_A[] = {1.0, -1.9307338, 0.8651946, 0.0644284, 0.0011125};
+IIR_FILTER ICS43434(ICS43434_B, ICS43434_A);
 
 // TDK/InvenSense ICS-43432
 // Datasheet: https://www.invensense.com/wp-content/uploads/2015/02/ICS-43432-data-sheet-v1.3.pdf
-const float ICS43432_B[] = {-0.457337, 1.1222867, -0.77818279, 0.0096893, 0.1034567};
-const float ICS43432_A[] = {1,-3.3420781, 4.4033694, -3.0167073, 1.2265537, -0.2962229, 0.0251086};
-IIRFilter ICS43432(ICS43432_B, ICS43432_A);
+const double ICS43432_B[] = {-0.457337, 1.1222867, -0.77818279, 0.0096893, 0.1034567};
+const double ICS43432_A[] = {1,-3.3420781, 4.4033694, -3.0167073, 1.2265537, -0.2962229, 0.0251086};
+IIR_FILTER ICS43432(ICS43432_B, ICS43432_A);
 
 // TDK/InvenSense INMP441
 // Datasheet: https://www.invensense.com/wp-content/uploads/2015/02/INMP441.pdf
-const float INMP441_B[] = {1.00198, -1.99085, 0.98892};
-const float INMP441_A[] = {1.0, -1.99518, 0.99518};
-IIRFilter INMP441(INMP441_B, INMP441_A);
+const double INMP441_B[] = {1.00198, -1.99085, 0.98892};
+const double INMP441_A[] = {1.0, -1.99518, 0.99518};
+IIR_FILTER INMP441(INMP441_B, INMP441_A);
 
 //
 // A-weighting 6th order IIR Filter, Fs = 48KHz 
 // (By Dr. Matt L., Source: https://dsp.stackexchange.com/a/36122)
 //
-const float A_weighting_B[] = {0.169994948147430, 0.280415310498794, -1.120574766348363, 0.131562559965936, 0.974153561246036, -0.282740857326553, -0.152810756202003};
-const float A_weighting_A[] = {1.0, -2.12979364760736134, 0.42996125885751674, 1.62132698199721426, -0.96669962900852902, 0.00121015844426781, 0.04400300696788968};
-IIRFilter A_weighting(A_weighting_B, A_weighting_A);
+const double A_weighting_B[] = {0.169994948147430, 0.280415310498794, -1.120574766348363, 0.131562559965936, 0.974153561246036, -0.282740857326553, -0.152810756202003};
+const double A_weighting_A[] = {1.0, -2.12979364760736134, 0.42996125885751674, 1.62132698199721426, -0.96669962900852902, 0.00121015844426781, 0.04400300696788968};
+IIR_FILTER A_weighting(A_weighting_B, A_weighting_A);
 
 // No filter, class to disable equalizer or weighting
 class NoFilter {
   public:
-    inline float filter(float value) { return value; };
+    inline IIR_BASE_T filter(IIR_BASE_T value) { return value; };
 };
 NoFilter None;
 
@@ -132,11 +132,11 @@ NoFilter None;
 #endif
 
 // Data we push to 'samples_queue'
-struct samples_sum_t {
+struct samples_queue_t {
   // Sum of squares of mic samples, after Equalizer filter
-  float sum_sqr_SPL;
+  IIR_ACCU_T sum_sqr_SPL;
   // Sum of squares of weighted mic samples
-  float sum_sqr_weighted;
+  IIR_ACCU_T sum_sqr_weighted;
 };
 int32_t samples[SAMPLES_SHORT];
 QueueHandle_t samples_queue;
@@ -150,11 +150,13 @@ QueueHandle_t samples_queue;
 
 void mic_i2s_init() {
   // Setup I2S to sample mono channel for SAMPLE_RATE * SAMPLE_BITS
+  // NOTE: Recent update to Arduino_esp32 (1.0.2 -> 1.0.3)
+  //       seems to have swapped ONLY_LEFT and ONLY_RIGHT channels
   const i2s_config_t i2s_config = {
     .mode = i2s_mode_t(I2S_MODE_MASTER | I2S_MODE_RX),
     .sample_rate = SAMPLE_RATE,
     .bits_per_sample = i2s_bits_per_sample_t(SAMPLE_BITS),
-    .channel_format = I2S_CHANNEL_FMT_ONLY_RIGHT,
+    .channel_format = I2S_CHANNEL_FMT_ONLY_LEFT,
     .communication_format = i2s_comm_format_t(I2S_COMM_FORMAT_I2S | I2S_COMM_FORMAT_I2S_MSB),
     .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
     .dma_buf_count = DMA_BANKS,
@@ -194,27 +196,28 @@ void mic_i2s_reader_task(void* parameter) {
 
   // Discard first block, microphone may have startup time (i.e. INMP441 up to 83ms)
   size_t bytes_read = 0;
-  i2s_read(I2S_NUM_0, &samples, SAMPLES_SHORT * sizeof(int32_t), &bytes_read, portMAX_DELAY);
+  i2s_read(I2S_PORT, &samples, SAMPLES_SHORT * sizeof(int32_t), &bytes_read, portMAX_DELAY);
 
   while (true) {
-    float sum_sqr_SPL = 0;
-    float sum_sqr_weighted = 0;
+    IIR_ACCU_T sum_sqr_SPL = 0;
+    IIR_ACCU_T sum_sqr_weighted = 0;
+    samples_queue_t q;
     int32_t sample;
-    samples_sum_t ss;
-    float s = 0;
+    IIR_BASE_T s;
 
-    i2s_read(I2S_NUM_0, &samples, SAMPLES_SHORT * sizeof(int32_t), &bytes_read, portMAX_DELAY);
+    i2s_read(I2S_PORT, &samples, SAMPLES_SHORT * sizeof(int32_t), &bytes_read, portMAX_DELAY);
     for(int i=0; i<SAMPLES_SHORT; i++) {
       sample = SAMPLE_CONVERT(samples[i]);
-      s = MIC_EQUALIZER.filter(sample);
-      sum_sqr_SPL += (s * s);
+      s = sample;
+      s = MIC_EQUALIZER.filter(s);
+      ACCU_SQR(sum_sqr_SPL, s);
       s = WEIGHTING.filter(s);
-      sum_sqr_weighted += (s * s);
+      ACCU_SQR(sum_sqr_weighted, s);
     }
 
-    ss.sum_sqr_SPL = sum_sqr_SPL;
-    ss.sum_sqr_weighted = sum_sqr_weighted;
-    xQueueSend(samples_queue, &ss, portMAX_DELAY);
+    q.sum_sqr_SPL = sum_sqr_SPL;
+    q.sum_sqr_weighted = sum_sqr_weighted;
+    xQueueSend(samples_queue, &q, portMAX_DELAY);
   }
 }
 
@@ -228,27 +231,27 @@ void setup() {
     #endif
   #endif
 
-  samples_queue = xQueueCreate(8, sizeof(samples_sum_t));
+  samples_queue = xQueueCreate(8, sizeof(samples_queue_t));
   xTaskCreate(mic_i2s_reader_task, "Mic I2S Reader", I2S_TASK_STACK, NULL, I2S_TASK_PRI, NULL);
 
-  samples_sum_t ss;
+  samples_queue_t q;
   uint32_t Leq_cnt = 0;
-  double Leq_sum_sqr = 0;
+  IIR_ACCU_T Leq_sum_sqr = 0;
   double Leq_dB = 0;
 
   // Read sum of samaples, calculated by 'i2s_reader_task'
-  while (xQueueReceive(samples_queue, &ss, portMAX_DELAY)) {
+  while (xQueueReceive(samples_queue, &q, portMAX_DELAY)) {
 
     // Calculate dB values relative to MIC_REF_AMPL and adjust for reference dB
-    float short_SPL_dB = MIC_OFFSET_DB + MIC_REF_DB + 20 * log10(sqrt(ss.sum_sqr_SPL / SAMPLES_SHORT) / MIC_REF_AMPL);
+    double short_SPL_dB = MIC_OFFSET_DB + MIC_REF_DB + 20 * log10(sqrt(double(q.sum_sqr_SPL) / SAMPLES_SHORT) / MIC_REF_AMPL);
 
     // In case of acoustic overload, report infinty Leq value
     if (short_SPL_dB > MIC_OVERLOAD_DB) Leq_sum_sqr = INFINITY;
 
-    Leq_sum_sqr += ss.sum_sqr_weighted;
+    Leq_sum_sqr += q.sum_sqr_weighted;
     Leq_cnt += SAMPLES_SHORT;
     if (Leq_cnt >= SAMPLE_RATE * LEQ_PERIOD) {
-      Leq_dB = MIC_OFFSET_DB + MIC_REF_DB + 20 * log10(sqrt(Leq_sum_sqr / Leq_cnt) / MIC_REF_AMPL);
+      Leq_dB = MIC_OFFSET_DB + MIC_REF_DB + 20 * log10(sqrt(double(Leq_sum_sqr) / Leq_cnt) / MIC_REF_AMPL);
       Leq_sum_sqr = 0;
       Leq_cnt = 0;
       printf("%s(%ds) : %.1f\n", LEQ_UNITS, LEQ_PERIOD, Leq_dB);
@@ -264,7 +267,7 @@ void setup() {
         continue;
       }
       // The 'short' Leq line
-      float short_Leq_dB = MIC_OFFSET_DB + MIC_REF_DB + 20 * log10(sqrt(ss.sum_sqr_weighted / SAMPLES_SHORT) / MIC_REF_AMPL);
+      double short_Leq_dB = MIC_OFFSET_DB + MIC_REF_DB + 20 * log10(sqrt(double(q.sum_sqr_weighted) / SAMPLES_SHORT) / MIC_REF_AMPL);
       uint16_t len = uint16_t(((short_Leq_dB - MIC_NOISE_DB) / MIC_OVERLOAD_DB) * display.getWidth());
       display.drawHorizontalLine(0, 0, len);
       display.drawHorizontalLine(0, 1, len);

diff --git a/iir-filter.h b/iir-filter.h
@@ -6,8 +6,8 @@
  *   https://github.com/tttapa/Filters
  *   
  * Original code is licensed under GPL-3.0
- *   
- * Replaced double with float, compacted and added GCC optimization / inlining  
+ *
+ * (c)2019 Ivan Kostoski
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -23,18 +23,144 @@
  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
  */
 
+#include <stdint.h>
+
+/******************************************************************************/
+
+//
+// Fixed point .32 precision (integer math)
+// - On ESP32, this seems to be ~2x faster than (emulated) double-precision
+//   and should be good enough for 2 x 6th order IIR filters
+// - Error (difference to double precision) is <= 0.1dB down to 10Hz
+// - Should not overflow with 24bit microphone values (120dBFS)
+//
+#define IIR_ACCU_T         fixed_point_64_32
+#define IIR_BASE_T         fixed_point_32_32
+#define ACCU_MUL(A, X, Y)  A.accu_mul(X, Y)
+#define ACCU_SQR(A, X)     A.accu_sqr(X)
+
+//
+// Double precision floating point math
+// - Works with up to 4th + 6th order IIR filters
+// - With 2 x 6th order filters, usees up 122ms from 125ms available
+//   on signle core.
+// - Ask Espressif nicely to enable, integrate or otherwise publish the
+//   'double-precission accelerator instructions' with appropriate libraries
+//   (silicon should be in the ESP32 cores), to get ~2x improvement in DP-FPU speed.
+//
+//#define IIR_ACCU_T         double
+//#define IIR_BASE_T         double
+//#define ACCU_MUL(A, X, Y)  A += (X) * (Y)
+//#define ACCU_SQR(A, X)     A += (X) * (X)
+
+// NOTE: IIR filters with single-precision floats may
+//       produce errors (>19db) for low frequencies below 40Hz
+//       (i.e. in A-weighting filter)
+
+/******************************************************************************/
+
+#define IIR_FILTER         IIRFilter<IIR_ACCU_T, IIR_BASE_T>
+
+/******************************************************************************
+ * Fixed point 64.32 and 32.32
+ * - 32bit platforms don't get __int128 in GCC
+ * - Minimalistic implementation (only what is needed for IIRFilter)
+ ******************************************************************************/
+
+#define INLINE            __attribute__((optimize("-O3"), always_inline)) inline
+#define high_word(x)      (x >> 32)
+#define low_word(x)       (uint32_t)x
+
+struct fixed_point_32_32;
+
+struct fixed_point_64_32 {
+  int64_t v;
+  uint32_t f;
+
+  fixed_point_64_32(): v(0), f(0) {};
+  fixed_point_64_32(double d) { v = d; int64_t r = d * 0x100000000LL; f = low_word(r); };
+  explicit operator double() const { return (1.0 * f / 0x100000000LL) + v; }
+
+  fixed_point_64_32& operator += (const fixed_point_64_32& a);
+  void accu_mul(const fixed_point_32_32 a, const fixed_point_32_32 b);
+  void accu_sqr(const fixed_point_32_32 a);
+};
+
+struct fixed_point_32_32 {
+  int32_t v;
+  uint32_t f;
+  fixed_point_32_32(): v(0), f(0) {};
+  fixed_point_32_32(int32_t i): v(i), f(0) {};
+  fixed_point_32_32(long long i) { v = high_word(i); f = low_word(i); };
+  fixed_point_32_32(long long unsigned i) { v = high_word(i); f = low_word(i); };
+  fixed_point_32_32(double d) { int64_t i = d * 0x100000000LL; v = high_word(i); f = low_word(i); };
+  fixed_point_32_32(fixed_point_64_32 fp) { v = fp.v; f = fp.f; };
+  explicit operator int64_t() const { return (int64_t(v) << 32) + f; };
+  explicit operator double() const { return (1.0 * f / 0x100000000LL) + v; }
+};
+
+// Assume we have MUL32_HIGH on ESP32
+#define mul32x32uu(A, B)  ((uint64_t)(A) * (B))
+#define mul32x32su(A, B)  ((int64_t)(A) * (B))
+#define mul32x32ss(A, B)  ((int64_t)(A) * (B))
+
+/*
+ * Fixed point 64.32 - Add a
+ */
+INLINE fixed_point_64_32& fixed_point_64_32::operator += (const fixed_point_64_32& a) {
+  uint64_t sumf = f + a.f;
+  f = low_word(sumf);
+  v += a.v + high_word(sumf);
+  return *this;
+}
+
+/*
+ * Fixed point 64.32 - Multiply a * b and accumulatee
+ */
+INLINE void fixed_point_64_32::accu_mul(const fixed_point_32_32 a, const fixed_point_32_32 b) {
+  int64_t value = mul32x32ss(a.v, b.v);
+  uint64_t fract = high_word(mul32x32uu(a.f, b.f)); // Discarding 32 LSB bits here
+  int64_t p = mul32x32su(a.v, b.f); // a.v * b.f
+  fract += low_word(p);
+  value += high_word(p);
+  p = mul32x32su(b.v, a.f) ; // b.v * a.f
+  fract += low_word(p);
+  value += high_word(p);
+  fract += f; // accumulate with carry
+  f = low_word(fract);
+  v += value + high_word(fract);
+}
+
+/*
+ * Fixed point 64.32, trades one 32x32 multiplication for two shifts
+ */
+INLINE void fixed_point_64_32::accu_sqr(const fixed_point_32_32 a) {
+  int64_t value = mul32x32ss(a.v, a.v);
+  uint64_t fract = high_word(mul32x32uu(a.f, a.f)); // Discarding 32 LSB bits here
+  int64_t p = mul32x32su(a.v, a.f); // a.v * a.f
+  fract += (low_word(p) << 1);
+  value += (high_word(p) << 1);
+  fract += f;
+  f = low_word(fract);
+  v += value + high_word(fract);
+}
+
+/******************************************************************************
+ * IIR Filter implementation, direct form I, template
+ ******************************************************************************/
+template<typename AccuT, typename CoeffT>
 class IIRFilter {
   public:  
     template <size_t B, size_t A>    
-    IIRFilter(const float (&b)[B], const float (&_a)[A]) : lenB(B), lenA(A-1) {
-      x = new float[lenB]();
-      y = new float[lenA]();
-      coeff_b = new float[2*lenB-1];
-      coeff_a = new float[2*lenA-1];
-      float a0 = _a[0];
-      const float *a = &_a[1];
-      for (uint8_t i = 0; i < 2*lenB-1; i++) coeff_b[i] = b[(2*lenB - 1 - i) % lenB] / a0;      
-      for (uint8_t i = 0; i < 2*lenA-1; i++) coeff_a[i] = a[(2*lenA - 2 - i) % lenA] / a0;
+    IIRFilter(const double (&b)[B], const double (&_a)[A]) : lenB(B), lenA(A-1) {
+      x = new CoeffT[lenB]();
+      y = new CoeffT[lenA]();
+      coeff_b = new CoeffT[2*lenB-1];
+      coeff_a = new CoeffT[2*lenA-1];
+      double a0 = -1 / _a[0];
+      const double *a = &_a[1];
+      for (uint8_t i = 0; i < 2*lenB-1; i++) coeff_b[i] = b[(2*lenB - 1 - i) % lenB] * a0;
+      for (uint8_t i = 0; i < 2*lenA-1; i++) coeff_a[i] = a[(2*lenA - 2 - i) % lenA] * a0;
     }
 
     ~IIRFilter() {
@@ -43,16 +169,14 @@ class IIRFilter {
       delete[] coeff_a;
       delete[] coeff_b;
     }
-    
-    inline float filter(float value) __attribute__((optimize("-O3"))) {
-      float b_terms = 0;
+
+    INLINE CoeffT filter(CoeffT value) {
+      AccuT filtered = 0;
       x[i_b] = value;
-      float *b_shift = &coeff_b[lenB - i_b - 1];
-      for(uint8_t i = 0; i < lenB; i++) b_terms += x[i] * b_shift[i];      
-      float a_terms = 0;
-      float *a_shift = &coeff_a[lenA - i_a - 1];
-      for(uint8_t i = 0; i < lenA; i++) a_terms += y[i] * a_shift[i];
-      float filtered = b_terms - a_terms;
+      CoeffT *b_shift = &coeff_b[lenB - i_b - 1];
+      for(uint8_t i = 0; i < lenB; i++) ACCU_MUL(filtered, x[i], b_shift[i]);
+      CoeffT *a_shift = &coeff_a[lenA - i_a - 1];
+      for(uint8_t i = 0; i < lenA; i++) ACCU_MUL(filtered, y[i], a_shift[i]);
       y[i_a] = filtered;
       i_b++;
       if(i_b == lenB) i_b = 0;
@@ -61,8 +185,8 @@ class IIRFilter {
       return filtered;
     }
 
-  private:
+  protected:
     const uint8_t lenB, lenA;
     uint8_t i_b = 0, i_a = 0;
-    float *x, *y, *coeff_b, *coeff_a;
+    CoeffT *x, *y, *coeff_b, *coeff_a;
 };