From 1e6896f918383854ec5a74e860dd7ed58671cae5 Mon Sep 17 00:00:00 2001
From: George Norton <30636555+george-norton@users.noreply.github.com>
Date: Wed, 23 Aug 2023 20:47:01 +0100
Subject: [PATCH] Enable oratory's 15 band EQ (#23)

* Attempt at optimizing so the new filtering runs better.

* Additional improvements.

* Further optimisations.

* Seems to work OK with 10 filters. Just noise with 11.

* Increase config buffer size, and make the bqf_transform function inline

* Remove extra loop and process input evently across both cores.

* Enable 15 band EQ.

* Shift some load of core1

* Revert buffer size change

* Mark USB transfer as done sooner.

* Fast multiply.

* Fix build failure.

* Rollback changes we dont need.

* Fix save config to flash

* Increase filter stages to 20. We cant quite run that many though.

* Replace a few doubles with floats. According to the raspberry-pi-pico-c-sdk manual, doubles are around 3 times slower than floats.
---
 firmware/code/CMakeLists.txt          |  4 +-
 firmware/code/bqf.c                   | 15 -----
 firmware/code/bqf.h                   |  9 +--
 firmware/code/bqf.inl                 | 36 +++++++++++
 firmware/code/configuration_manager.c | 37 +++++++-----
 firmware/code/configuration_types.h   | 25 +++++---
 firmware/code/fix16.h                 | 19 ++----
 firmware/code/{fix16.c => fix16.inl}  | 86 ++++++++-------------------
 firmware/code/i2s.h                   |  2 +-
 firmware/code/ringbuf.c               |  2 +-
 firmware/code/ringbuf.h               |  2 +-
 firmware/code/run.c                   | 29 ++++-----
 12 files changed, 129 insertions(+), 137 deletions(-)
 create mode 100644 firmware/code/bqf.inl
 rename firmware/code/{fix16.c => fix16.inl} (66%)

diff --git a/firmware/code/CMakeLists.txt b/firmware/code/CMakeLists.txt
index aae60e4..7c12e85 100644
--- a/firmware/code/CMakeLists.txt
+++ b/firmware/code/CMakeLists.txt
@@ -14,7 +14,6 @@ add_executable(ploopy_headphones
     run.c
     ringbuf.c
     i2s.c
-    fix16.c
     bqf.c
     configuration_manager.c
 )
@@ -63,6 +62,9 @@ target_compile_definitions(ploopy_headphones PRIVATE
     GIT_HASH="${GIT_HASH}"
 
     PICO_XOSC_STARTUP_DELAY_MULTIPLIER=64
+
+    # Performance, avoid calls to ____wrap___aeabi_lmul_veneer when doing 64bit multiplies
+    PICO_INT64_OPS_IN_RAM=1
 )
 
 pico_enable_stdio_usb(ploopy_headphones 0)
diff --git a/firmware/code/bqf.c b/firmware/code/bqf.c
index 21b67fb..191137c 100644
--- a/firmware/code/bqf.c
+++ b/firmware/code/bqf.c
@@ -467,21 +467,6 @@ void bqf_highshelf_config(double fs, double f0, double dBgain, double Q,
     coefficients->a2 = fix3_28_from_dbl(a2);
 }
 
-fix3_28_t bqf_transform(fix3_28_t x, bqf_coeff_t *coefficients, bqf_mem_t *memory) {
-    fix3_28_t y = fix16_mul(coefficients->b0, x) -
-            fix16_mul(coefficients->a1, memory->y_1) +
-            fix16_mul(coefficients->b1, memory->x_1) -
-            fix16_mul(coefficients->a2, memory->y_2) +
-            fix16_mul(coefficients->b2, memory->x_2);
-
-    memory->x_2 = memory->x_1;
-    memory->x_1 = x;
-    memory->y_2 = memory->y_1;
-    memory->y_1 = y;
-
-    return y;
-}
-
 void bqf_memreset(bqf_mem_t *memory) {
     memory->x_1 = fix16_zero;
     memory->x_2 = fix16_zero;
diff --git a/firmware/code/bqf.h b/firmware/code/bqf.h
index dcdc038..5a38ccb 100644
--- a/firmware/code/bqf.h
+++ b/firmware/code/bqf.h
@@ -41,9 +41,9 @@ typedef struct _bqf_mem_t {
     fix3_28_t y_2;
 } bqf_mem_t;
 
-// In reality we do not have enough CPU resource to run 8 filtering
-// stages without some optimisation.
-#define MAX_FILTER_STAGES 8
+// More filters should be possible, but the config structure
+// might grow beyond the current 512 byte size.
+#define MAX_FILTER_STAGES 20
 extern int filter_stages;
 
 extern bqf_coeff_t bqf_filters_left[MAX_FILTER_STAGES];
@@ -65,7 +65,8 @@ void bqf_peaking_config(double, double, double, double, bqf_coeff_t *);
 void bqf_lowshelf_config(double, double, double, double, bqf_coeff_t *);
 void bqf_highshelf_config(double, double, double, double, bqf_coeff_t *);
 
-fix3_28_t bqf_transform(fix3_28_t, bqf_coeff_t *, bqf_mem_t *);
+static inline fix3_28_t bqf_transform(fix3_28_t, bqf_coeff_t *, bqf_mem_t *);
 void bqf_memreset(bqf_mem_t *);
 
+#include "bqf.inl"
 #endif
diff --git a/firmware/code/bqf.inl b/firmware/code/bqf.inl
new file mode 100644
index 0000000..5fafdf5
--- /dev/null
+++ b/firmware/code/bqf.inl
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2022 Colin Lam, Ploopy Corporation
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPECIAL THANKS TO:
+ * Robert Bristow-Johnson, a.k.a. RBJ
+ * for his exceptional work on biquad formulae as applied to digital
+ * audio filtering, summarised in his pamphlet, "Audio EQ Cookbook".
+ */
+
+static inline fix3_28_t bqf_transform(fix3_28_t x, bqf_coeff_t *coefficients, bqf_mem_t *memory) {
+    fix3_28_t y = fix16_mul(coefficients->b0, x) -
+            fix16_mul(coefficients->a1, memory->y_1) +
+            fix16_mul(coefficients->b1, memory->x_1) -
+            fix16_mul(coefficients->a2, memory->y_2) +
+            fix16_mul(coefficients->b2, memory->x_2);
+
+    memory->x_2 = memory->x_1;
+    memory->x_1 = x;
+    memory->y_2 = memory->y_1;
+    memory->y_1 = y;
+
+    return y;
+}
\ No newline at end of file
diff --git a/firmware/code/configuration_manager.c b/firmware/code/configuration_manager.c
index f030521..e17728a 100644
--- a/firmware/code/configuration_manager.c
+++ b/firmware/code/configuration_manager.c
@@ -52,16 +52,23 @@ static const default_configuration default_config = {
     .set_configuration = { SET_CONFIGURATION, sizeof(default_config) },
     .filters = {
         .filter = { FILTER_CONFIGURATION, sizeof(default_config.filters) },
-        .f1 = { PEAKING,    {0},    40,   -20,  1.4 },
-        .f2 = { LOWSHELF,   {0},    105,  2.5,  0.7 },
-        .f3 = { PEAKING,    {0},    450,  7,    1.8 },
-        .f4 = { PEAKING,    {0},    2100, 8,    3.0 },
-        .f5 = { PEAKING,    {0},    3500, -7.5, 2.9 },
-        .f6 = { PEAKING,    {0},    5200, 5.5,  3.0 },
-        .f7 = { PEAKING,    {0},    6400, -19,  4.0 },
-        .f8 = { PEAKING,    {0},    9000, 3.0,  2.0 }
+        .f1  = { PEAKING,    {0},    38.5, -21.0,  1.4  },
+        .f2  = { PEAKING,    {0},    60,    -6.7,  0.5  },
+        .f3  = { LOWSHELF,   {0},    105,    5.5,  0.71 },
+        .f4  = { PEAKING,    {0},    280,   -3.5,  1.1  },
+        .f5  = { PEAKING,    {0},    350,   -1.6,  6.0  },
+        .f6  = { PEAKING,    {0},    425,    7.8,  1.3  },
+        .f7  = { PEAKING,    {0},    500,   -2.0,  7.0  },
+        .f8  = { PEAKING,    {0},    690,   -5.5,  3.0  },
+        .f9  = { PEAKING,    {0},   1000,   -2.2,  5.0  },
+        .f10 = { PEAKING,    {0},   1530,   -4.0,  2.5  },
+        .f11 = { PEAKING,    {0},   2250,    6.0,  2.0  },
+        .f12 = { PEAKING,    {0},   3430,  -12.2,  2.0  },
+        .f13 = { PEAKING,    {0},   4800,    4.0,  2.0  },
+        .f14 = { PEAKING,    {0},   6200,  -15.0,  3.0  },
+        .f15 = { HIGHSHELF,  {0},  12000,   -6.0,  0.71 }
     },
-    .preprocessing = { .header = { PREPROCESSING_CONFIGURATION, sizeof(default_config.preprocessing) }, -0.16f, true, {0} }
+    .preprocessing = { .header = { PREPROCESSING_CONFIGURATION, sizeof(default_config.preprocessing) }, -0.08f, true, {0} }
 };
 
 // Grab the last 4k page of flash for our configuration strutures.
@@ -74,7 +81,7 @@ const uint8_t *user_configuration = (const uint8_t *) (XIP_BASE + USER_CONFIGURA
  * should handle merging configurations where, for example, only a new
  * filter_configuration_tlv was received.
  */
-#define CFG_BUFFER_SIZE 256
+#define CFG_BUFFER_SIZE 512
 static uint8_t working_configuration[2][CFG_BUFFER_SIZE];
 static uint8_t inactive_working_configuration = 0;
 static uint8_t result_buffer[CFG_BUFFER_SIZE] = { U16_TO_U8S_LE(NOK), U16_TO_U8S_LE(0) };
@@ -129,7 +136,7 @@ bool validate_filter_configuration(filter_configuration_tlv *filters)
                 printf("Error! Not enough data left for filter6 (%d)\n", remaining);
                 return false;
             }
-            if (args->a0 == 0.0) {
+            if (args->a0 == 0.0f) {
                 printf("Error! The a0 co-efficient of an IIR filter must not be 0.\n");
                 return false;
             }
@@ -182,7 +189,7 @@ void apply_filter_configuration(filter_configuration_tlv *filters) {
                 uint32_t checksum = 0;
                 for (int i = 0; i < sizeof(filter6) / 4; i++) checksum ^= ((uint32_t*) args)[i];
                 if (checksum != bqf_filter_checksum[filter_stages]) {
-                    bqf_filters_left[filter_stages].a0 = fix3_28_from_dbl(1.0);
+                    bqf_filters_left[filter_stages].a0 = fix16_one;
                     bqf_filters_left[filter_stages].a1 = fix3_28_from_dbl(args->a1/args->a0);
                     bqf_filters_left[filter_stages].a2 = fix3_28_from_dbl(args->a2/args->a0);
                     bqf_filters_left[filter_stages].b0 = fix3_28_from_dbl(args->b0/args->a0);
@@ -308,7 +315,7 @@ bool apply_configuration(tlv_header *config) {
 #ifndef TEST_TARGET
             case PREPROCESSING_CONFIGURATION: {
                 preprocessing_configuration_tlv* preprocessing_config = (preprocessing_configuration_tlv*) tlv;
-                preprocessing.preamp = fix3_28_from_dbl(1.0 + preprocessing_config->preamp);
+                preprocessing.preamp = fix3_28_from_flt(1.0f + preprocessing_config->preamp);
                 preprocessing.reverse_stereo = preprocessing_config->reverse_stereo;
                 break;
             }
@@ -352,7 +359,7 @@ bool __no_inline_not_in_flash_func(save_configuration)() {
 
         const size_t config_length = config->length - ((size_t)config->value - (size_t)config);
         // Write data to flash
-        uint8_t flash_buffer[FLASH_PAGE_SIZE];
+        uint8_t flash_buffer[CFG_BUFFER_SIZE];
         flash_header_tlv* flash_header = (flash_header_tlv*) flash_buffer;
         flash_header->header.type = FLASH_HEADER;
         flash_header->header.length = sizeof(flash_header_tlv) + config_length;
@@ -362,7 +369,7 @@ bool __no_inline_not_in_flash_func(save_configuration)() {
 
         uint32_t ints = save_and_disable_interrupts();
         flash_range_erase(USER_CONFIGURATION_OFFSET, FLASH_SECTOR_SIZE);
-        flash_range_program(USER_CONFIGURATION_OFFSET, flash_buffer, FLASH_PAGE_SIZE);
+        flash_range_program(USER_CONFIGURATION_OFFSET, flash_buffer, CFG_BUFFER_SIZE);
         restore_interrupts(ints);
 
         power_up_dac();
diff --git a/firmware/code/configuration_types.h b/firmware/code/configuration_types.h
index a501246..051edd4 100644
--- a/firmware/code/configuration_types.h
+++ b/firmware/code/configuration_types.h
@@ -17,8 +17,8 @@
 #include <stdint.h>
 
 #define FLASH_MAGIC 0x2E8AFEDD
-#define CONFIG_VERSION 2
-#define MINIMUM_CONFIG_VERSION 1
+#define CONFIG_VERSION 3
+#define MINIMUM_CONFIG_VERSION 3
 
 enum structure_types {
     // Commands/Responses, these are container TLVs. The Value will be a set of TLV structures.
@@ -53,18 +53,20 @@ typedef struct __attribute__((__packed__)) _tlv_header {
 typedef struct __attribute__((__packed__)) _filter2 {
     uint8_t type;
     uint8_t reserved[3];
-    double f0;
-    double Q;
+    float f0;
+    float Q;
 } filter2;
 
 typedef struct __attribute__((__packed__)) _filter3 {
     uint8_t type;
     uint8_t reserved[3];
-    double f0;
-    double db_gain;
-    double Q;
+    float f0;
+    float db_gain;
+    float Q;
 } filter3;
 
+// WARNING: We wont be able to support more than 8 of these filters
+// due to the config structure size.
 typedef struct __attribute__((__packed__)) _filter6 {
     uint8_t type;
     uint8_t reserved[3];
@@ -98,7 +100,7 @@ typedef struct __attribute__((__packed__)) _flash_header_tlv {
 
 typedef struct __attribute__((__packed__)) _preprocessing_configuration_tlv {
     tlv_header header;
-    double preamp;
+    float preamp;
     uint8_t reverse_stereo;
     uint8_t reserved[3];
 } preprocessing_configuration_tlv;
@@ -137,6 +139,13 @@ typedef struct __attribute__((__packed__)) _default_configuration {
         filter3 f6;
         filter3 f7;
         filter3 f8;
+        filter3 f9;
+        filter3 f10;
+        filter3 f11;
+        filter3 f12;
+        filter3 f13;
+        filter3 f14;
+        filter3 f15;
     } filters;
     preprocessing_configuration_tlv preprocessing;
 } default_configuration;
diff --git a/firmware/code/fix16.h b/firmware/code/fix16.h
index 40acced..66799ad 100644
--- a/firmware/code/fix16.h
+++ b/firmware/code/fix16.h
@@ -25,13 +25,6 @@
 #include <stdbool.h>
 #include <inttypes.h>
 
-// During development, it can be useful to run with real double values for reference.
-//#define USE_DOUBLE
-#ifdef USE_DOUBLE
-typedef double fix16_t;
-static const fix16_t fix16_zero = 0;
-static const fix16_t fix16_one = 1;
-#else
 
 /// @brief Fixed point math type, in format Q3.28. One sign bit, 3 bits for left-of-decimal
 ///and 28 for right-of-decimal. This arrangment works because we normalize the incoming USB
@@ -46,15 +39,15 @@ static const fix3_28_t fix16_one =    0x10000000;
 /// @brief Represents zero in fixed point world.
 static const fix3_28_t fix16_zero = 0x00000000;
 
-#endif
+static inline fix3_28_t norm_fix3_28_from_s16sample(int16_t);
 
+static inline int16_t norm_fix3_28_to_s16sample(fix3_28_t);
 
-fix3_28_t norm_fix3_28_from_s16sample(int16_t);
+static inline fix3_28_t fix3_28_from_flt(float);
 
-int16_t norm_fix3_28_to_s16sample(fix3_28_t);
+static inline fix3_28_t fix3_28_from_dbl(double);
 
-fix3_28_t fix3_28_from_dbl(double);
-
-fix3_28_t fix16_mul(fix3_28_t, fix3_28_t);
+static inline fix3_28_t fix16_mul(fix3_28_t, fix3_28_t);
 
+#include "fix16.inl"
 #endif
\ No newline at end of file
diff --git a/firmware/code/fix16.c b/firmware/code/fix16.inl
similarity index 66%
rename from firmware/code/fix16.c
rename to firmware/code/fix16.inl
index 1695ed6..ca45a2e 100644
--- a/firmware/code/fix16.c
+++ b/firmware/code/fix16.inl
@@ -25,46 +25,10 @@
 #include <limits.h>
 #include "fix16.h"
 
-#ifdef USE_DOUBLE
-fix16_t fix16_from_s16sample(int16_t a) {
-    return a;
-}
-
-int16_t fix16_to_s16sample(fix16_t a) {
-    // Handle rounding up front, adding one can cause an overflow/underflow
-    if (a < 0) {
-        a -= 0.5;
-    } else {
-        a += 0.5;
-    }
-
-    // Saturate the value if an overflow has occurred
-    if (a < SHRT_MIN) {
-        return SHRT_MIN;
-    }
-    if (a < SHRT_MAX) {
-        return SHRT_MAX;
-    }
-    return a;
-}
-
-fix16_t fix16_from_dbl(double a) {
-    return a;
-}
-
-double fix16_to_dbl(fix16_t a) {
-    return a;
-}
-
-fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1) {
-    return inArg0 * inArg1;
-}
-#else
-
 /// @brief Produces a fixed point number from a 16-bit signed integer, normalized to ]-1,1[.
 /// @param a Signed 16-bit integer.
 /// @return A fixed point number in Q3.28 format, with input normalized to ]-1,1[.
-fix3_28_t norm_fix3_28_from_s16sample(int16_t a) {
+static inline fix3_28_t norm_fix3_28_from_s16sample(int16_t a) {
     /* So, we're using a Q3.28 fixed point system here, and we want the incoming
        audio signal to be represented as a number between -1 and 1. To do this,
        we need the 16-bit value to map to the 28-bit right-of-decimal field in
@@ -79,7 +43,7 @@ fix3_28_t norm_fix3_28_from_s16sample(int16_t a) {
 ///        calculated sample to one that the DAC can understand.
 /// @param a
 /// @return Signed 16-bit integer.
-int16_t norm_fix3_28_to_s16sample(fix3_28_t a) {
+static inline int16_t norm_fix3_28_to_s16sample(fix3_28_t a) {
     // Handle rounding up front, adding one can cause an overflow/underflow
 
     // It's not clear exactly how this works, so we'll disable it for now.
@@ -110,8 +74,13 @@ int16_t norm_fix3_28_to_s16sample(fix3_28_t a) {
     return (a >> 12);
 }
 
+static inline fix3_28_t fix3_28_from_flt(float a) {
+    float temp = a * fix16_one;
+    temp += ((temp >= 0) ? 0.5f : -0.5f);
+    return (fix3_28_t)temp;
+}
 
-fix3_28_t fix3_28_from_dbl(double a) {
+static inline fix3_28_t fix3_28_from_dbl(double a) {
     double temp = a * fix16_one;
     temp += (double)((temp >= 0) ? 0.5f : -0.5f);
     return (fix3_28_t)temp;
@@ -121,27 +90,22 @@ fix3_28_t fix3_28_from_dbl(double a) {
 /// @param inArg0 Q3.28 format fixed point number.
 /// @param inArg1 Q3.28 format fixed point number.
 /// @return A Q3.28 fixed point number that represents the truncated result of inArg0 x inArg1.
-fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) {
-    const int64_t product = (int64_t)inArg0 * inArg1;
+static inline fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) {
+    int32_t A = (inArg0 >> 14), C = (inArg1 >> 14);
+    uint32_t B = (inArg0 & 0x3FFF), D = (inArg1 & 0x3FFF);
+    int32_t AC = A*C;
+    int32_t AD_CB = A*D + C*B;
+    int32_t product_hi = AC + (AD_CB >> 14);
 
-    /* Since we're expecting 2 Q3.28 numbers, the multiplication result should be a Q7.56 number.
-       To bring this number back to the right order of magnitude, we need to shift
-       it to the right by 28. */
-    fix3_28_t result = product >> 28;
+#if HANDLE_CARRY
+	// Handle carry from lower bits to upper part of result.
+    uint32_t BD = B*D;
+	uint32_t ad_cb_temp = AD_CB << 14;
+	uint32_t product_lo = BD + ad_cb_temp;
 
-    // Handle rounding where we are choppping off low order bits
-    // Disabled for now, too much load. We get crackling when adjusting
-    // the volume.
-    #if 0
-    if (product & 0x4000) {
-        if (result >= 0) {
-            result++;
-        }
-        else {
-            result--;
-        }
-    }
-    #endif
-    return result;
-}
-#endif
\ No newline at end of file
+	if (product_lo < BD)
+		product_hi++;
+#endif
+
+    return product_hi;
+}
\ No newline at end of file
diff --git a/firmware/code/i2s.h b/firmware/code/i2s.h
index ca30353..432b8e9 100644
--- a/firmware/code/i2s.h
+++ b/firmware/code/i2s.h
@@ -70,4 +70,4 @@ void feed_dma(i2s_obj_t *, uint8_t *);
 
 uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *, const uint8_t *, uint);
 
-#endif
\ No newline at end of file
+#endif
diff --git a/firmware/code/ringbuf.c b/firmware/code/ringbuf.c
index b6399f3..4c417c5 100644
--- a/firmware/code/ringbuf.c
+++ b/firmware/code/ringbuf.c
@@ -78,4 +78,4 @@ size_t ringbuf_available_data(ring_buf_t *rbuf) {
 
 size_t ringbuf_available_space(ring_buf_t *rbuf) {
     return rbuf->size - ringbuf_available_data(rbuf) - 1;
-}
\ No newline at end of file
+}
diff --git a/firmware/code/ringbuf.h b/firmware/code/ringbuf.h
index 5e1cbcc..8543179 100644
--- a/firmware/code/ringbuf.h
+++ b/firmware/code/ringbuf.h
@@ -42,4 +42,4 @@ bool ringbuf_is_full(ring_buf_t *);
 size_t ringbuf_available_data(ring_buf_t *);
 size_t ringbuf_available_space(ring_buf_t *);
 
-#endif
\ No newline at end of file
+#endif
diff --git a/firmware/code/run.c b/firmware/code/run.c
index 0920b1b..3a33f89 100644
--- a/firmware/code/run.c
+++ b/firmware/code/run.c
@@ -118,7 +118,7 @@ static void update_volume()
 // PCM data into I2S data that gets shipped out to the PCM3060. It really
 // belongs with the other USB-related code due to its utter indecipherability,
 // but it's placed here to emphasize its importance.
-static void _as_audio_packet(struct usb_endpoint *ep) {
+static void __no_inline_not_in_flash_func(_as_audio_packet)(struct usb_endpoint *ep) {
     struct usb_buffer *usb_buffer = usb_current_out_packet_buffer(ep);
     int16_t *in = (int16_t *) usb_buffer->data;
     int32_t *out = (int32_t *) userbuf;
@@ -156,15 +156,22 @@ static void _as_audio_packet(struct usb_endpoint *ep) {
     uint32_t ready = multicore_fifo_pop_blocking();
     multicore_fifo_push_blocking(CORE0_READY);
 
+    // Update the volume if required. We do this from core1 as
+    // core0 is more heavily loaded, doing this from core0 can
+    // lead to audio crackling.
+    update_volume();
+
+    // Update filters if required
+    apply_config_changes();
+
     // keep on truckin'
     usb_grow_transfer(ep->current_transfer, 1);
     usb_packet_done(ep);
 }
 
-void core1_entry() {
+void __no_inline_not_in_flash_func(core1_entry)() {
     uint8_t *userbuf = (uint8_t *) multicore_fifo_pop_blocking();
     int32_t *out = (int32_t *) userbuf;
-    int limit_counter = 100;
 
     // Signal that the thread has started
     multicore_fifo_push_blocking(CORE1_READY);
@@ -191,19 +198,6 @@ void core1_entry() {
             }
         }
 
-        // Update the volume and filter configs if required. We do this from
-        // core1 as core0 is more heavily loaded, doing this from core0 can
-        // lead to audio crackling.
-        // Use of a counter reduces the amount of crackling when changing
-        // volume.
-        if (limit_counter != 0)
-            limit_counter--;
-        else {
-            limit_counter = 100;
-            update_volume();
-            apply_config_changes();
-        }
-
         // Signal to core 0 that the data has all been transformed
         multicore_fifo_push_blocking(CORE1_READY);
 
@@ -253,7 +247,7 @@ void setup() {
     // The PCM3060 supports standard mode (100kbps) or fast mode (400kbps)
     // we run in fast mode so we dont block the core for too long while
     // updating the volume.
-    i2c_init(i2c0, 100000);
+    i2c_init(i2c0, 400000);
     gpio_set_function(PCM3060_SDA_PIN, GPIO_FUNC_I2C);
     gpio_set_function(PCM3060_SCL_PIN, GPIO_FUNC_I2C);
     gpio_pull_up(PCM3060_SDA_PIN);
@@ -297,6 +291,7 @@ void setup() {
  * IF YOU DO, YOU COULD BLOW UP YOUR HARDWARE!                               *
  * YOU WERE WARNED!!!!!!!!!!!!!!!!                                           *
  ****************************************************************************/
+ // TODO: roundf will be much faster than round, but it might mess with timings
 void configure_neg_switch_pwm() {
     gpio_set_function(NEG_SWITCH_PWM_PIN, GPIO_FUNC_PWM);
     uint slice_num = pwm_gpio_to_slice_num(NEG_SWITCH_PWM_PIN);