Seems to work OK with 10 filters. Just noise with 11.

2023-08-19 00:44:00 +01:00 · 2023-08-19 00:44:00 +01:00 · 635dac208a
parent d2bb88a9fb
commit 635dac208a
9 changed files with 52 additions and 123 deletions
--- a/firmware/code/CMakeLists.txt
+++ b/firmware/code/CMakeLists.txt
@ -14,7 +14,6 @@ add_executable(ploopy_headphones
    run.c
    ringbuf.c
    i2s.c
    fix16.c
    bqf.c
    configuration_manager.c
 )
--- a/firmware/code/bqf.h
+++ b/firmware/code/bqf.h
@ -43,7 +43,7 @@ typedef struct _bqf_mem_t {
 // In reality we do not have enough CPU resource to run 8 filtering
 // stages without some optimisation.
-#define MAX_FILTER_STAGES 8
+#define MAX_FILTER_STAGES 10
 extern int filter_stages;
 extern bqf_coeff_t bqf_filters_left[MAX_FILTER_STAGES];
--- a/firmware/code/fix16.h
+++ b/firmware/code/fix16.h
@ -25,13 +25,6 @@
 #include <stdbool.h>
 #include <inttypes.h>
 // During development, it can be useful to run with real double values for reference.
 //#define USE_DOUBLE
 #ifdef USE_DOUBLE
 typedef double fix16_t;
 static const fix16_t fix16_zero = 0;
 static const fix16_t fix16_one = 1;
 #else
 /// @brief Fixed point math type, in format Q3.28. One sign bit, 3 bits for left-of-decimal
 /// and 28 for right-of-decimal. This arrangement works because we normalize the incoming USB
@ -46,15 +39,13 @@ static const fix3_28_t fix16_one =    0x10000000;
 /// @brief Represents zero in fixed point world.
 static const fix3_28_t fix16_zero = 0x00000000;
-#endif
+static inline fix3_28_t norm_fix3_28_from_s16sample(int16_t);
 static inline int16_t norm_fix3_28_to_s16sample(fix3_28_t);
-fix3_28_t norm_fix3_28_from_s16sample(int16_t);
+static inline fix3_28_t fix3_28_from_dbl(double);
-int16_t norm_fix3_28_to_s16sample(fix3_28_t);
+static inline fix3_28_t fix16_mul(fix3_28_t, fix3_28_t);
 fix3_28_t fix3_28_from_dbl(double);
 fix3_28_t fix16_mul(fix3_28_t, fix3_28_t);
 #include "fix16.inl"
 #endif
--- a/firmware/code/fix16.inl
+++ b/firmware/code/fix16.inl
@ -25,46 +25,10 @@
 #include <limits.h>
 #include "fix16.h"
 #ifdef USE_DOUBLE
 /// @brief USE_DOUBLE reference build: converts a 16-bit sample to fix16_t (a double
 ///        in this build) without any scaling.
 /// @param a Signed 16-bit integer.
 /// @return The value of a, unchanged, as a fix16_t.
 fix16_t fix16_from_s16sample(int16_t a) {
    return a;
 }
 /// @brief USE_DOUBLE reference build: converts a sample back to a signed 16-bit
 ///        integer, rounding half away from zero and saturating at the short range.
 /// @param a Sample value (fix16_t is a double in this build).
 /// @return Signed 16-bit integer, clamped to [SHRT_MIN, SHRT_MAX].
 int16_t fix16_to_s16sample(fix16_t a) {
    // Handle rounding up front, adding one can cause an overflow/underflow
    if (a < 0) {
        a -= 0.5;
    } else {
        a += 0.5;
    }
    // Saturate the value if an overflow has occurred
    if (a < SHRT_MIN) {
        return SHRT_MIN;
    }
    // Clamp only values above the maximum; testing `a < SHRT_MAX` here would
    // turn every in-range sample into SHRT_MAX.
    if (a > SHRT_MAX) {
        return SHRT_MAX;
    }
    // In range: the conversion truncates toward zero, completing the rounding.
    return (int16_t)a;
 }
 /// @brief USE_DOUBLE reference build: fix16_t is a double here, so the input is
 ///        returned as-is.
 /// @param a Value to convert.
 /// @return The same value as a fix16_t.
 fix16_t fix16_from_dbl(double a) {
    return a;
 }
 /// @brief USE_DOUBLE reference build: fix16_t is a double here, so the input is
 ///        returned as-is.
 /// @param a Value to convert.
 /// @return The same value as a double.
 double fix16_to_dbl(fix16_t a) {
    return a;
 }
 /// @brief USE_DOUBLE reference build: multiplies two values using plain double
 ///        multiplication (no fixed-point shifting or saturation is performed here).
 /// @param inArg0 First operand.
 /// @param inArg1 Second operand.
 /// @return The product inArg0 * inArg1.
 fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1) {
    return inArg0 * inArg1;
 }
 #else
 /// @brief Produces a fixed point number from a 16-bit signed integer, normalized to ]-1,1[.
 /// @param a Signed 16-bit integer.
 /// @return A fixed point number in Q3.28 format, with input normalized to ]-1,1[.
-fix3_28_t norm_fix3_28_from_s16sample(int16_t a) {
+static inline fix3_28_t norm_fix3_28_from_s16sample(int16_t a) {
    /* So, we're using a Q3.28 fixed point system here, and we want the incoming
       audio signal to be represented as a number between -1 and 1. To do this,
       we need the 16-bit value to map to the 28-bit right-of-decimal field in
@ -79,7 +43,7 @@ fix3_28_t norm_fix3_28_from_s16sample(int16_t a) {
 ///        calculated sample to one that the DAC can understand.
 /// @param a
 /// @return Signed 16-bit integer.
-int16_t norm_fix3_28_to_s16sample(fix3_28_t a) {
+static inline int16_t norm_fix3_28_to_s16sample(fix3_28_t a) {
    // Handle rounding up front, adding one can cause an overflow/underflow
    // It's not clear exactly how this works, so we'll disable it for now.
@ -110,8 +74,7 @@ int16_t norm_fix3_28_to_s16sample(fix3_28_t a) {
    return (a >> 12);
 }
-
+static inline fix3_28_t fix3_28_from_dbl(double a) {
 fix3_28_t fix3_28_from_dbl(double a) {
    double temp = a * fix16_one;
    temp += (double)((temp >= 0) ? 0.5f : -0.5f);
    return (fix3_28_t)temp;
@ -121,7 +84,7 @@ fix3_28_t fix3_28_from_dbl(double a) {
 /// @param inArg0 Q3.28 format fixed point number.
 /// @param inArg1 Q3.28 format fixed point number.
 /// @return A Q3.28 fixed point number that represents the truncated result of inArg0 x inArg1.
-fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) {
+static inline fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) {
    const int64_t product = (int64_t)inArg0 * inArg1;
    /* Since we're expecting 2 Q3.28 numbers, the multiplication result should be a Q7.56 number.
@ -143,5 +106,4 @@ fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) {
    }
    #endif
    return result;
-}
+}
 #endif
--- a/firmware/code/i2s.c
+++ b/firmware/code/i2s.c
@ -64,7 +64,7 @@ void i2s_write_init(i2s_obj_t *self) {
            self->prog_offset + self->pio_program->length - 1);
    pio_sm_set_config(self->pio, self->sm, &config);
-    uint8_t *rbs = malloc(sizeof(uint8_t) * RINGBUF_LEN_IN_BYTES);
+    uint32_t *rbs = malloc(sizeof(uint8_t) * RINGBUF_LEN_IN_BYTES);
    ringbuf_init(&self->ring_buffer, rbs, RINGBUF_LEN_IN_BYTES);
    irq_set_exclusive_handler(DMA_IRQ_1, dma_irq_write_handler);
@ -169,27 +169,27 @@ uint8_t *dma_get_buffer(i2s_obj_t *i2s_obj, uint channel) {
 void feed_dma(i2s_obj_t *self, uint8_t *dma_buffer_p) {
    // when data exists, copy samples from ring buffer
    if (ringbuf_available_data(&self->ring_buffer) >= SIZEOF_HALF_DMA_BUFFER_IN_BYTES) {
-        for (uint32_t i = 0; i < SIZEOF_HALF_DMA_BUFFER_IN_BYTES; i++)
+        for (uint32_t i = 0; i < SIZEOF_HALF_DMA_BUFFER_IN_BYTES; i+=4)
-            ringbuf_pop(&self->ring_buffer, &dma_buffer_p[i]);
+            ringbuf_pop(&self->ring_buffer, (uint32_t*)&dma_buffer_p[i]);
    } else {
        // underflow.  clear buffer to transmit "silence" on the I2S bus
        memset(dma_buffer_p, 0, SIZEOF_HALF_DMA_BUFFER_IN_BYTES);
    }
 }
-uint i2s_stream_write(i2s_obj_t *self, const uint8_t *buf_out, uint size) {
+uint i2s_stream_write(i2s_obj_t *self, const uint32_t *buf_out, uint size) {
    if (size == 0) {
        //printf("ERROR: buffer can't be length zero");
        exit(1);
    }
-    uint32_t num_bytes_written = copy_userbuf_to_ringbuf(self, buf_out, size);
+    uint32_t num_words_written = copy_userbuf_to_ringbuf(self, buf_out, size);
-    return num_bytes_written;
+    return num_words_written;
 }
 // TODO maybe we can skip every fourth byte, if we're doing this in 24-bit...
 // could save on some processing power
-uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *self, const uint8_t *buf_out, uint size) {
+uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *self, const uint32_t *buf_out, uint size) {
    uint32_t a_index = 0;
    while (a_index < size) {
--- a/firmware/code/i2s.h
+++ b/firmware/code/i2s.h
@ -59,7 +59,7 @@ typedef struct _i2s_obj_t {
 extern i2s_obj_t i2s_write_obj;
 void i2s_write_init(i2s_obj_t *);
-uint i2s_stream_write(i2s_obj_t *, const uint8_t *, uint);
+uint i2s_stream_write(i2s_obj_t *, const uint32_t *, uint);
 void dma_irq_handler(uint8_t);
 void dma_irq_write_handler(void);
@ -68,6 +68,6 @@ void dma_configure(i2s_obj_t *);
 uint8_t *dma_get_buffer(i2s_obj_t *, uint);
 void feed_dma(i2s_obj_t *, uint8_t *);
-uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *, const uint8_t *, uint);
+uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *, const uint32_t *, uint);
 #endif
--- a/firmware/code/ringbuf.c
+++ b/firmware/code/ringbuf.c
@ -33,14 +33,14 @@
 // - Sequential atomic operations
 // One byte of capacity is used to detect buffer empty/full
-void ringbuf_init(ring_buf_t *rbuf, uint8_t *buffer, size_t size) {
+void ringbuf_init(ring_buf_t *rbuf, uint32_t *buffer, size_t size) {
    rbuf->buffer = buffer;
    rbuf->size = size;
    rbuf->head = 0;
    rbuf->tail = 0;
 }
-bool ringbuf_push(ring_buf_t *rbuf, uint8_t data) {
+bool ringbuf_push(ring_buf_t *rbuf, uint32_t data) {
    size_t next_tail = (rbuf->tail + 1) % rbuf->size;
    if (next_tail != rbuf->head) {
@ -53,7 +53,7 @@ bool ringbuf_push(ring_buf_t *rbuf, uint8_t data) {
    return false;
 }
-bool ringbuf_pop(ring_buf_t *rbuf, uint8_t *data) {
+bool ringbuf_pop(ring_buf_t *rbuf, uint32_t *data) {
    if (rbuf->head == rbuf->tail) {
        // empty
        return false;
--- a/firmware/code/ringbuf.h
+++ b/firmware/code/ringbuf.h
@ -28,15 +28,15 @@
 #include "pico/stdlib.h"
 typedef struct _ring_buf_t {
-    uint8_t *buffer;
+    uint32_t *buffer;
    size_t head;
    size_t tail;
    size_t size;
 } ring_buf_t;
-void ringbuf_init(ring_buf_t *, uint8_t *, size_t);
+void ringbuf_init(ring_buf_t *, uint32_t *, size_t);
-bool ringbuf_push(ring_buf_t *, uint8_t );
+bool ringbuf_push(ring_buf_t *, uint32_t );
-bool ringbuf_pop(ring_buf_t *, uint8_t *);
+bool ringbuf_pop(ring_buf_t *, uint32_t *);
 bool ringbuf_is_empty(ring_buf_t *);
 bool ringbuf_is_full(ring_buf_t *);
 size_t ringbuf_available_data(ring_buf_t *);
--- a/firmware/code/run.c
+++ b/firmware/code/run.c
@ -123,83 +123,60 @@ static void __no_inline_not_in_flash_func(_as_audio_packet)(struct usb_endpoint
    int16_t *in = (int16_t *) usb_buffer->data;
    int32_t *out = (int32_t *) userbuf;
    int samples = usb_buffer->data_len / 2;
- 
+
-    multicore_fifo_push_blocking(CORE0_READY);
+    // TODO: For some reason if we try to process in from both cores the left and right channels
-    multicore_fifo_push_blocking((uintptr_t) in);
+    // flip back and forth..
    if (preprocessing.reverse_stereo) {
        for (int i = 0; i < samples; i+=2) {
            out[i] = fix16_mul(norm_fix3_28_from_s16sample(in[i+1]), preprocessing.preamp);
            out[i+1] = fix16_mul(norm_fix3_28_from_s16sample(in[i]), preprocessing.preamp);
        }
    }
    else {
        for (int i = 0; i < samples; i++)
            out[i] = fix16_mul(norm_fix3_28_from_s16sample(in[i]), preprocessing.preamp);
    }
    multicore_fifo_push_blocking(samples);
    if (preprocessing.reverse_stereo) {
        in++;
    }
    for (int i = 0; i < samples; i += 2) {
        // Preamp the sample
        fix3_28_t x_f16 = fix16_mul(norm_fix3_28_from_s16sample((int16_t) in[i]), preprocessing.preamp);
        // Run the filters
        for (int j = 0; j < filter_stages; j++) {
-            x_f16 = bqf_transform(x_f16, &bqf_filters_left[j], &bqf_filters_mem_left[j]);
+            out[i] = bqf_transform(out[i], &bqf_filters_left[j], &bqf_filters_mem_left[j]);
        }
-        // Convert back to sample
+        out[i] = (int32_t) norm_fix3_28_to_s16sample(out[i]);
        out[i] = (int32_t) norm_fix3_28_to_s16sample(x_f16);
    }
    // Signal to core 1 that we have processed our samples, so it can write to I2S
    multicore_fifo_push_blocking(CORE0_READY);
    update_volume();
    apply_config_changes();
    // keep on truckin'
    usb_grow_transfer(ep->current_transfer, 1);
    usb_packet_done(ep);
 }
 void __no_inline_not_in_flash_func(core1_entry)() {
-    uint8_t *userbuf = (uint8_t *) multicore_fifo_pop_blocking();
+    uint32_t *userbuf = (uint32_t *) multicore_fifo_pop_blocking();
    int32_t *out = (int32_t *) userbuf;
    int limit_counter = 100;
    // Signal that the thread has started
    multicore_fifo_push_blocking(CORE1_READY);
    while (true) {
        // Block until the userbuf is filled with data
        uint32_t ready = multicore_fifo_pop_blocking();
        while (ready != CORE0_READY)
            ready = multicore_fifo_pop_blocking();
        int16_t *in = (int16_t *) multicore_fifo_pop_blocking();
        const uint32_t samples = multicore_fifo_pop_blocking();
        if (preprocessing.reverse_stereo) {
            in--;
        }
        for (int i = 1; i < samples; i += 2) {
            // Preamp the sample
            fix3_28_t x_f16 = fix16_mul(norm_fix3_28_from_s16sample((int16_t) in[i]), preprocessing.preamp);
            // Run the filters
            for (int j = 0; j < filter_stages; j++) {
-                x_f16 = bqf_transform(x_f16, &bqf_filters_right[j],  &bqf_filters_mem_right[j]);
+                out[i] = bqf_transform(out[i], &bqf_filters_right[j], &bqf_filters_mem_right[j]);
            }
-            // Convert back to sample
+            out[i] = (int32_t) norm_fix3_28_to_s16sample(out[i]);
            out[i] = (int32_t) norm_fix3_28_to_s16sample(x_f16);
        }
-        // Update the volume and filter configs if required. We do this from
+        // Wait for Core 0 to finish running its filtering before we apply config updates
-        // core1 as core0 is more heavily loaded, doing this from core0 can
+        multicore_fifo_pop_blocking();
-        // lead to audio crackling.
+        i2s_stream_write(&i2s_write_obj, userbuf, samples);
        // Use of a counter reduces the amount of crackling when changing
        // volume.
        if (limit_counter != 0)
            limit_counter--;
        else {
            limit_counter = 100;
            update_volume();
            apply_config_changes();
        }
        // Signal to core 0 that the data has all been transformed
        multicore_fifo_push_blocking(CORE1_READY);
        i2s_stream_write(&i2s_write_obj, userbuf, samples * 4);
    }
 }