diff --git a/firmware/code/CMakeLists.txt b/firmware/code/CMakeLists.txt index aae60e4..74d4658 100644 --- a/firmware/code/CMakeLists.txt +++ b/firmware/code/CMakeLists.txt @@ -14,7 +14,6 @@ add_executable(ploopy_headphones run.c ringbuf.c i2s.c - fix16.c bqf.c configuration_manager.c ) diff --git a/firmware/code/bqf.h b/firmware/code/bqf.h index dcdc038..8e5933c 100644 --- a/firmware/code/bqf.h +++ b/firmware/code/bqf.h @@ -43,7 +43,7 @@ typedef struct _bqf_mem_t { // In reality we do not have enough CPU resource to run 8 filtering // stages without some optimisation. -#define MAX_FILTER_STAGES 8 +#define MAX_FILTER_STAGES 10 extern int filter_stages; extern bqf_coeff_t bqf_filters_left[MAX_FILTER_STAGES]; diff --git a/firmware/code/fix16.h b/firmware/code/fix16.h index 40acced..9d7887e 100644 --- a/firmware/code/fix16.h +++ b/firmware/code/fix16.h @@ -25,13 +25,6 @@ #include #include -// During development, it can be useful to run with real double values for reference. -//#define USE_DOUBLE -#ifdef USE_DOUBLE -typedef double fix16_t; -static const fix16_t fix16_zero = 0; -static const fix16_t fix16_one = 1; -#else /// @brief Fixed point math type, in format Q3.28. One sign bit, 3 bits for left-of-decimal ///and 28 for right-of-decimal. This arrangment works because we normalize the incoming USB @@ -46,15 +39,13 @@ static const fix3_28_t fix16_one = 0x10000000; /// @brief Represents zero in fixed point world. 
static const fix3_28_t fix16_zero = 0x00000000; -#endif +static inline fix3_28_t norm_fix3_28_from_s16sample(int16_t); +static inline int16_t norm_fix3_28_to_s16sample(fix3_28_t); -fix3_28_t norm_fix3_28_from_s16sample(int16_t); +static inline fix3_28_t fix3_28_from_dbl(double); -int16_t norm_fix3_28_to_s16sample(fix3_28_t); - -fix3_28_t fix3_28_from_dbl(double); - -fix3_28_t fix16_mul(fix3_28_t, fix3_28_t); +static inline fix3_28_t fix16_mul(fix3_28_t, fix3_28_t); +#include "fix16.inl" #endif \ No newline at end of file diff --git a/firmware/code/fix16.c b/firmware/code/fix16.inl similarity index 81% rename from firmware/code/fix16.c rename to firmware/code/fix16.inl index 1695ed6..8ce3f5c 100644 --- a/firmware/code/fix16.c +++ b/firmware/code/fix16.inl @@ -25,46 +25,10 @@ #include #include "fix16.h" -#ifdef USE_DOUBLE -fix16_t fix16_from_s16sample(int16_t a) { - return a; -} - -int16_t fix16_to_s16sample(fix16_t a) { - // Handle rounding up front, adding one can cause an overflow/underflow - if (a < 0) { - a -= 0.5; - } else { - a += 0.5; - } - - // Saturate the value if an overflow has occurred - if (a < SHRT_MIN) { - return SHRT_MIN; - } - if (a < SHRT_MAX) { - return SHRT_MAX; - } - return a; -} - -fix16_t fix16_from_dbl(double a) { - return a; -} - -double fix16_to_dbl(fix16_t a) { - return a; -} - -fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1) { - return inArg0 * inArg1; -} -#else - /// @brief Produces a fixed point number from a 16-bit signed integer, normalized to ]-1,1[. /// @param a Signed 16-bit integer. /// @return A fixed point number in Q3.28 format, with input normalized to ]-1,1[. -fix3_28_t norm_fix3_28_from_s16sample(int16_t a) { +static inline fix3_28_t norm_fix3_28_from_s16sample(int16_t a) { /* So, we're using a Q3.28 fixed point system here, and we want the incoming audio signal to be represented as a number between -1 and 1. 
To do this, we need the 16-bit value to map to the 28-bit right-of-decimal field in @@ -79,7 +43,7 @@ fix3_28_t norm_fix3_28_from_s16sample(int16_t a) { /// calculated sample to one that the DAC can understand. /// @param a /// @return Signed 16-bit integer. -int16_t norm_fix3_28_to_s16sample(fix3_28_t a) { +static inline int16_t norm_fix3_28_to_s16sample(fix3_28_t a) { // Handle rounding up front, adding one can cause an overflow/underflow // It's not clear exactly how this works, so we'll disable it for now. @@ -110,8 +74,7 @@ int16_t norm_fix3_28_to_s16sample(fix3_28_t a) { return (a >> 12); } - -fix3_28_t fix3_28_from_dbl(double a) { +static inline fix3_28_t fix3_28_from_dbl(double a) { double temp = a * fix16_one; temp += (double)((temp >= 0) ? 0.5f : -0.5f); return (fix3_28_t)temp; @@ -121,7 +84,7 @@ fix3_28_t fix3_28_from_dbl(double a) { /// @param inArg0 Q3.28 format fixed point number. /// @param inArg1 Q3.28 format fixed point number. /// @return A Q3.28 fixed point number that represents the truncated result of inArg0 x inArg1. -fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) { +static inline fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) { const int64_t product = (int64_t)inArg0 * inArg1; /* Since we're expecting 2 Q3.28 numbers, the multiplication result should be a Q7.56 number. 
@@ -143,5 +106,4 @@ fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) { } #endif return result; -} -#endif \ No newline at end of file +} \ No newline at end of file diff --git a/firmware/code/i2s.c b/firmware/code/i2s.c index 89e0b21..73bc12c 100644 --- a/firmware/code/i2s.c +++ b/firmware/code/i2s.c @@ -64,7 +64,7 @@ void i2s_write_init(i2s_obj_t *self) { self->prog_offset + self->pio_program->length - 1); pio_sm_set_config(self->pio, self->sm, &config); - uint8_t *rbs = malloc(sizeof(uint8_t) * RINGBUF_LEN_IN_BYTES); + uint32_t *rbs = malloc(sizeof(uint8_t) * RINGBUF_LEN_IN_BYTES); - ringbuf_init(&self->ring_buffer, rbs, RINGBUF_LEN_IN_BYTES); + ringbuf_init(&self->ring_buffer, rbs, RINGBUF_LEN_IN_BYTES / sizeof(uint32_t)); /* size is an element count: the allocation is RINGBUF_LEN_IN_BYTES bytes, so it holds a quarter as many 32-bit words; passing the byte count would let head/tail index past the end of the allocation */ irq_set_exclusive_handler(DMA_IRQ_1, dma_irq_write_handler); @@ -169,27 +169,27 @@ uint8_t *dma_get_buffer(i2s_obj_t *i2s_obj, uint channel) { void feed_dma(i2s_obj_t *self, uint8_t *dma_buffer_p) { // when data exists, copy samples from ring buffer - if (ringbuf_available_data(&self->ring_buffer) >= SIZEOF_HALF_DMA_BUFFER_IN_BYTES) { + if (ringbuf_available_data(&self->ring_buffer) >= SIZEOF_HALF_DMA_BUFFER_IN_BYTES / sizeof(uint32_t)) { /* the ring buffer is indexed in 32-bit words, so compare in words, not bytes (assumes ringbuf_available_data returns an element count - confirm) */ - for (uint32_t i = 0; i < SIZEOF_HALF_DMA_BUFFER_IN_BYTES; i++) - ringbuf_pop(&self->ring_buffer, &dma_buffer_p[i]); + for (uint32_t i = 0; i < SIZEOF_HALF_DMA_BUFFER_IN_BYTES; i+=4) + ringbuf_pop(&self->ring_buffer, (uint32_t*)&dma_buffer_p[i]); } else { // underflow. clear buffer to transmit "silence" on the I2S bus memset(dma_buffer_p, 0, SIZEOF_HALF_DMA_BUFFER_IN_BYTES); } } -uint i2s_stream_write(i2s_obj_t *self, const uint8_t *buf_out, uint size) { +uint i2s_stream_write(i2s_obj_t *self, const uint32_t *buf_out, uint size) { if (size == 0) { //printf("ERROR: buffer can't be length zero"); exit(1); } - uint32_t num_bytes_written = copy_userbuf_to_ringbuf(self, buf_out, size); - return num_bytes_written; + uint32_t num_words_written = copy_userbuf_to_ringbuf(self, buf_out, size); + return num_words_written; } // TODO maybe we can skip every fourth byte, if we're doing this in 24-bit... 
// could save on some processing power -uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *self, const uint8_t *buf_out, uint size) { +uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *self, const uint32_t *buf_out, uint size) { uint32_t a_index = 0; while (a_index < size) { diff --git a/firmware/code/i2s.h b/firmware/code/i2s.h index ca30353..faf0ec6 100644 --- a/firmware/code/i2s.h +++ b/firmware/code/i2s.h @@ -59,7 +59,7 @@ typedef struct _i2s_obj_t { extern i2s_obj_t i2s_write_obj; void i2s_write_init(i2s_obj_t *); -uint i2s_stream_write(i2s_obj_t *, const uint8_t *, uint); +uint i2s_stream_write(i2s_obj_t *, const uint32_t *, uint); void dma_irq_handler(uint8_t); void dma_irq_write_handler(void); @@ -68,6 +68,6 @@ void dma_configure(i2s_obj_t *); uint8_t *dma_get_buffer(i2s_obj_t *, uint); void feed_dma(i2s_obj_t *, uint8_t *); -uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *, const uint8_t *, uint); +uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *, const uint32_t *, uint); #endif \ No newline at end of file diff --git a/firmware/code/ringbuf.c b/firmware/code/ringbuf.c index b6399f3..ca466fe 100644 --- a/firmware/code/ringbuf.c +++ b/firmware/code/ringbuf.c @@ -33,14 +33,14 @@ // - Sequential atomic operations // One byte of capacity is used to detect buffer empty/full -void ringbuf_init(ring_buf_t *rbuf, uint8_t *buffer, size_t size) { +void ringbuf_init(ring_buf_t *rbuf, uint32_t *buffer, size_t size) { rbuf->buffer = buffer; rbuf->size = size; rbuf->head = 0; rbuf->tail = 0; } -bool ringbuf_push(ring_buf_t *rbuf, uint8_t data) { +bool ringbuf_push(ring_buf_t *rbuf, uint32_t data) { size_t next_tail = (rbuf->tail + 1) % rbuf->size; if (next_tail != rbuf->head) { @@ -53,7 +53,7 @@ bool ringbuf_push(ring_buf_t *rbuf, uint8_t data) { return false; } -bool ringbuf_pop(ring_buf_t *rbuf, uint8_t *data) { +bool ringbuf_pop(ring_buf_t *rbuf, uint32_t *data) { if (rbuf->head == rbuf->tail) { // empty return false; diff --git a/firmware/code/ringbuf.h b/firmware/code/ringbuf.h index 
5e1cbcc..dd83f85 100644 --- a/firmware/code/ringbuf.h +++ b/firmware/code/ringbuf.h @@ -28,15 +28,15 @@ #include "pico/stdlib.h" typedef struct _ring_buf_t { - uint8_t *buffer; + uint32_t *buffer; size_t head; size_t tail; size_t size; } ring_buf_t; -void ringbuf_init(ring_buf_t *, uint8_t *, size_t); -bool ringbuf_push(ring_buf_t *, uint8_t ); -bool ringbuf_pop(ring_buf_t *, uint8_t *); +void ringbuf_init(ring_buf_t *, uint32_t *, size_t); +bool ringbuf_push(ring_buf_t *, uint32_t ); +bool ringbuf_pop(ring_buf_t *, uint32_t *); bool ringbuf_is_empty(ring_buf_t *); bool ringbuf_is_full(ring_buf_t *); size_t ringbuf_available_data(ring_buf_t *); diff --git a/firmware/code/run.c b/firmware/code/run.c index 67649e6..d6f13fd 100644 --- a/firmware/code/run.c +++ b/firmware/code/run.c @@ -123,83 +123,60 @@ static void __no_inline_not_in_flash_func(_as_audio_packet)(struct usb_endpoint int16_t *in = (int16_t *) usb_buffer->data; int32_t *out = (int32_t *) userbuf; int samples = usb_buffer->data_len / 2; - - multicore_fifo_push_blocking(CORE0_READY); - multicore_fifo_push_blocking((uintptr_t) in); + + // TODO: For some reason if we try to process in from both cores the left and right channels + // flip back and forth.. 
+ if (preprocessing.reverse_stereo) { + for (int i = 0; i < samples; i+=2) { + out[i] = fix16_mul(norm_fix3_28_from_s16sample(in[i+1]), preprocessing.preamp); + out[i+1] = fix16_mul(norm_fix3_28_from_s16sample(in[i]), preprocessing.preamp); + } + } + else { + for (int i = 0; i < samples; i++) + out[i] = fix16_mul(norm_fix3_28_from_s16sample(in[i]), preprocessing.preamp); + } + multicore_fifo_push_blocking(samples); - if (preprocessing.reverse_stereo) { - in++; - } for (int i = 0; i < samples; i += 2) { - // Preamp the sample - fix3_28_t x_f16 = fix16_mul(norm_fix3_28_from_s16sample((int16_t) in[i]), preprocessing.preamp); - - // Run the filters for (int j = 0; j < filter_stages; j++) { - x_f16 = bqf_transform(x_f16, &bqf_filters_left[j], &bqf_filters_mem_left[j]); + out[i] = bqf_transform(out[i], &bqf_filters_left[j], &bqf_filters_mem_left[j]); } - // Convert back to sample - out[i] = (int32_t) norm_fix3_28_to_s16sample(x_f16); + out[i] = (int32_t) norm_fix3_28_to_s16sample(out[i]); } // Signal to core 1 that we have processed our samples, so it can write to I2S multicore_fifo_push_blocking(CORE0_READY); + update_volume(); + apply_config_changes(); + // keep on truckin' usb_grow_transfer(ep->current_transfer, 1); usb_packet_done(ep); } void __no_inline_not_in_flash_func(core1_entry)() { - uint8_t *userbuf = (uint8_t *) multicore_fifo_pop_blocking(); + uint32_t *userbuf = (uint32_t *) multicore_fifo_pop_blocking(); int32_t *out = (int32_t *) userbuf; - int limit_counter = 100; // Signal that the thread has started multicore_fifo_push_blocking(CORE1_READY); while (true) { - // Block until the userbuf is filled with data - uint32_t ready = multicore_fifo_pop_blocking(); - while (ready != CORE0_READY) - ready = multicore_fifo_pop_blocking(); - - int16_t *in = (int16_t *) multicore_fifo_pop_blocking(); const uint32_t samples = multicore_fifo_pop_blocking(); - if (preprocessing.reverse_stereo) { - in--; - } for (int i = 1; i < samples; i += 2) { - // Preamp the sample - 
fix3_28_t x_f16 = fix16_mul(norm_fix3_28_from_s16sample((int16_t) in[i]), preprocessing.preamp); - - // Run the filters for (int j = 0; j < filter_stages; j++) { - x_f16 = bqf_transform(x_f16, &bqf_filters_right[j], &bqf_filters_mem_right[j]); + out[i] = bqf_transform(out[i], &bqf_filters_right[j], &bqf_filters_mem_right[j]); } - // Convert back to sample - out[i] = (int32_t) norm_fix3_28_to_s16sample(x_f16); + out[i] = (int32_t) norm_fix3_28_to_s16sample(out[i]); } - // Update the volume and filter configs if required. We do this from - // core1 as core0 is more heavily loaded, doing this from core0 can - // lead to audio crackling. - // Use of a counter reduces the amount of crackling when changing - // volume. - if (limit_counter != 0) - limit_counter--; - else { - limit_counter = 100; - update_volume(); - apply_config_changes(); - } - - // Signal to core 0 that the data has all been transformed - multicore_fifo_push_blocking(CORE1_READY); - - i2s_stream_write(&i2s_write_obj, userbuf, samples * 4); + // Block until core 0 signals (CORE0_READY) that its own filtering pass is done, then write the whole buffer to I2S + multicore_fifo_pop_blocking(); + i2s_stream_write(&i2s_write_obj, userbuf, samples); } }