Seems to work OK with 10 filters. Just noise with 11.

This commit is contained in:
George Norton 2023-08-19 00:44:00 +01:00
parent d2bb88a9fb
commit 635dac208a
9 changed files with 52 additions and 123 deletions

View File

@ -14,7 +14,6 @@ add_executable(ploopy_headphones
run.c run.c
ringbuf.c ringbuf.c
i2s.c i2s.c
fix16.c
bqf.c bqf.c
configuration_manager.c configuration_manager.c
) )

View File

@ -43,7 +43,7 @@ typedef struct _bqf_mem_t {
// In reality we do not have enough CPU resource to run 8 filtering // In reality we do not have enough CPU resource to run 8 filtering
// stages without some optimisation. // stages without some optimisation.
#define MAX_FILTER_STAGES 8 #define MAX_FILTER_STAGES 10
extern int filter_stages; extern int filter_stages;
extern bqf_coeff_t bqf_filters_left[MAX_FILTER_STAGES]; extern bqf_coeff_t bqf_filters_left[MAX_FILTER_STAGES];

View File

@ -25,13 +25,6 @@
#include <stdbool.h> #include <stdbool.h>
#include <inttypes.h> #include <inttypes.h>
// During development, it can be useful to run with real double values for reference.
//#define USE_DOUBLE
#ifdef USE_DOUBLE
typedef double fix16_t;
static const fix16_t fix16_zero = 0;
static const fix16_t fix16_one = 1;
#else
/// @brief Fixed point math type, in format Q3.28. One sign bit, 3 bits for left-of-decimal /// @brief Fixed point math type, in format Q3.28. One sign bit, 3 bits for left-of-decimal
///and 28 for right-of-decimal. This arrangement works because we normalize the incoming USB ///and 28 for right-of-decimal. This arrangement works because we normalize the incoming USB
@ -46,15 +39,13 @@ static const fix3_28_t fix16_one = 0x10000000;
/// @brief Represents zero in fixed point world. /// @brief Represents zero in fixed point world.
static const fix3_28_t fix16_zero = 0x00000000; static const fix3_28_t fix16_zero = 0x00000000;
#endif static inline fix3_28_t norm_fix3_28_from_s16sample(int16_t);
static inline int16_t norm_fix3_28_to_s16sample(fix3_28_t);
fix3_28_t norm_fix3_28_from_s16sample(int16_t); static inline fix3_28_t fix3_28_from_dbl(double);
int16_t norm_fix3_28_to_s16sample(fix3_28_t); static inline fix3_28_t fix16_mul(fix3_28_t, fix3_28_t);
fix3_28_t fix3_28_from_dbl(double);
fix3_28_t fix16_mul(fix3_28_t, fix3_28_t);
#include "fix16.inl"
#endif #endif

View File

@ -25,46 +25,10 @@
#include <limits.h> #include <limits.h>
#include "fix16.h" #include "fix16.h"
#ifdef USE_DOUBLE
fix16_t fix16_from_s16sample(int16_t a) {
return a;
}
int16_t fix16_to_s16sample(fix16_t a) {
// Handle rounding up front, adding one can cause an overflow/underflow
if (a < 0) {
a -= 0.5;
} else {
a += 0.5;
}
// Saturate the value if an overflow has occurred
if (a < SHRT_MIN) {
return SHRT_MIN;
}
if (a < SHRT_MAX) {
return SHRT_MAX;
}
return a;
}
fix16_t fix16_from_dbl(double a) {
return a;
}
double fix16_to_dbl(fix16_t a) {
return a;
}
fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1) {
return inArg0 * inArg1;
}
#else
/// @brief Produces a fixed point number from a 16-bit signed integer, normalized to ]-1,1[. /// @brief Produces a fixed point number from a 16-bit signed integer, normalized to ]-1,1[.
/// @param a Signed 16-bit integer. /// @param a Signed 16-bit integer.
/// @return A fixed point number in Q3.28 format, with input normalized to ]-1,1[. /// @return A fixed point number in Q3.28 format, with input normalized to ]-1,1[.
fix3_28_t norm_fix3_28_from_s16sample(int16_t a) { static inline fix3_28_t norm_fix3_28_from_s16sample(int16_t a) {
/* So, we're using a Q3.28 fixed point system here, and we want the incoming /* So, we're using a Q3.28 fixed point system here, and we want the incoming
audio signal to be represented as a number between -1 and 1. To do this, audio signal to be represented as a number between -1 and 1. To do this,
we need the 16-bit value to map to the 28-bit right-of-decimal field in we need the 16-bit value to map to the 28-bit right-of-decimal field in
@ -79,7 +43,7 @@ fix3_28_t norm_fix3_28_from_s16sample(int16_t a) {
/// calculated sample to one that the DAC can understand. /// calculated sample to one that the DAC can understand.
/// @param a /// @param a
/// @return Signed 16-bit integer. /// @return Signed 16-bit integer.
int16_t norm_fix3_28_to_s16sample(fix3_28_t a) { static inline int16_t norm_fix3_28_to_s16sample(fix3_28_t a) {
// Handle rounding up front, adding one can cause an overflow/underflow // Handle rounding up front, adding one can cause an overflow/underflow
// It's not clear exactly how this works, so we'll disable it for now. // It's not clear exactly how this works, so we'll disable it for now.
@ -110,8 +74,7 @@ int16_t norm_fix3_28_to_s16sample(fix3_28_t a) {
return (a >> 12); return (a >> 12);
} }
static inline fix3_28_t fix3_28_from_dbl(double a) {
fix3_28_t fix3_28_from_dbl(double a) {
double temp = a * fix16_one; double temp = a * fix16_one;
temp += (double)((temp >= 0) ? 0.5f : -0.5f); temp += (double)((temp >= 0) ? 0.5f : -0.5f);
return (fix3_28_t)temp; return (fix3_28_t)temp;
@ -121,7 +84,7 @@ fix3_28_t fix3_28_from_dbl(double a) {
/// @param inArg0 Q3.28 format fixed point number. /// @param inArg0 Q3.28 format fixed point number.
/// @param inArg1 Q3.28 format fixed point number. /// @param inArg1 Q3.28 format fixed point number.
/// @return A Q3.28 fixed point number that represents the truncated result of inArg0 x inArg1. /// @return A Q3.28 fixed point number that represents the truncated result of inArg0 x inArg1.
fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) { static inline fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) {
const int64_t product = (int64_t)inArg0 * inArg1; const int64_t product = (int64_t)inArg0 * inArg1;
/* Since we're expecting 2 Q3.28 numbers, the multiplication result should be a Q7.56 number. /* Since we're expecting 2 Q3.28 numbers, the multiplication result should be a Q7.56 number.
@ -143,5 +106,4 @@ fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) {
} }
#endif #endif
return result; return result;
} }
#endif

View File

@ -64,7 +64,7 @@ void i2s_write_init(i2s_obj_t *self) {
self->prog_offset + self->pio_program->length - 1); self->prog_offset + self->pio_program->length - 1);
pio_sm_set_config(self->pio, self->sm, &config); pio_sm_set_config(self->pio, self->sm, &config);
uint8_t *rbs = malloc(sizeof(uint8_t) * RINGBUF_LEN_IN_BYTES); uint32_t *rbs = malloc(sizeof(uint8_t) * RINGBUF_LEN_IN_BYTES);
ringbuf_init(&self->ring_buffer, rbs, RINGBUF_LEN_IN_BYTES); ringbuf_init(&self->ring_buffer, rbs, RINGBUF_LEN_IN_BYTES);
irq_set_exclusive_handler(DMA_IRQ_1, dma_irq_write_handler); irq_set_exclusive_handler(DMA_IRQ_1, dma_irq_write_handler);
@ -169,27 +169,27 @@ uint8_t *dma_get_buffer(i2s_obj_t *i2s_obj, uint channel) {
void feed_dma(i2s_obj_t *self, uint8_t *dma_buffer_p) { void feed_dma(i2s_obj_t *self, uint8_t *dma_buffer_p) {
// when data exists, copy samples from ring buffer // when data exists, copy samples from ring buffer
if (ringbuf_available_data(&self->ring_buffer) >= SIZEOF_HALF_DMA_BUFFER_IN_BYTES) { if (ringbuf_available_data(&self->ring_buffer) >= SIZEOF_HALF_DMA_BUFFER_IN_BYTES) {
for (uint32_t i = 0; i < SIZEOF_HALF_DMA_BUFFER_IN_BYTES; i++) for (uint32_t i = 0; i < SIZEOF_HALF_DMA_BUFFER_IN_BYTES; i+=4)
ringbuf_pop(&self->ring_buffer, &dma_buffer_p[i]); ringbuf_pop(&self->ring_buffer, (uint32_t*)&dma_buffer_p[i]);
} else { } else {
// underflow. clear buffer to transmit "silence" on the I2S bus // underflow. clear buffer to transmit "silence" on the I2S bus
memset(dma_buffer_p, 0, SIZEOF_HALF_DMA_BUFFER_IN_BYTES); memset(dma_buffer_p, 0, SIZEOF_HALF_DMA_BUFFER_IN_BYTES);
} }
} }
uint i2s_stream_write(i2s_obj_t *self, const uint8_t *buf_out, uint size) { uint i2s_stream_write(i2s_obj_t *self, const uint32_t *buf_out, uint size) {
if (size == 0) { if (size == 0) {
//printf("ERROR: buffer can't be length zero"); //printf("ERROR: buffer can't be length zero");
exit(1); exit(1);
} }
uint32_t num_bytes_written = copy_userbuf_to_ringbuf(self, buf_out, size); uint32_t num_words_written = copy_userbuf_to_ringbuf(self, buf_out, size);
return num_bytes_written; return num_words_written;
} }
// TODO maybe we can skip every fourth byte, if we're doing this in 24-bit... // TODO maybe we can skip every fourth byte, if we're doing this in 24-bit...
// could save on some processing power // could save on some processing power
uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *self, const uint8_t *buf_out, uint size) { uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *self, const uint32_t *buf_out, uint size) {
uint32_t a_index = 0; uint32_t a_index = 0;
while (a_index < size) { while (a_index < size) {

View File

@ -59,7 +59,7 @@ typedef struct _i2s_obj_t {
extern i2s_obj_t i2s_write_obj; extern i2s_obj_t i2s_write_obj;
void i2s_write_init(i2s_obj_t *); void i2s_write_init(i2s_obj_t *);
uint i2s_stream_write(i2s_obj_t *, const uint8_t *, uint); uint i2s_stream_write(i2s_obj_t *, const uint32_t *, uint);
void dma_irq_handler(uint8_t); void dma_irq_handler(uint8_t);
void dma_irq_write_handler(void); void dma_irq_write_handler(void);
@ -68,6 +68,6 @@ void dma_configure(i2s_obj_t *);
uint8_t *dma_get_buffer(i2s_obj_t *, uint); uint8_t *dma_get_buffer(i2s_obj_t *, uint);
void feed_dma(i2s_obj_t *, uint8_t *); void feed_dma(i2s_obj_t *, uint8_t *);
uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *, const uint8_t *, uint); uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *, const uint32_t *, uint);
#endif #endif

View File

@ -33,14 +33,14 @@
// - Sequential atomic operations // - Sequential atomic operations
// One byte of capacity is used to detect buffer empty/full // One byte of capacity is used to detect buffer empty/full
void ringbuf_init(ring_buf_t *rbuf, uint8_t *buffer, size_t size) { void ringbuf_init(ring_buf_t *rbuf, uint32_t *buffer, size_t size) {
rbuf->buffer = buffer; rbuf->buffer = buffer;
rbuf->size = size; rbuf->size = size;
rbuf->head = 0; rbuf->head = 0;
rbuf->tail = 0; rbuf->tail = 0;
} }
bool ringbuf_push(ring_buf_t *rbuf, uint8_t data) { bool ringbuf_push(ring_buf_t *rbuf, uint32_t data) {
size_t next_tail = (rbuf->tail + 1) % rbuf->size; size_t next_tail = (rbuf->tail + 1) % rbuf->size;
if (next_tail != rbuf->head) { if (next_tail != rbuf->head) {
@ -53,7 +53,7 @@ bool ringbuf_push(ring_buf_t *rbuf, uint8_t data) {
return false; return false;
} }
bool ringbuf_pop(ring_buf_t *rbuf, uint8_t *data) { bool ringbuf_pop(ring_buf_t *rbuf, uint32_t *data) {
if (rbuf->head == rbuf->tail) { if (rbuf->head == rbuf->tail) {
// empty // empty
return false; return false;

View File

@ -28,15 +28,15 @@
#include "pico/stdlib.h" #include "pico/stdlib.h"
typedef struct _ring_buf_t { typedef struct _ring_buf_t {
uint8_t *buffer; uint32_t *buffer;
size_t head; size_t head;
size_t tail; size_t tail;
size_t size; size_t size;
} ring_buf_t; } ring_buf_t;
void ringbuf_init(ring_buf_t *, uint8_t *, size_t); void ringbuf_init(ring_buf_t *, uint32_t *, size_t);
bool ringbuf_push(ring_buf_t *, uint8_t ); bool ringbuf_push(ring_buf_t *, uint32_t );
bool ringbuf_pop(ring_buf_t *, uint8_t *); bool ringbuf_pop(ring_buf_t *, uint32_t *);
bool ringbuf_is_empty(ring_buf_t *); bool ringbuf_is_empty(ring_buf_t *);
bool ringbuf_is_full(ring_buf_t *); bool ringbuf_is_full(ring_buf_t *);
size_t ringbuf_available_data(ring_buf_t *); size_t ringbuf_available_data(ring_buf_t *);

View File

@ -123,83 +123,60 @@ static void __no_inline_not_in_flash_func(_as_audio_packet)(struct usb_endpoint
int16_t *in = (int16_t *) usb_buffer->data; int16_t *in = (int16_t *) usb_buffer->data;
int32_t *out = (int32_t *) userbuf; int32_t *out = (int32_t *) userbuf;
int samples = usb_buffer->data_len / 2; int samples = usb_buffer->data_len / 2;
multicore_fifo_push_blocking(CORE0_READY); // TODO: For some reason if we try to process in from both cores the left and right channels
multicore_fifo_push_blocking((uintptr_t) in); // flip back and forth..
if (preprocessing.reverse_stereo) {
for (int i = 0; i < samples; i+=2) {
out[i] = fix16_mul(norm_fix3_28_from_s16sample(in[i+1]), preprocessing.preamp);
out[i+1] = fix16_mul(norm_fix3_28_from_s16sample(in[i]), preprocessing.preamp);
}
}
else {
for (int i = 0; i < samples; i++)
out[i] = fix16_mul(norm_fix3_28_from_s16sample(in[i]), preprocessing.preamp);
}
multicore_fifo_push_blocking(samples); multicore_fifo_push_blocking(samples);
if (preprocessing.reverse_stereo) {
in++;
}
for (int i = 0; i < samples; i += 2) { for (int i = 0; i < samples; i += 2) {
// Preamp the sample
fix3_28_t x_f16 = fix16_mul(norm_fix3_28_from_s16sample((int16_t) in[i]), preprocessing.preamp);
// Run the filters
for (int j = 0; j < filter_stages; j++) { for (int j = 0; j < filter_stages; j++) {
x_f16 = bqf_transform(x_f16, &bqf_filters_left[j], &bqf_filters_mem_left[j]); out[i] = bqf_transform(out[i], &bqf_filters_left[j], &bqf_filters_mem_left[j]);
} }
// Convert back to sample out[i] = (int32_t) norm_fix3_28_to_s16sample(out[i]);
out[i] = (int32_t) norm_fix3_28_to_s16sample(x_f16);
} }
// Signal to core 1 that we have processed our samples, so it can write to I2S // Signal to core 1 that we have processed our samples, so it can write to I2S
multicore_fifo_push_blocking(CORE0_READY); multicore_fifo_push_blocking(CORE0_READY);
update_volume();
apply_config_changes();
// keep on truckin' // keep on truckin'
usb_grow_transfer(ep->current_transfer, 1); usb_grow_transfer(ep->current_transfer, 1);
usb_packet_done(ep); usb_packet_done(ep);
} }
void __no_inline_not_in_flash_func(core1_entry)() { void __no_inline_not_in_flash_func(core1_entry)() {
uint8_t *userbuf = (uint8_t *) multicore_fifo_pop_blocking(); uint32_t *userbuf = (uint32_t *) multicore_fifo_pop_blocking();
int32_t *out = (int32_t *) userbuf; int32_t *out = (int32_t *) userbuf;
int limit_counter = 100;
// Signal that the thread has started // Signal that the thread has started
multicore_fifo_push_blocking(CORE1_READY); multicore_fifo_push_blocking(CORE1_READY);
while (true) { while (true) {
// Block until the userbuf is filled with data
uint32_t ready = multicore_fifo_pop_blocking();
while (ready != CORE0_READY)
ready = multicore_fifo_pop_blocking();
int16_t *in = (int16_t *) multicore_fifo_pop_blocking();
const uint32_t samples = multicore_fifo_pop_blocking(); const uint32_t samples = multicore_fifo_pop_blocking();
if (preprocessing.reverse_stereo) {
in--;
}
for (int i = 1; i < samples; i += 2) { for (int i = 1; i < samples; i += 2) {
// Preamp the sample
fix3_28_t x_f16 = fix16_mul(norm_fix3_28_from_s16sample((int16_t) in[i]), preprocessing.preamp);
// Run the filters
for (int j = 0; j < filter_stages; j++) { for (int j = 0; j < filter_stages; j++) {
x_f16 = bqf_transform(x_f16, &bqf_filters_right[j], &bqf_filters_mem_right[j]); out[i] = bqf_transform(out[i], &bqf_filters_right[j], &bqf_filters_mem_right[j]);
} }
// Convert back to sample out[i] = (int32_t) norm_fix3_28_to_s16sample(out[i]);
out[i] = (int32_t) norm_fix3_28_to_s16sample(x_f16);
} }
// Update the volume and filter configs if required. We do this from // Wait for Core 0 to finish running its filtering before we apply config updates
// core1 as core0 is more heavily loaded, doing this from core0 can multicore_fifo_pop_blocking();
// lead to audio crackling. i2s_stream_write(&i2s_write_obj, userbuf, samples);
// Use of a counter reduces the amount of crackling when changing
// volume.
if (limit_counter != 0)
limit_counter--;
else {
limit_counter = 100;
update_volume();
apply_config_changes();
}
// Signal to core 0 that the data has all been transformed
multicore_fifo_push_blocking(CORE1_READY);
i2s_stream_write(&i2s_write_obj, userbuf, samples * 4);
} }
} }