Seems to work OK with 10 filters. Just noise with 11.

This commit is contained in:
George Norton 2023-08-19 00:44:00 +01:00
parent d2bb88a9fb
commit 635dac208a
9 changed files with 52 additions and 123 deletions

View File

@ -14,7 +14,6 @@ add_executable(ploopy_headphones
run.c
ringbuf.c
i2s.c
fix16.c
bqf.c
configuration_manager.c
)

View File

@ -43,7 +43,7 @@ typedef struct _bqf_mem_t {
// In reality we do not have enough CPU resource to run 8 filtering
// stages without some optimisation.
#define MAX_FILTER_STAGES 8
#define MAX_FILTER_STAGES 10
extern int filter_stages;
extern bqf_coeff_t bqf_filters_left[MAX_FILTER_STAGES];

View File

@ -25,13 +25,6 @@
#include <stdbool.h>
#include <inttypes.h>
// During development, it can be useful to run with real double values for reference.
//#define USE_DOUBLE
#ifdef USE_DOUBLE
typedef double fix16_t;
static const fix16_t fix16_zero = 0;
static const fix16_t fix16_one = 1;
#else
/// @brief Fixed point math type, in format Q3.28. One sign bit, 3 bits for left-of-decimal
/// and 28 for right-of-decimal. This arrangement works because we normalize the incoming USB
@ -46,15 +39,13 @@ static const fix3_28_t fix16_one = 0x10000000;
/// @brief Represents zero in fixed point world.
static const fix3_28_t fix16_zero = 0x00000000;
#endif
static inline fix3_28_t norm_fix3_28_from_s16sample(int16_t);
static inline int16_t norm_fix3_28_to_s16sample(fix3_28_t);
fix3_28_t norm_fix3_28_from_s16sample(int16_t);
static inline fix3_28_t fix3_28_from_dbl(double);
int16_t norm_fix3_28_to_s16sample(fix3_28_t);
fix3_28_t fix3_28_from_dbl(double);
fix3_28_t fix16_mul(fix3_28_t, fix3_28_t);
static inline fix3_28_t fix16_mul(fix3_28_t, fix3_28_t);
#include "fix16.inl"
#endif

View File

@ -25,46 +25,10 @@
#include <limits.h>
#include "fix16.h"
#ifdef USE_DOUBLE
fix16_t fix16_from_s16sample(int16_t a) {
return a;
}
/// @brief Reference (USE_DOUBLE) variant: converts a fix16_t value back to a signed 16-bit sample.
/// The value is rounded half away from zero and then saturated to the 16-bit range.
/// @param a Value to convert.
/// @return The rounded value, clamped to [SHRT_MIN, SHRT_MAX].
int16_t fix16_to_s16sample(fix16_t a) {
    // Handle rounding up front, so the range check below sees the rounded value.
    if (a < 0) {
        a -= 0.5;
    } else {
        a += 0.5;
    }

    // Saturate the value if an overflow has occurred.
    if (a < SHRT_MIN) {
        return SHRT_MIN;
    }
    // The upper bound needs '>': testing '<' here would clamp almost every
    // in-range sample to SHRT_MAX.
    if (a > SHRT_MAX) {
        return SHRT_MAX;
    }

    // In range after clamping, so the narrowing conversion is well defined.
    return (int16_t)a;
}
fix16_t fix16_from_dbl(double a) {
return a;
}
/// @brief Reference (USE_DOUBLE) variant: converts a fix16_t to double.
/// @param a Value to convert.
/// @return The input value as a double; no scaling is applied.
double fix16_to_dbl(fix16_t a) {
    const double converted = (double)a;
    return converted;
}
/// @brief Reference (USE_DOUBLE) variant: multiplies two fix16_t values.
/// @param inArg0 First factor.
/// @param inArg1 Second factor.
/// @return The product inArg0 x inArg1.
fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1) {
    const fix16_t product = inArg0 * inArg1;
    return product;
}
#else
/// @brief Produces a fixed point number from a 16-bit signed integer, normalized to ]-1,1[.
/// @param a Signed 16-bit integer.
/// @return A fixed point number in Q3.28 format, with input normalized to ]-1,1[.
fix3_28_t norm_fix3_28_from_s16sample(int16_t a) {
static inline fix3_28_t norm_fix3_28_from_s16sample(int16_t a) {
/* So, we're using a Q3.28 fixed point system here, and we want the incoming
audio signal to be represented as a number between -1 and 1. To do this,
we need the 16-bit value to map to the 28-bit right-of-decimal field in
@ -79,7 +43,7 @@ fix3_28_t norm_fix3_28_from_s16sample(int16_t a) {
/// calculated sample to one that the DAC can understand.
/// @param a
/// @return Signed 16-bit integer.
int16_t norm_fix3_28_to_s16sample(fix3_28_t a) {
static inline int16_t norm_fix3_28_to_s16sample(fix3_28_t a) {
// Handle rounding up front, adding one can cause an overflow/underflow
// It's not clear exactly how this works, so we'll disable it for now.
@ -110,8 +74,7 @@ int16_t norm_fix3_28_to_s16sample(fix3_28_t a) {
return (a >> 12);
}
fix3_28_t fix3_28_from_dbl(double a) {
static inline fix3_28_t fix3_28_from_dbl(double a) {
double temp = a * fix16_one;
temp += (double)((temp >= 0) ? 0.5f : -0.5f);
return (fix3_28_t)temp;
@ -121,7 +84,7 @@ fix3_28_t fix3_28_from_dbl(double a) {
/// @param inArg0 Q3.28 format fixed point number.
/// @param inArg1 Q3.28 format fixed point number.
/// @return A Q3.28 fixed point number that represents the truncated result of inArg0 x inArg1.
fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) {
static inline fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) {
const int64_t product = (int64_t)inArg0 * inArg1;
/* Since we're expecting 2 Q3.28 numbers, the multiplication result should be a Q7.56 number.
@ -143,5 +106,4 @@ fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) {
}
#endif
return result;
}
#endif
}

View File

@ -64,7 +64,7 @@ void i2s_write_init(i2s_obj_t *self) {
self->prog_offset + self->pio_program->length - 1);
pio_sm_set_config(self->pio, self->sm, &config);
uint8_t *rbs = malloc(sizeof(uint8_t) * RINGBUF_LEN_IN_BYTES);
uint32_t *rbs = malloc(sizeof(uint8_t) * RINGBUF_LEN_IN_BYTES);
ringbuf_init(&self->ring_buffer, rbs, RINGBUF_LEN_IN_BYTES);
irq_set_exclusive_handler(DMA_IRQ_1, dma_irq_write_handler);
@ -169,27 +169,27 @@ uint8_t *dma_get_buffer(i2s_obj_t *i2s_obj, uint channel) {
void feed_dma(i2s_obj_t *self, uint8_t *dma_buffer_p) {
// when data exists, copy samples from ring buffer
if (ringbuf_available_data(&self->ring_buffer) >= SIZEOF_HALF_DMA_BUFFER_IN_BYTES) {
for (uint32_t i = 0; i < SIZEOF_HALF_DMA_BUFFER_IN_BYTES; i++)
ringbuf_pop(&self->ring_buffer, &dma_buffer_p[i]);
for (uint32_t i = 0; i < SIZEOF_HALF_DMA_BUFFER_IN_BYTES; i+=4)
ringbuf_pop(&self->ring_buffer, (uint32_t*)&dma_buffer_p[i]);
} else {
// underflow. clear buffer to transmit "silence" on the I2S bus
memset(dma_buffer_p, 0, SIZEOF_HALF_DMA_BUFFER_IN_BYTES);
}
}
uint i2s_stream_write(i2s_obj_t *self, const uint8_t *buf_out, uint size) {
uint i2s_stream_write(i2s_obj_t *self, const uint32_t *buf_out, uint size) {
if (size == 0) {
//printf("ERROR: buffer can't be length zero");
exit(1);
}
uint32_t num_bytes_written = copy_userbuf_to_ringbuf(self, buf_out, size);
return num_bytes_written;
uint32_t num_words_written = copy_userbuf_to_ringbuf(self, buf_out, size);
return num_words_written;
}
// TODO maybe we can skip every fourth byte, if we're doing this in 24-bit...
// could save on some processing power
uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *self, const uint8_t *buf_out, uint size) {
uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *self, const uint32_t *buf_out, uint size) {
uint32_t a_index = 0;
while (a_index < size) {

View File

@ -59,7 +59,7 @@ typedef struct _i2s_obj_t {
extern i2s_obj_t i2s_write_obj;
void i2s_write_init(i2s_obj_t *);
uint i2s_stream_write(i2s_obj_t *, const uint8_t *, uint);
uint i2s_stream_write(i2s_obj_t *, const uint32_t *, uint);
void dma_irq_handler(uint8_t);
void dma_irq_write_handler(void);
@ -68,6 +68,6 @@ void dma_configure(i2s_obj_t *);
uint8_t *dma_get_buffer(i2s_obj_t *, uint);
void feed_dma(i2s_obj_t *, uint8_t *);
uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *, const uint8_t *, uint);
uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *, const uint32_t *, uint);
#endif

View File

@ -33,14 +33,14 @@
// - Sequential atomic operations
// One byte of capacity is used to detect buffer empty/full
void ringbuf_init(ring_buf_t *rbuf, uint8_t *buffer, size_t size) {
void ringbuf_init(ring_buf_t *rbuf, uint32_t *buffer, size_t size) {
rbuf->buffer = buffer;
rbuf->size = size;
rbuf->head = 0;
rbuf->tail = 0;
}
bool ringbuf_push(ring_buf_t *rbuf, uint8_t data) {
bool ringbuf_push(ring_buf_t *rbuf, uint32_t data) {
size_t next_tail = (rbuf->tail + 1) % rbuf->size;
if (next_tail != rbuf->head) {
@ -53,7 +53,7 @@ bool ringbuf_push(ring_buf_t *rbuf, uint8_t data) {
return false;
}
bool ringbuf_pop(ring_buf_t *rbuf, uint8_t *data) {
bool ringbuf_pop(ring_buf_t *rbuf, uint32_t *data) {
if (rbuf->head == rbuf->tail) {
// empty
return false;

View File

@ -28,15 +28,15 @@
#include "pico/stdlib.h"
typedef struct _ring_buf_t {
uint8_t *buffer;
uint32_t *buffer;
size_t head;
size_t tail;
size_t size;
} ring_buf_t;
void ringbuf_init(ring_buf_t *, uint8_t *, size_t);
bool ringbuf_push(ring_buf_t *, uint8_t );
bool ringbuf_pop(ring_buf_t *, uint8_t *);
void ringbuf_init(ring_buf_t *, uint32_t *, size_t);
bool ringbuf_push(ring_buf_t *, uint32_t );
bool ringbuf_pop(ring_buf_t *, uint32_t *);
bool ringbuf_is_empty(ring_buf_t *);
bool ringbuf_is_full(ring_buf_t *);
size_t ringbuf_available_data(ring_buf_t *);

View File

@ -123,83 +123,60 @@ static void __no_inline_not_in_flash_func(_as_audio_packet)(struct usb_endpoint
int16_t *in = (int16_t *) usb_buffer->data;
int32_t *out = (int32_t *) userbuf;
int samples = usb_buffer->data_len / 2;
multicore_fifo_push_blocking(CORE0_READY);
multicore_fifo_push_blocking((uintptr_t) in);
// TODO: For some reason if we try to process in from both cores the left and right channels
// flip back and forth..
if (preprocessing.reverse_stereo) {
for (int i = 0; i < samples; i+=2) {
out[i] = fix16_mul(norm_fix3_28_from_s16sample(in[i+1]), preprocessing.preamp);
out[i+1] = fix16_mul(norm_fix3_28_from_s16sample(in[i]), preprocessing.preamp);
}
}
else {
for (int i = 0; i < samples; i++)
out[i] = fix16_mul(norm_fix3_28_from_s16sample(in[i]), preprocessing.preamp);
}
multicore_fifo_push_blocking(samples);
if (preprocessing.reverse_stereo) {
in++;
}
for (int i = 0; i < samples; i += 2) {
// Preamp the sample
fix3_28_t x_f16 = fix16_mul(norm_fix3_28_from_s16sample((int16_t) in[i]), preprocessing.preamp);
// Run the filters
for (int j = 0; j < filter_stages; j++) {
x_f16 = bqf_transform(x_f16, &bqf_filters_left[j], &bqf_filters_mem_left[j]);
out[i] = bqf_transform(out[i], &bqf_filters_left[j], &bqf_filters_mem_left[j]);
}
// Convert back to sample
out[i] = (int32_t) norm_fix3_28_to_s16sample(x_f16);
out[i] = (int32_t) norm_fix3_28_to_s16sample(out[i]);
}
// Signal to core 1 that we have processed our samples, so it can write to I2S
multicore_fifo_push_blocking(CORE0_READY);
update_volume();
apply_config_changes();
// keep on truckin'
usb_grow_transfer(ep->current_transfer, 1);
usb_packet_done(ep);
}
void __no_inline_not_in_flash_func(core1_entry)() {
uint8_t *userbuf = (uint8_t *) multicore_fifo_pop_blocking();
uint32_t *userbuf = (uint32_t *) multicore_fifo_pop_blocking();
int32_t *out = (int32_t *) userbuf;
int limit_counter = 100;
// Signal that the thread has started
multicore_fifo_push_blocking(CORE1_READY);
while (true) {
// Block until the userbuf is filled with data
uint32_t ready = multicore_fifo_pop_blocking();
while (ready != CORE0_READY)
ready = multicore_fifo_pop_blocking();
int16_t *in = (int16_t *) multicore_fifo_pop_blocking();
const uint32_t samples = multicore_fifo_pop_blocking();
if (preprocessing.reverse_stereo) {
in--;
}
for (int i = 1; i < samples; i += 2) {
// Preamp the sample
fix3_28_t x_f16 = fix16_mul(norm_fix3_28_from_s16sample((int16_t) in[i]), preprocessing.preamp);
// Run the filters
for (int j = 0; j < filter_stages; j++) {
x_f16 = bqf_transform(x_f16, &bqf_filters_right[j], &bqf_filters_mem_right[j]);
out[i] = bqf_transform(out[i], &bqf_filters_right[j], &bqf_filters_mem_right[j]);
}
// Convert back to sample
out[i] = (int32_t) norm_fix3_28_to_s16sample(x_f16);
out[i] = (int32_t) norm_fix3_28_to_s16sample(out[i]);
}
// Update the volume and filter configs if required. We do this from
// core1 as core0 is more heavily loaded, doing this from core0 can
// lead to audio crackling.
// Use of a counter reduces the amount of crackling when changing
// volume.
if (limit_counter != 0)
limit_counter--;
else {
limit_counter = 100;
update_volume();
apply_config_changes();
}
// Signal to core 0 that the data has all been transformed
multicore_fifo_push_blocking(CORE1_READY);
i2s_stream_write(&i2s_write_obj, userbuf, samples * 4);
// Wait for Core 0 to finish running its filtering before we apply config updates
multicore_fifo_pop_blocking();
i2s_stream_write(&i2s_write_obj, userbuf, samples);
}
}