Seems to work OK with 10 filters. Just noise with 11.
This commit is contained in:
parent
d2bb88a9fb
commit
635dac208a
|
@ -14,7 +14,6 @@ add_executable(ploopy_headphones
|
|||
run.c
|
||||
ringbuf.c
|
||||
i2s.c
|
||||
fix16.c
|
||||
bqf.c
|
||||
configuration_manager.c
|
||||
)
|
||||
|
|
|
@ -43,7 +43,7 @@ typedef struct _bqf_mem_t {
|
|||
|
||||
// In reality we do not have enough CPU resource to run 8 filtering
|
||||
// stages without some optimisation.
|
||||
#define MAX_FILTER_STAGES 8
|
||||
#define MAX_FILTER_STAGES 10
|
||||
extern int filter_stages;
|
||||
|
||||
extern bqf_coeff_t bqf_filters_left[MAX_FILTER_STAGES];
|
||||
|
|
|
@ -25,13 +25,6 @@
|
|||
#include <stdbool.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
// During development, it can be useful to run with real double values for reference.
|
||||
//#define USE_DOUBLE
|
||||
#ifdef USE_DOUBLE
|
||||
typedef double fix16_t;
|
||||
static const fix16_t fix16_zero = 0;
|
||||
static const fix16_t fix16_one = 1;
|
||||
#else
|
||||
|
||||
/// @brief Fixed point math type, in format Q3.28. One sign bit, 3 bits for left-of-decimal
|
||||
/// and 28 for right-of-decimal. This arrangement works because we normalize the incoming USB
|
||||
|
@ -46,15 +39,13 @@ static const fix3_28_t fix16_one = 0x10000000;
|
|||
/// @brief Represents zero in fixed point world.
|
||||
static const fix3_28_t fix16_zero = 0x00000000;
|
||||
|
||||
#endif
|
||||
static inline fix3_28_t norm_fix3_28_from_s16sample(int16_t);
|
||||
|
||||
static inline int16_t norm_fix3_28_to_s16sample(fix3_28_t);
|
||||
|
||||
fix3_28_t norm_fix3_28_from_s16sample(int16_t);
|
||||
static inline fix3_28_t fix3_28_from_dbl(double);
|
||||
|
||||
int16_t norm_fix3_28_to_s16sample(fix3_28_t);
|
||||
|
||||
fix3_28_t fix3_28_from_dbl(double);
|
||||
|
||||
fix3_28_t fix16_mul(fix3_28_t, fix3_28_t);
|
||||
static inline fix3_28_t fix16_mul(fix3_28_t, fix3_28_t);
|
||||
|
||||
#include "fix16.inl"
|
||||
#endif
|
|
@ -25,46 +25,10 @@
|
|||
#include <limits.h>
|
||||
#include "fix16.h"
|
||||
|
||||
#ifdef USE_DOUBLE
|
||||
fix16_t fix16_from_s16sample(int16_t a) {
|
||||
return a;
|
||||
}
|
||||
|
||||
int16_t fix16_to_s16sample(fix16_t a) {
|
||||
// Handle rounding up front, adding one can cause an overflow/underflow
|
||||
if (a < 0) {
|
||||
a -= 0.5;
|
||||
} else {
|
||||
a += 0.5;
|
||||
}
|
||||
|
||||
// Saturate the value if an overflow has occurred
|
||||
if (a < SHRT_MIN) {
|
||||
return SHRT_MIN;
|
||||
}
|
||||
if (a < SHRT_MAX) {
|
||||
return SHRT_MAX;
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
fix16_t fix16_from_dbl(double a) {
|
||||
return a;
|
||||
}
|
||||
|
||||
double fix16_to_dbl(fix16_t a) {
|
||||
return a;
|
||||
}
|
||||
|
||||
fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1) {
|
||||
return inArg0 * inArg1;
|
||||
}
|
||||
#else
|
||||
|
||||
/// @brief Produces a fixed point number from a 16-bit signed integer, normalized to ]-1,1[.
|
||||
/// @param a Signed 16-bit integer.
|
||||
/// @return A fixed point number in Q3.28 format, with input normalized to ]-1,1[.
|
||||
fix3_28_t norm_fix3_28_from_s16sample(int16_t a) {
|
||||
static inline fix3_28_t norm_fix3_28_from_s16sample(int16_t a) {
|
||||
/* So, we're using a Q3.28 fixed point system here, and we want the incoming
|
||||
audio signal to be represented as a number between -1 and 1. To do this,
|
||||
we need the 16-bit value to map to the 28-bit right-of-decimal field in
|
||||
|
@ -79,7 +43,7 @@ fix3_28_t norm_fix3_28_from_s16sample(int16_t a) {
|
|||
/// calculated sample to one that the DAC can understand.
|
||||
/// @param a
|
||||
/// @return Signed 16-bit integer.
|
||||
int16_t norm_fix3_28_to_s16sample(fix3_28_t a) {
|
||||
static inline int16_t norm_fix3_28_to_s16sample(fix3_28_t a) {
|
||||
// Handle rounding up front, adding one can cause an overflow/underflow
|
||||
|
||||
// It's not clear exactly how this works, so we'll disable it for now.
|
||||
|
@ -110,8 +74,7 @@ int16_t norm_fix3_28_to_s16sample(fix3_28_t a) {
|
|||
return (a >> 12);
|
||||
}
|
||||
|
||||
|
||||
fix3_28_t fix3_28_from_dbl(double a) {
|
||||
static inline fix3_28_t fix3_28_from_dbl(double a) {
|
||||
double temp = a * fix16_one;
|
||||
temp += (double)((temp >= 0) ? 0.5f : -0.5f);
|
||||
return (fix3_28_t)temp;
|
||||
|
@ -121,7 +84,7 @@ fix3_28_t fix3_28_from_dbl(double a) {
|
|||
/// @param inArg0 Q3.28 format fixed point number.
|
||||
/// @param inArg1 Q3.28 format fixed point number.
|
||||
/// @return A Q3.28 fixed point number that represents the truncated result of inArg0 x inArg1.
|
||||
fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) {
|
||||
static inline fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) {
|
||||
const int64_t product = (int64_t)inArg0 * inArg1;
|
||||
|
||||
/* Since we're expecting 2 Q3.28 numbers, the multiplication result should be a Q7.56 number.
|
||||
|
@ -143,5 +106,4 @@ fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) {
|
|||
}
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
}
|
|
@ -64,7 +64,7 @@ void i2s_write_init(i2s_obj_t *self) {
|
|||
self->prog_offset + self->pio_program->length - 1);
|
||||
pio_sm_set_config(self->pio, self->sm, &config);
|
||||
|
||||
uint8_t *rbs = malloc(sizeof(uint8_t) * RINGBUF_LEN_IN_BYTES);
|
||||
uint32_t *rbs = malloc(sizeof(uint8_t) * RINGBUF_LEN_IN_BYTES);
|
||||
ringbuf_init(&self->ring_buffer, rbs, RINGBUF_LEN_IN_BYTES);
|
||||
|
||||
irq_set_exclusive_handler(DMA_IRQ_1, dma_irq_write_handler);
|
||||
|
@ -169,27 +169,27 @@ uint8_t *dma_get_buffer(i2s_obj_t *i2s_obj, uint channel) {
|
|||
void feed_dma(i2s_obj_t *self, uint8_t *dma_buffer_p) {
|
||||
// when data exists, copy samples from ring buffer
|
||||
if (ringbuf_available_data(&self->ring_buffer) >= SIZEOF_HALF_DMA_BUFFER_IN_BYTES) {
|
||||
for (uint32_t i = 0; i < SIZEOF_HALF_DMA_BUFFER_IN_BYTES; i++)
|
||||
ringbuf_pop(&self->ring_buffer, &dma_buffer_p[i]);
|
||||
for (uint32_t i = 0; i < SIZEOF_HALF_DMA_BUFFER_IN_BYTES; i+=4)
|
||||
ringbuf_pop(&self->ring_buffer, (uint32_t*)&dma_buffer_p[i]);
|
||||
} else {
|
||||
// underflow. clear buffer to transmit "silence" on the I2S bus
|
||||
memset(dma_buffer_p, 0, SIZEOF_HALF_DMA_BUFFER_IN_BYTES);
|
||||
}
|
||||
}
|
||||
|
||||
uint i2s_stream_write(i2s_obj_t *self, const uint8_t *buf_out, uint size) {
|
||||
uint i2s_stream_write(i2s_obj_t *self, const uint32_t *buf_out, uint size) {
|
||||
if (size == 0) {
|
||||
//printf("ERROR: buffer can't be length zero");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
uint32_t num_bytes_written = copy_userbuf_to_ringbuf(self, buf_out, size);
|
||||
return num_bytes_written;
|
||||
uint32_t num_words_written = copy_userbuf_to_ringbuf(self, buf_out, size);
|
||||
return num_words_written;
|
||||
}
|
||||
|
||||
// TODO maybe we can skip every fourth byte, if we're doing this in 24-bit...
|
||||
// could save on some processing power
|
||||
uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *self, const uint8_t *buf_out, uint size) {
|
||||
uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *self, const uint32_t *buf_out, uint size) {
|
||||
uint32_t a_index = 0;
|
||||
|
||||
while (a_index < size) {
|
||||
|
|
|
@ -59,7 +59,7 @@ typedef struct _i2s_obj_t {
|
|||
extern i2s_obj_t i2s_write_obj;
|
||||
|
||||
void i2s_write_init(i2s_obj_t *);
|
||||
uint i2s_stream_write(i2s_obj_t *, const uint8_t *, uint);
|
||||
uint i2s_stream_write(i2s_obj_t *, const uint32_t *, uint);
|
||||
|
||||
void dma_irq_handler(uint8_t);
|
||||
void dma_irq_write_handler(void);
|
||||
|
@ -68,6 +68,6 @@ void dma_configure(i2s_obj_t *);
|
|||
uint8_t *dma_get_buffer(i2s_obj_t *, uint);
|
||||
void feed_dma(i2s_obj_t *, uint8_t *);
|
||||
|
||||
uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *, const uint8_t *, uint);
|
||||
uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *, const uint32_t *, uint);
|
||||
|
||||
#endif
|
|
@ -33,14 +33,14 @@
|
|||
// - Sequential atomic operations
|
||||
// One element of capacity is used to detect buffer empty/full
|
||||
|
||||
void ringbuf_init(ring_buf_t *rbuf, uint8_t *buffer, size_t size) {
|
||||
void ringbuf_init(ring_buf_t *rbuf, uint32_t *buffer, size_t size) {
|
||||
rbuf->buffer = buffer;
|
||||
rbuf->size = size;
|
||||
rbuf->head = 0;
|
||||
rbuf->tail = 0;
|
||||
}
|
||||
|
||||
bool ringbuf_push(ring_buf_t *rbuf, uint8_t data) {
|
||||
bool ringbuf_push(ring_buf_t *rbuf, uint32_t data) {
|
||||
size_t next_tail = (rbuf->tail + 1) % rbuf->size;
|
||||
|
||||
if (next_tail != rbuf->head) {
|
||||
|
@ -53,7 +53,7 @@ bool ringbuf_push(ring_buf_t *rbuf, uint8_t data) {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool ringbuf_pop(ring_buf_t *rbuf, uint8_t *data) {
|
||||
bool ringbuf_pop(ring_buf_t *rbuf, uint32_t *data) {
|
||||
if (rbuf->head == rbuf->tail) {
|
||||
// empty
|
||||
return false;
|
||||
|
|
|
@ -28,15 +28,15 @@
|
|||
#include "pico/stdlib.h"
|
||||
|
||||
typedef struct _ring_buf_t {
|
||||
uint8_t *buffer;
|
||||
uint32_t *buffer;
|
||||
size_t head;
|
||||
size_t tail;
|
||||
size_t size;
|
||||
} ring_buf_t;
|
||||
|
||||
void ringbuf_init(ring_buf_t *, uint8_t *, size_t);
|
||||
bool ringbuf_push(ring_buf_t *, uint8_t );
|
||||
bool ringbuf_pop(ring_buf_t *, uint8_t *);
|
||||
void ringbuf_init(ring_buf_t *, uint32_t *, size_t);
|
||||
bool ringbuf_push(ring_buf_t *, uint32_t );
|
||||
bool ringbuf_pop(ring_buf_t *, uint32_t *);
|
||||
bool ringbuf_is_empty(ring_buf_t *);
|
||||
bool ringbuf_is_full(ring_buf_t *);
|
||||
size_t ringbuf_available_data(ring_buf_t *);
|
||||
|
|
|
@ -123,83 +123,60 @@ static void __no_inline_not_in_flash_func(_as_audio_packet)(struct usb_endpoint
|
|||
int16_t *in = (int16_t *) usb_buffer->data;
|
||||
int32_t *out = (int32_t *) userbuf;
|
||||
int samples = usb_buffer->data_len / 2;
|
||||
|
||||
multicore_fifo_push_blocking(CORE0_READY);
|
||||
multicore_fifo_push_blocking((uintptr_t) in);
|
||||
|
||||
// TODO: For some reason if we try to process in from both cores the left and right channels
|
||||
// flip back and forth..
|
||||
if (preprocessing.reverse_stereo) {
|
||||
for (int i = 0; i < samples; i+=2) {
|
||||
out[i] = fix16_mul(norm_fix3_28_from_s16sample(in[i+1]), preprocessing.preamp);
|
||||
out[i+1] = fix16_mul(norm_fix3_28_from_s16sample(in[i]), preprocessing.preamp);
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < samples; i++)
|
||||
out[i] = fix16_mul(norm_fix3_28_from_s16sample(in[i]), preprocessing.preamp);
|
||||
}
|
||||
|
||||
multicore_fifo_push_blocking(samples);
|
||||
|
||||
if (preprocessing.reverse_stereo) {
|
||||
in++;
|
||||
}
|
||||
for (int i = 0; i < samples; i += 2) {
|
||||
// Preamp the sample
|
||||
fix3_28_t x_f16 = fix16_mul(norm_fix3_28_from_s16sample((int16_t) in[i]), preprocessing.preamp);
|
||||
|
||||
// Run the filters
|
||||
for (int j = 0; j < filter_stages; j++) {
|
||||
x_f16 = bqf_transform(x_f16, &bqf_filters_left[j], &bqf_filters_mem_left[j]);
|
||||
out[i] = bqf_transform(out[i], &bqf_filters_left[j], &bqf_filters_mem_left[j]);
|
||||
}
|
||||
// Convert back to sample
|
||||
out[i] = (int32_t) norm_fix3_28_to_s16sample(x_f16);
|
||||
out[i] = (int32_t) norm_fix3_28_to_s16sample(out[i]);
|
||||
}
|
||||
|
||||
// Signal to core 1 that we have processed our samples, so it can write to I2S
|
||||
multicore_fifo_push_blocking(CORE0_READY);
|
||||
|
||||
update_volume();
|
||||
apply_config_changes();
|
||||
|
||||
// keep on truckin'
|
||||
usb_grow_transfer(ep->current_transfer, 1);
|
||||
usb_packet_done(ep);
|
||||
}
|
||||
|
||||
void __no_inline_not_in_flash_func(core1_entry)() {
|
||||
uint8_t *userbuf = (uint8_t *) multicore_fifo_pop_blocking();
|
||||
uint32_t *userbuf = (uint32_t *) multicore_fifo_pop_blocking();
|
||||
int32_t *out = (int32_t *) userbuf;
|
||||
int limit_counter = 100;
|
||||
|
||||
// Signal that the thread has started
|
||||
multicore_fifo_push_blocking(CORE1_READY);
|
||||
|
||||
while (true) {
|
||||
// Block until the userbuf is filled with data
|
||||
uint32_t ready = multicore_fifo_pop_blocking();
|
||||
while (ready != CORE0_READY)
|
||||
ready = multicore_fifo_pop_blocking();
|
||||
|
||||
int16_t *in = (int16_t *) multicore_fifo_pop_blocking();
|
||||
const uint32_t samples = multicore_fifo_pop_blocking();
|
||||
|
||||
if (preprocessing.reverse_stereo) {
|
||||
in--;
|
||||
}
|
||||
for (int i = 1; i < samples; i += 2) {
|
||||
// Preamp the sample
|
||||
fix3_28_t x_f16 = fix16_mul(norm_fix3_28_from_s16sample((int16_t) in[i]), preprocessing.preamp);
|
||||
|
||||
// Run the filters
|
||||
for (int j = 0; j < filter_stages; j++) {
|
||||
x_f16 = bqf_transform(x_f16, &bqf_filters_right[j], &bqf_filters_mem_right[j]);
|
||||
out[i] = bqf_transform(out[i], &bqf_filters_right[j], &bqf_filters_mem_right[j]);
|
||||
}
|
||||
// Convert back to sample
|
||||
out[i] = (int32_t) norm_fix3_28_to_s16sample(x_f16);
|
||||
out[i] = (int32_t) norm_fix3_28_to_s16sample(out[i]);
|
||||
}
|
||||
|
||||
// Update the volume and filter configs if required. We do this from
|
||||
// core1 as core0 is more heavily loaded, doing this from core0 can
|
||||
// lead to audio crackling.
|
||||
// Use of a counter reduces the amount of crackling when changing
|
||||
// volume.
|
||||
if (limit_counter != 0)
|
||||
limit_counter--;
|
||||
else {
|
||||
limit_counter = 100;
|
||||
update_volume();
|
||||
apply_config_changes();
|
||||
}
|
||||
|
||||
// Signal to core 0 that the data has all been transformed
|
||||
multicore_fifo_push_blocking(CORE1_READY);
|
||||
|
||||
i2s_stream_write(&i2s_write_obj, userbuf, samples * 4);
|
||||
// Wait for Core 0 to finish running its filtering before we apply config updates
|
||||
multicore_fifo_pop_blocking();
|
||||
i2s_stream_write(&i2s_write_obj, userbuf, samples);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue