Enable oratory's 15 band EQ (#23)

* Attempt at optimizing so the new filtering runs better.

* Additional improvements.

* Further optimisations.

* Seems to work OK with 10 filters. Just noise with 11.

* Increase config buffer size, and make the bqf_transform function inline

* Remove extra loop and process input evenly across both cores.

* Enable 15 band EQ.

* Shift some load off core1

* Revert buffer size change

* Mark USB transfer as done sooner.

* Fast multiply.

* Fix build failure.

* Roll back changes we don't need.

* Fix save config to flash

* Increase filter stages to 20. We can't quite run that many though.

* Replace a few doubles with floats. According to the raspberry-pi-pico-c-sdk manual, doubles are around 3 times slower than floats.
This commit is contained in:
George Norton 2023-08-23 20:47:01 +01:00 committed by GitHub
parent fd6cbf54d5
commit 1e6896f918
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 129 additions and 137 deletions

View File

@ -14,7 +14,6 @@ add_executable(ploopy_headphones
run.c
ringbuf.c
i2s.c
fix16.c
bqf.c
configuration_manager.c
)
@ -63,6 +62,9 @@ target_compile_definitions(ploopy_headphones PRIVATE
GIT_HASH="${GIT_HASH}"
PICO_XOSC_STARTUP_DELAY_MULTIPLIER=64
# Performance, avoid calls to ____wrap___aeabi_lmul_veneer when doing 64bit multiplies
PICO_INT64_OPS_IN_RAM=1
)
pico_enable_stdio_usb(ploopy_headphones 0)

View File

@ -467,21 +467,6 @@ void bqf_highshelf_config(double fs, double f0, double dBgain, double Q,
coefficients->a2 = fix3_28_from_dbl(a2);
}
/// @brief Runs one sample through a single biquad stage and advances its delay line.
/// @param x Input sample.
/// @param coefficients Stage coefficients; b0, b1, b2, a1 and a2 are read.
/// @param memory Stage history (x_1, x_2, y_1, y_2); updated in place.
/// @return The filtered output sample, which is also stored as the new y_1.
fix3_28_t bqf_transform(fix3_28_t x, bqf_coeff_t *coefficients, bqf_mem_t *memory) {
    // Capture the history before any of it is overwritten below.
    const fix3_28_t prev_x1 = memory->x_1;
    const fix3_28_t prev_x2 = memory->x_2;
    const fix3_28_t prev_y1 = memory->y_1;
    const fix3_28_t prev_y2 = memory->y_2;

    // Accumulate the terms in the same left-to-right order as the
    // difference equation: b0*x - a1*y1 + b1*x1 - a2*y2 + b2*x2.
    fix3_28_t out = fix16_mul(coefficients->b0, x);
    out -= fix16_mul(coefficients->a1, prev_y1);
    out += fix16_mul(coefficients->b1, prev_x1);
    out -= fix16_mul(coefficients->a2, prev_y2);
    out += fix16_mul(coefficients->b2, prev_x2);

    // Shift the delay line by one sample.
    memory->x_2 = prev_x1;
    memory->x_1 = x;
    memory->y_2 = prev_y1;
    memory->y_1 = out;

    return out;
}
void bqf_memreset(bqf_mem_t *memory) {
memory->x_1 = fix16_zero;
memory->x_2 = fix16_zero;

View File

@ -41,9 +41,9 @@ typedef struct _bqf_mem_t {
fix3_28_t y_2;
} bqf_mem_t;
// In reality we do not have enough CPU resource to run 8 filtering
// stages without some optimisation.
#define MAX_FILTER_STAGES 8
// More filters should be possible, but the config structure
// might grow beyond the current 512 byte size.
#define MAX_FILTER_STAGES 20
extern int filter_stages;
extern bqf_coeff_t bqf_filters_left[MAX_FILTER_STAGES];
@ -65,7 +65,8 @@ void bqf_peaking_config(double, double, double, double, bqf_coeff_t *);
void bqf_lowshelf_config(double, double, double, double, bqf_coeff_t *);
void bqf_highshelf_config(double, double, double, double, bqf_coeff_t *);
fix3_28_t bqf_transform(fix3_28_t, bqf_coeff_t *, bqf_mem_t *);
static inline fix3_28_t bqf_transform(fix3_28_t, bqf_coeff_t *, bqf_mem_t *);
void bqf_memreset(bqf_mem_t *);
#include "bqf.inl"
#endif

36
firmware/code/bqf.inl Normal file
View File

@ -0,0 +1,36 @@
/**
* Copyright 2022 Colin Lam, Ploopy Corporation
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* SPECIAL THANKS TO:
* Robert Bristow-Johnson, a.k.a. RBJ
* for his exceptional work on biquad formulae as applied to digital
* audio filtering, summarised in his pamphlet, "Audio EQ Cookbook".
*/
/// @brief Applies one biquad stage to a sample and updates the stage history.
/// @param x Input sample.
/// @param coefficients Stage coefficients; b0, b1, b2, a1 and a2 are read.
/// @param memory Previous two inputs (x_1, x_2) and outputs (y_1, y_2); updated in place.
/// @return The filtered output sample, which is also stored as the new y_1.
static inline fix3_28_t bqf_transform(fix3_28_t x, bqf_coeff_t *coefficients, bqf_mem_t *memory) {
    // Read the delay line up front so the updates below cannot disturb the sum.
    const fix3_28_t x1 = memory->x_1;
    const fix3_28_t x2 = memory->x_2;
    const fix3_28_t y1 = memory->y_1;
    const fix3_28_t y2 = memory->y_2;

    // Same term order as the original expression:
    // b0*x - a1*y1 + b1*x1 - a2*y2 + b2*x2.
    fix3_28_t acc = fix16_mul(coefficients->b0, x);
    acc -= fix16_mul(coefficients->a1, y1);
    acc += fix16_mul(coefficients->b1, x1);
    acc -= fix16_mul(coefficients->a2, y2);
    acc += fix16_mul(coefficients->b2, x2);

    // Age the history by one sample.
    memory->x_2 = x1;
    memory->x_1 = x;
    memory->y_2 = y1;
    memory->y_1 = acc;

    return acc;
}

View File

@ -52,16 +52,23 @@ static const default_configuration default_config = {
.set_configuration = { SET_CONFIGURATION, sizeof(default_config) },
.filters = {
.filter = { FILTER_CONFIGURATION, sizeof(default_config.filters) },
.f1 = { PEAKING, {0}, 40, -20, 1.4 },
.f2 = { LOWSHELF, {0}, 105, 2.5, 0.7 },
.f3 = { PEAKING, {0}, 450, 7, 1.8 },
.f4 = { PEAKING, {0}, 2100, 8, 3.0 },
.f5 = { PEAKING, {0}, 3500, -7.5, 2.9 },
.f6 = { PEAKING, {0}, 5200, 5.5, 3.0 },
.f7 = { PEAKING, {0}, 6400, -19, 4.0 },
.f8 = { PEAKING, {0}, 9000, 3.0, 2.0 }
.f1 = { PEAKING, {0}, 38.5, -21.0, 1.4 },
.f2 = { PEAKING, {0}, 60, -6.7, 0.5 },
.f3 = { LOWSHELF, {0}, 105, 5.5, 0.71 },
.f4 = { PEAKING, {0}, 280, -3.5, 1.1 },
.f5 = { PEAKING, {0}, 350, -1.6, 6.0 },
.f6 = { PEAKING, {0}, 425, 7.8, 1.3 },
.f7 = { PEAKING, {0}, 500, -2.0, 7.0 },
.f8 = { PEAKING, {0}, 690, -5.5, 3.0 },
.f9 = { PEAKING, {0}, 1000, -2.2, 5.0 },
.f10 = { PEAKING, {0}, 1530, -4.0, 2.5 },
.f11 = { PEAKING, {0}, 2250, 6.0, 2.0 },
.f12 = { PEAKING, {0}, 3430, -12.2, 2.0 },
.f13 = { PEAKING, {0}, 4800, 4.0, 2.0 },
.f14 = { PEAKING, {0}, 6200, -15.0, 3.0 },
.f15 = { HIGHSHELF, {0}, 12000, -6.0, 0.71 }
},
.preprocessing = { .header = { PREPROCESSING_CONFIGURATION, sizeof(default_config.preprocessing) }, -0.16f, true, {0} }
.preprocessing = { .header = { PREPROCESSING_CONFIGURATION, sizeof(default_config.preprocessing) }, -0.08f, true, {0} }
};
// Grab the last 4k page of flash for our configuration structures.
@ -74,7 +81,7 @@ const uint8_t *user_configuration = (const uint8_t *) (XIP_BASE + USER_CONFIGURA
* should handle merging configurations where, for example, only a new
* filter_configuration_tlv was received.
*/
#define CFG_BUFFER_SIZE 256
#define CFG_BUFFER_SIZE 512
static uint8_t working_configuration[2][CFG_BUFFER_SIZE];
static uint8_t inactive_working_configuration = 0;
static uint8_t result_buffer[CFG_BUFFER_SIZE] = { U16_TO_U8S_LE(NOK), U16_TO_U8S_LE(0) };
@ -129,7 +136,7 @@ bool validate_filter_configuration(filter_configuration_tlv *filters)
printf("Error! Not enough data left for filter6 (%d)\n", remaining);
return false;
}
if (args->a0 == 0.0) {
if (args->a0 == 0.0f) {
printf("Error! The a0 co-efficient of an IIR filter must not be 0.\n");
return false;
}
@ -182,7 +189,7 @@ void apply_filter_configuration(filter_configuration_tlv *filters) {
uint32_t checksum = 0;
for (int i = 0; i < sizeof(filter6) / 4; i++) checksum ^= ((uint32_t*) args)[i];
if (checksum != bqf_filter_checksum[filter_stages]) {
bqf_filters_left[filter_stages].a0 = fix3_28_from_dbl(1.0);
bqf_filters_left[filter_stages].a0 = fix16_one;
bqf_filters_left[filter_stages].a1 = fix3_28_from_dbl(args->a1/args->a0);
bqf_filters_left[filter_stages].a2 = fix3_28_from_dbl(args->a2/args->a0);
bqf_filters_left[filter_stages].b0 = fix3_28_from_dbl(args->b0/args->a0);
@ -308,7 +315,7 @@ bool apply_configuration(tlv_header *config) {
#ifndef TEST_TARGET
case PREPROCESSING_CONFIGURATION: {
preprocessing_configuration_tlv* preprocessing_config = (preprocessing_configuration_tlv*) tlv;
preprocessing.preamp = fix3_28_from_dbl(1.0 + preprocessing_config->preamp);
preprocessing.preamp = fix3_28_from_flt(1.0f + preprocessing_config->preamp);
preprocessing.reverse_stereo = preprocessing_config->reverse_stereo;
break;
}
@ -352,7 +359,7 @@ bool __no_inline_not_in_flash_func(save_configuration)() {
const size_t config_length = config->length - ((size_t)config->value - (size_t)config);
// Write data to flash
uint8_t flash_buffer[FLASH_PAGE_SIZE];
uint8_t flash_buffer[CFG_BUFFER_SIZE];
flash_header_tlv* flash_header = (flash_header_tlv*) flash_buffer;
flash_header->header.type = FLASH_HEADER;
flash_header->header.length = sizeof(flash_header_tlv) + config_length;
@ -362,7 +369,7 @@ bool __no_inline_not_in_flash_func(save_configuration)() {
uint32_t ints = save_and_disable_interrupts();
flash_range_erase(USER_CONFIGURATION_OFFSET, FLASH_SECTOR_SIZE);
flash_range_program(USER_CONFIGURATION_OFFSET, flash_buffer, FLASH_PAGE_SIZE);
flash_range_program(USER_CONFIGURATION_OFFSET, flash_buffer, CFG_BUFFER_SIZE);
restore_interrupts(ints);
power_up_dac();

View File

@ -17,8 +17,8 @@
#include <stdint.h>
#define FLASH_MAGIC 0x2E8AFEDD
#define CONFIG_VERSION 2
#define MINIMUM_CONFIG_VERSION 1
#define CONFIG_VERSION 3
#define MINIMUM_CONFIG_VERSION 3
enum structure_types {
// Commands/Responses, these are container TLVs. The Value will be a set of TLV structures.
@ -53,18 +53,20 @@ typedef struct __attribute__((__packed__)) _tlv_header {
typedef struct __attribute__((__packed__)) _filter2 {
uint8_t type;
uint8_t reserved[3];
double f0;
double Q;
float f0;
float Q;
} filter2;
typedef struct __attribute__((__packed__)) _filter3 {
uint8_t type;
uint8_t reserved[3];
double f0;
double db_gain;
double Q;
float f0;
float db_gain;
float Q;
} filter3;
// WARNING: We won't be able to support more than 8 of these filters
// due to the config structure size.
typedef struct __attribute__((__packed__)) _filter6 {
uint8_t type;
uint8_t reserved[3];
@ -98,7 +100,7 @@ typedef struct __attribute__((__packed__)) _flash_header_tlv {
typedef struct __attribute__((__packed__)) _preprocessing_configuration_tlv {
tlv_header header;
double preamp;
float preamp;
uint8_t reverse_stereo;
uint8_t reserved[3];
} preprocessing_configuration_tlv;
@ -137,6 +139,13 @@ typedef struct __attribute__((__packed__)) _default_configuration {
filter3 f6;
filter3 f7;
filter3 f8;
filter3 f9;
filter3 f10;
filter3 f11;
filter3 f12;
filter3 f13;
filter3 f14;
filter3 f15;
} filters;
preprocessing_configuration_tlv preprocessing;
} default_configuration;

View File

@ -25,13 +25,6 @@
#include <stdbool.h>
#include <inttypes.h>
// During development, it can be useful to run with real double values for reference.
//#define USE_DOUBLE
#ifdef USE_DOUBLE
typedef double fix16_t;
static const fix16_t fix16_zero = 0;
static const fix16_t fix16_one = 1;
#else
/// @brief Fixed point math type, in format Q3.28. One sign bit, 3 bits for left-of-decimal
/// and 28 for right-of-decimal. This arrangement works because we normalize the incoming USB
@ -46,15 +39,15 @@ static const fix3_28_t fix16_one = 0x10000000;
/// @brief Represents zero in fixed point world.
static const fix3_28_t fix16_zero = 0x00000000;
#endif
static inline fix3_28_t norm_fix3_28_from_s16sample(int16_t);
static inline int16_t norm_fix3_28_to_s16sample(fix3_28_t);
fix3_28_t norm_fix3_28_from_s16sample(int16_t);
static inline fix3_28_t fix3_28_from_flt(float);
int16_t norm_fix3_28_to_s16sample(fix3_28_t);
static inline fix3_28_t fix3_28_from_dbl(double);
fix3_28_t fix3_28_from_dbl(double);
fix3_28_t fix16_mul(fix3_28_t, fix3_28_t);
static inline fix3_28_t fix16_mul(fix3_28_t, fix3_28_t);
#include "fix16.inl"
#endif

View File

@ -25,46 +25,10 @@
#include <limits.h>
#include "fix16.h"
#ifdef USE_DOUBLE
/// @brief Reference (USE_DOUBLE) build: converts a signed 16-bit sample to fix16_t.
/// In this build fix16_t is a plain double, so the sample value is passed
/// through unchanged apart from the implicit integer-to-double conversion.
/// @param a Signed 16-bit sample.
/// @return The same value as a fix16_t.
fix16_t fix16_from_s16sample(int16_t a) {
return a;
}
/// @brief Reference (USE_DOUBLE) build: converts a fix16_t sample to a signed
/// 16-bit integer, rounding to nearest and saturating at the int16 limits.
/// @param a Sample value; in this build fix16_t is a plain double.
/// @return The rounded sample, clamped to [SHRT_MIN, SHRT_MAX].
int16_t fix16_to_s16sample(fix16_t a) {
    // Handle rounding up front, adding one can cause an overflow/underflow
    if (a < 0) {
        a -= 0.5;
    } else {
        a += 0.5;
    }

    // Saturate the value if an overflow has occurred
    if (a < SHRT_MIN) {
        return SHRT_MIN;
    }
    // The upper clamp must trigger only when the value exceeds SHRT_MAX;
    // testing `a < SHRT_MAX` would clamp almost every in-range sample.
    if (a > SHRT_MAX) {
        return SHRT_MAX;
    }

    // In range: the conversion to int16_t truncates the rounded value.
    return (int16_t)a;
}
/// @brief Reference (USE_DOUBLE) build: converts a double to fix16_t.
/// fix16_t is itself a double in this build, so the value is returned as-is.
/// @param a Value to convert.
/// @return The same value as a fix16_t.
fix16_t fix16_from_dbl(double a) {
return a;
}
/// @brief Reference (USE_DOUBLE) build: converts a fix16_t to a double.
/// fix16_t is itself a double in this build, so the value is returned as-is.
/// @param a Value to convert.
/// @return The same value as a double.
double fix16_to_dbl(fix16_t a) {
return a;
}
/// @brief Reference (USE_DOUBLE) build: multiplies two fix16_t values.
/// With fix16_t defined as double this is an ordinary floating point
/// multiplication; no fixed point shifting is involved.
/// @param inArg0 First operand.
/// @param inArg1 Second operand.
/// @return The product inArg0 * inArg1.
fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1) {
return inArg0 * inArg1;
}
#else
/// @brief Produces a fixed point number from a 16-bit signed integer, normalized to ]-1,1[.
/// @param a Signed 16-bit integer.
/// @return A fixed point number in Q3.28 format, with input normalized to ]-1,1[.
fix3_28_t norm_fix3_28_from_s16sample(int16_t a) {
static inline fix3_28_t norm_fix3_28_from_s16sample(int16_t a) {
/* So, we're using a Q3.28 fixed point system here, and we want the incoming
audio signal to be represented as a number between -1 and 1. To do this,
we need the 16-bit value to map to the 28-bit right-of-decimal field in
@ -79,7 +43,7 @@ fix3_28_t norm_fix3_28_from_s16sample(int16_t a) {
/// calculated sample to one that the DAC can understand.
/// @param a
/// @return Signed 16-bit integer.
int16_t norm_fix3_28_to_s16sample(fix3_28_t a) {
static inline int16_t norm_fix3_28_to_s16sample(fix3_28_t a) {
// Handle rounding up front, adding one can cause an overflow/underflow
// It's not clear exactly how this works, so we'll disable it for now.
@ -110,8 +74,13 @@ int16_t norm_fix3_28_to_s16sample(fix3_28_t a) {
return (a >> 12);
}
static inline fix3_28_t fix3_28_from_flt(float a) {
float temp = a * fix16_one;
temp += ((temp >= 0) ? 0.5f : -0.5f);
return (fix3_28_t)temp;
}
fix3_28_t fix3_28_from_dbl(double a) {
static inline fix3_28_t fix3_28_from_dbl(double a) {
double temp = a * fix16_one;
temp += (double)((temp >= 0) ? 0.5f : -0.5f);
return (fix3_28_t)temp;
@ -121,27 +90,22 @@ fix3_28_t fix3_28_from_dbl(double a) {
/// @param inArg0 Q3.28 format fixed point number.
/// @param inArg1 Q3.28 format fixed point number.
/// @return A Q3.28 fixed point number that represents the truncated result of inArg0 x inArg1.
fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) {
const int64_t product = (int64_t)inArg0 * inArg1;
static inline fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) {
int32_t A = (inArg0 >> 14), C = (inArg1 >> 14);
uint32_t B = (inArg0 & 0x3FFF), D = (inArg1 & 0x3FFF);
int32_t AC = A*C;
int32_t AD_CB = A*D + C*B;
int32_t product_hi = AC + (AD_CB >> 14);
/* Since we're expecting 2 Q3.28 numbers, the multiplication result should be a Q7.56 number.
To bring this number back to the right order of magnitude, we need to shift
it to the right by 28. */
fix3_28_t result = product >> 28;
#if HANDLE_CARRY
// Handle carry from lower bits to upper part of result.
uint32_t BD = B*D;
uint32_t ad_cb_temp = AD_CB << 14;
uint32_t product_lo = BD + ad_cb_temp;
// Handle rounding where we are choppping off low order bits
// Disabled for now, too much load. We get crackling when adjusting
// the volume.
#if 0
if (product & 0x4000) {
if (result >= 0) {
result++;
}
else {
result--;
}
}
#endif
return result;
}
#endif
if (product_lo < BD)
product_hi++;
#endif
return product_hi;
}

View File

@ -70,4 +70,4 @@ void feed_dma(i2s_obj_t *, uint8_t *);
uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *, const uint8_t *, uint);
#endif
#endif

View File

@ -78,4 +78,4 @@ size_t ringbuf_available_data(ring_buf_t *rbuf) {
/// @brief Reports how much more data the ring buffer can accept.
/// @param rbuf Ring buffer to query.
/// @return The buffer size minus the data currently available, minus one.
size_t ringbuf_available_space(ring_buf_t *rbuf) {
    const size_t used = ringbuf_available_data(rbuf);
    return rbuf->size - used - 1;
}
}

View File

@ -42,4 +42,4 @@ bool ringbuf_is_full(ring_buf_t *);
size_t ringbuf_available_data(ring_buf_t *);
size_t ringbuf_available_space(ring_buf_t *);
#endif
#endif

View File

@ -118,7 +118,7 @@ static void update_volume()
// PCM data into I2S data that gets shipped out to the PCM3060. It really
// belongs with the other USB-related code due to its utter indecipherability,
// but it's placed here to emphasize its importance.
static void _as_audio_packet(struct usb_endpoint *ep) {
static void __no_inline_not_in_flash_func(_as_audio_packet)(struct usb_endpoint *ep) {
struct usb_buffer *usb_buffer = usb_current_out_packet_buffer(ep);
int16_t *in = (int16_t *) usb_buffer->data;
int32_t *out = (int32_t *) userbuf;
@ -156,15 +156,22 @@ static void _as_audio_packet(struct usb_endpoint *ep) {
uint32_t ready = multicore_fifo_pop_blocking();
multicore_fifo_push_blocking(CORE0_READY);
// Update the volume if required. We do this from core1 as
// core0 is more heavily loaded, doing this from core0 can
// lead to audio crackling.
update_volume();
// Update filters if required
apply_config_changes();
// keep on truckin'
usb_grow_transfer(ep->current_transfer, 1);
usb_packet_done(ep);
}
void core1_entry() {
void __no_inline_not_in_flash_func(core1_entry)() {
uint8_t *userbuf = (uint8_t *) multicore_fifo_pop_blocking();
int32_t *out = (int32_t *) userbuf;
int limit_counter = 100;
// Signal that the thread has started
multicore_fifo_push_blocking(CORE1_READY);
@ -191,19 +198,6 @@ void core1_entry() {
}
}
// Update the volume and filter configs if required. We do this from
// core1 as core0 is more heavily loaded, doing this from core0 can
// lead to audio crackling.
// Use of a counter reduces the amount of crackling when changing
// volume.
if (limit_counter != 0)
limit_counter--;
else {
limit_counter = 100;
update_volume();
apply_config_changes();
}
// Signal to core 0 that the data has all been transformed
multicore_fifo_push_blocking(CORE1_READY);
@ -253,7 +247,7 @@ void setup() {
// The PCM3060 supports standard mode (100kbps) or fast mode (400kbps)
// we run in fast mode so we dont block the core for too long while
// updating the volume.
i2c_init(i2c0, 100000);
i2c_init(i2c0, 400000);
gpio_set_function(PCM3060_SDA_PIN, GPIO_FUNC_I2C);
gpio_set_function(PCM3060_SCL_PIN, GPIO_FUNC_I2C);
gpio_pull_up(PCM3060_SDA_PIN);
@ -297,6 +291,7 @@ void setup() {
* IF YOU DO, YOU COULD BLOW UP YOUR HARDWARE! *
* YOU WERE WARNED!!!!!!!!!!!!!!!! *
****************************************************************************/
// TODO: roundf will be much faster than round, but it might mess with timings
void configure_neg_switch_pwm() {
gpio_set_function(NEG_SWITCH_PWM_PIN, GPIO_FUNC_PWM);
uint slice_num = pwm_gpio_to_slice_num(NEG_SWITCH_PWM_PIN);