Seems to work OK with 10 filters. Just noise with 11.
This commit is contained in:
		
							parent
							
								
									d2bb88a9fb
								
							
						
					
					
						commit
						635dac208a
					
				|  | @ -14,7 +14,6 @@ add_executable(ploopy_headphones | |||
|     run.c | ||||
|     ringbuf.c | ||||
|     i2s.c | ||||
|     fix16.c | ||||
|     bqf.c | ||||
|     configuration_manager.c | ||||
| ) | ||||
|  |  | |||
|  | @ -43,7 +43,7 @@ typedef struct _bqf_mem_t { | |||
| 
 | ||||
| // In reality we do not have enough CPU resources to run 8 filtering
 | ||||
| // stages without some optimisation.
 | ||||
| #define MAX_FILTER_STAGES 8 | ||||
| #define MAX_FILTER_STAGES 10 | ||||
| extern int filter_stages; | ||||
| 
 | ||||
| extern bqf_coeff_t bqf_filters_left[MAX_FILTER_STAGES]; | ||||
|  |  | |||
|  | @ -25,13 +25,6 @@ | |||
| #include <stdbool.h> | ||||
| #include <inttypes.h> | ||||
| 
 | ||||
| // During development, it can be useful to run with real double values for reference.
 | ||||
| //#define USE_DOUBLE
 | ||||
| #ifdef USE_DOUBLE | ||||
| typedef double fix16_t; | ||||
| static const fix16_t fix16_zero = 0; | ||||
| static const fix16_t fix16_one = 1; | ||||
| #else | ||||
| 
 | ||||
| /// @brief Fixed point math type, in format Q3.28. One sign bit, 3 bits for left-of-decimal
 | ||||
| /// and 28 for right-of-decimal. This arrangement works because we normalize the incoming USB
 | ||||
|  | @ -46,15 +39,13 @@ static const fix3_28_t fix16_one =    0x10000000; | |||
| /// @brief Represents zero in fixed point world.
 | ||||
| static const fix3_28_t fix16_zero = 0x00000000; | ||||
| 
 | ||||
| #endif | ||||
| static inline fix3_28_t norm_fix3_28_from_s16sample(int16_t); | ||||
| 
 | ||||
| static inline int16_t norm_fix3_28_to_s16sample(fix3_28_t); | ||||
| 
 | ||||
| fix3_28_t norm_fix3_28_from_s16sample(int16_t); | ||||
| static inline fix3_28_t fix3_28_from_dbl(double); | ||||
| 
 | ||||
| int16_t norm_fix3_28_to_s16sample(fix3_28_t); | ||||
| 
 | ||||
| fix3_28_t fix3_28_from_dbl(double); | ||||
| 
 | ||||
| fix3_28_t fix16_mul(fix3_28_t, fix3_28_t); | ||||
| static inline fix3_28_t fix16_mul(fix3_28_t, fix3_28_t); | ||||
| 
 | ||||
| #include "fix16.inl" | ||||
| #endif | ||||
|  | @ -25,46 +25,10 @@ | |||
| #include <limits.h> | ||||
| #include "fix16.h" | ||||
| 
 | ||||
| #ifdef USE_DOUBLE | ||||
| fix16_t fix16_from_s16sample(int16_t a) { | ||||
|     return a; | ||||
| } | ||||
| 
 | ||||
| int16_t fix16_to_s16sample(fix16_t a) { | ||||
|     // Handle rounding up front, adding one can cause an overflow/underflow | ||||
|     if (a < 0) { | ||||
|         a -= 0.5; | ||||
|     } else { | ||||
|         a += 0.5; | ||||
|     } | ||||
| 
 | ||||
|     // Saturate the value if an overflow has occurred | ||||
|     if (a < SHRT_MIN) { | ||||
|         return SHRT_MIN; | ||||
|     } | ||||
|     if (a < SHRT_MAX) { | ||||
|         return SHRT_MAX; | ||||
|     } | ||||
|     return a; | ||||
| } | ||||
| 
 | ||||
| fix16_t fix16_from_dbl(double a) { | ||||
|     return a; | ||||
| } | ||||
| 
 | ||||
| double fix16_to_dbl(fix16_t a) { | ||||
|     return a; | ||||
| } | ||||
| 
 | ||||
| fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1) { | ||||
|     return inArg0 * inArg1; | ||||
| } | ||||
| #else | ||||
| 
 | ||||
| /// @brief Produces a fixed point number from a 16-bit signed integer, normalized to ]-1,1[. | ||||
| /// @param a Signed 16-bit integer. | ||||
| /// @return A fixed point number in Q3.28 format, with input normalized to ]-1,1[. | ||||
| fix3_28_t norm_fix3_28_from_s16sample(int16_t a) { | ||||
| static inline fix3_28_t norm_fix3_28_from_s16sample(int16_t a) { | ||||
|     /* So, we're using a Q3.28 fixed point system here, and we want the incoming | ||||
|        audio signal to be represented as a number between -1 and 1. To do this, | ||||
|        we need the 16-bit value to map to the 28-bit right-of-decimal field in | ||||
|  | @ -79,7 +43,7 @@ fix3_28_t norm_fix3_28_from_s16sample(int16_t a) { | |||
| ///        calculated sample to one that the DAC can understand. | ||||
| /// @param a | ||||
| /// @return Signed 16-bit integer. | ||||
| int16_t norm_fix3_28_to_s16sample(fix3_28_t a) { | ||||
| static inline int16_t norm_fix3_28_to_s16sample(fix3_28_t a) { | ||||
|     // Handle rounding up front, adding one can cause an overflow/underflow | ||||
| 
 | ||||
|     // It's not clear exactly how this works, so we'll disable it for now. | ||||
|  | @ -110,8 +74,7 @@ int16_t norm_fix3_28_to_s16sample(fix3_28_t a) { | |||
|     return (a >> 12); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| fix3_28_t fix3_28_from_dbl(double a) { | ||||
| static inline fix3_28_t fix3_28_from_dbl(double a) { | ||||
|     double temp = a * fix16_one; | ||||
|     temp += (double)((temp >= 0) ? 0.5f : -0.5f); | ||||
|     return (fix3_28_t)temp; | ||||
|  | @ -121,7 +84,7 @@ fix3_28_t fix3_28_from_dbl(double a) { | |||
| /// @param inArg0 Q3.28 format fixed point number. | ||||
| /// @param inArg1 Q3.28 format fixed point number. | ||||
| /// @return A Q3.28 fixed point number that represents the truncated result of inArg0 x inArg1. | ||||
| fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) { | ||||
| static inline fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) { | ||||
|     const int64_t product = (int64_t)inArg0 * inArg1; | ||||
| 
 | ||||
|     /* Since we're expecting 2 Q3.28 numbers, the multiplication result should be a Q7.56 number. | ||||
|  | @ -143,5 +106,4 @@ fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) { | |||
|     } | ||||
|     #endif | ||||
|     return result; | ||||
| } | ||||
| #endif | ||||
| } | ||||
|  | @ -64,7 +64,7 @@ void i2s_write_init(i2s_obj_t *self) { | |||
|             self->prog_offset + self->pio_program->length - 1); | ||||
|     pio_sm_set_config(self->pio, self->sm, &config); | ||||
| 
 | ||||
|     uint8_t *rbs = malloc(sizeof(uint8_t) * RINGBUF_LEN_IN_BYTES); | ||||
|     uint32_t *rbs = malloc(sizeof(uint8_t) * RINGBUF_LEN_IN_BYTES); | ||||
|     ringbuf_init(&self->ring_buffer, rbs, RINGBUF_LEN_IN_BYTES); | ||||
| 
 | ||||
|     irq_set_exclusive_handler(DMA_IRQ_1, dma_irq_write_handler); | ||||
|  | @ -169,27 +169,27 @@ uint8_t *dma_get_buffer(i2s_obj_t *i2s_obj, uint channel) { | |||
| void feed_dma(i2s_obj_t *self, uint8_t *dma_buffer_p) { | ||||
|     // when data exists, copy samples from ring buffer
 | ||||
|     if (ringbuf_available_data(&self->ring_buffer) >= SIZEOF_HALF_DMA_BUFFER_IN_BYTES) { | ||||
|         for (uint32_t i = 0; i < SIZEOF_HALF_DMA_BUFFER_IN_BYTES; i++) | ||||
|             ringbuf_pop(&self->ring_buffer, &dma_buffer_p[i]); | ||||
|         for (uint32_t i = 0; i < SIZEOF_HALF_DMA_BUFFER_IN_BYTES; i+=4) | ||||
|             ringbuf_pop(&self->ring_buffer, (uint32_t*)&dma_buffer_p[i]); | ||||
|     } else { | ||||
|         // underflow.  clear buffer to transmit "silence" on the I2S bus
 | ||||
|         memset(dma_buffer_p, 0, SIZEOF_HALF_DMA_BUFFER_IN_BYTES); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| uint i2s_stream_write(i2s_obj_t *self, const uint8_t *buf_out, uint size) { | ||||
| uint i2s_stream_write(i2s_obj_t *self, const uint32_t *buf_out, uint size) { | ||||
|     if (size == 0) { | ||||
|         //printf("ERROR: buffer can't be length zero");
 | ||||
|         exit(1); | ||||
|     } | ||||
| 
 | ||||
|     uint32_t num_bytes_written = copy_userbuf_to_ringbuf(self, buf_out, size); | ||||
|     return num_bytes_written; | ||||
|     uint32_t num_words_written = copy_userbuf_to_ringbuf(self, buf_out, size); | ||||
|     return num_words_written; | ||||
| } | ||||
| 
 | ||||
| // TODO maybe we can skip every fourth byte, if we're doing this in 24-bit...
 | ||||
| // could save on some processing power
 | ||||
| uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *self, const uint8_t *buf_out, uint size) { | ||||
| uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *self, const uint32_t *buf_out, uint size) { | ||||
|     uint32_t a_index = 0; | ||||
| 
 | ||||
|     while (a_index < size) { | ||||
|  |  | |||
|  | @ -59,7 +59,7 @@ typedef struct _i2s_obj_t { | |||
| extern i2s_obj_t i2s_write_obj; | ||||
| 
 | ||||
| void i2s_write_init(i2s_obj_t *); | ||||
| uint i2s_stream_write(i2s_obj_t *, const uint8_t *, uint); | ||||
| uint i2s_stream_write(i2s_obj_t *, const uint32_t *, uint); | ||||
| 
 | ||||
| void dma_irq_handler(uint8_t); | ||||
| void dma_irq_write_handler(void); | ||||
|  | @ -68,6 +68,6 @@ void dma_configure(i2s_obj_t *); | |||
| uint8_t *dma_get_buffer(i2s_obj_t *, uint); | ||||
| void feed_dma(i2s_obj_t *, uint8_t *); | ||||
| 
 | ||||
| uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *, const uint8_t *, uint); | ||||
| uint32_t copy_userbuf_to_ringbuf(i2s_obj_t *, const uint32_t *, uint); | ||||
| 
 | ||||
| #endif | ||||
|  | @ -33,14 +33,14 @@ | |||
| // - Sequential atomic operations
 | ||||
| // One slot of capacity is used to detect buffer empty/full
 | ||||
| 
 | ||||
| void ringbuf_init(ring_buf_t *rbuf, uint8_t *buffer, size_t size) { | ||||
| void ringbuf_init(ring_buf_t *rbuf, uint32_t *buffer, size_t size) { | ||||
|     rbuf->buffer = buffer; | ||||
|     rbuf->size = size; | ||||
|     rbuf->head = 0; | ||||
|     rbuf->tail = 0; | ||||
| } | ||||
| 
 | ||||
| bool ringbuf_push(ring_buf_t *rbuf, uint8_t data) { | ||||
| bool ringbuf_push(ring_buf_t *rbuf, uint32_t data) { | ||||
|     size_t next_tail = (rbuf->tail + 1) % rbuf->size; | ||||
| 
 | ||||
|     if (next_tail != rbuf->head) { | ||||
|  | @ -53,7 +53,7 @@ bool ringbuf_push(ring_buf_t *rbuf, uint8_t data) { | |||
|     return false; | ||||
| } | ||||
| 
 | ||||
| bool ringbuf_pop(ring_buf_t *rbuf, uint8_t *data) { | ||||
| bool ringbuf_pop(ring_buf_t *rbuf, uint32_t *data) { | ||||
|     if (rbuf->head == rbuf->tail) { | ||||
|         // empty
 | ||||
|         return false; | ||||
|  |  | |||
|  | @ -28,15 +28,15 @@ | |||
| #include "pico/stdlib.h" | ||||
| 
 | ||||
| typedef struct _ring_buf_t { | ||||
|     uint8_t *buffer; | ||||
|     uint32_t *buffer; | ||||
|     size_t head; | ||||
|     size_t tail; | ||||
|     size_t size; | ||||
| } ring_buf_t; | ||||
| 
 | ||||
| void ringbuf_init(ring_buf_t *, uint8_t *, size_t); | ||||
| bool ringbuf_push(ring_buf_t *, uint8_t ); | ||||
| bool ringbuf_pop(ring_buf_t *, uint8_t *); | ||||
| void ringbuf_init(ring_buf_t *, uint32_t *, size_t); | ||||
| bool ringbuf_push(ring_buf_t *, uint32_t ); | ||||
| bool ringbuf_pop(ring_buf_t *, uint32_t *); | ||||
| bool ringbuf_is_empty(ring_buf_t *); | ||||
| bool ringbuf_is_full(ring_buf_t *); | ||||
| size_t ringbuf_available_data(ring_buf_t *); | ||||
|  |  | |||
|  | @ -123,83 +123,60 @@ static void __no_inline_not_in_flash_func(_as_audio_packet)(struct usb_endpoint | |||
|     int16_t *in = (int16_t *) usb_buffer->data; | ||||
|     int32_t *out = (int32_t *) userbuf; | ||||
|     int samples = usb_buffer->data_len / 2; | ||||
|   | ||||
|     multicore_fifo_push_blocking(CORE0_READY); | ||||
|     multicore_fifo_push_blocking((uintptr_t) in); | ||||
| 
 | ||||
|     // TODO: For some reason if we try to process in from both cores the left and right channels
 | ||||
|     // flip back and forth..
 | ||||
|     if (preprocessing.reverse_stereo) { | ||||
|         for (int i = 0; i < samples; i+=2) { | ||||
|             out[i] = fix16_mul(norm_fix3_28_from_s16sample(in[i+1]), preprocessing.preamp); | ||||
|             out[i+1] = fix16_mul(norm_fix3_28_from_s16sample(in[i]), preprocessing.preamp); | ||||
|         } | ||||
|     } | ||||
|     else { | ||||
|         for (int i = 0; i < samples; i++) | ||||
|             out[i] = fix16_mul(norm_fix3_28_from_s16sample(in[i]), preprocessing.preamp); | ||||
|     } | ||||
| 
 | ||||
|     multicore_fifo_push_blocking(samples); | ||||
| 
 | ||||
|     if (preprocessing.reverse_stereo) { | ||||
|         in++; | ||||
|     } | ||||
|     for (int i = 0; i < samples; i += 2) { | ||||
|         // Preamp the sample
 | ||||
|         fix3_28_t x_f16 = fix16_mul(norm_fix3_28_from_s16sample((int16_t) in[i]), preprocessing.preamp); | ||||
| 
 | ||||
|         // Run the filters
 | ||||
|         for (int j = 0; j < filter_stages; j++) { | ||||
|             x_f16 = bqf_transform(x_f16, &bqf_filters_left[j], &bqf_filters_mem_left[j]); | ||||
|             out[i] = bqf_transform(out[i], &bqf_filters_left[j], &bqf_filters_mem_left[j]); | ||||
|         } | ||||
|         // Convert back to sample
 | ||||
|         out[i] = (int32_t) norm_fix3_28_to_s16sample(x_f16); | ||||
|         out[i] = (int32_t) norm_fix3_28_to_s16sample(out[i]); | ||||
|     } | ||||
| 
 | ||||
|     // Signal to core 1 that we have processed our samples, so it can write to I2S
 | ||||
|     multicore_fifo_push_blocking(CORE0_READY); | ||||
| 
 | ||||
|     update_volume(); | ||||
|     apply_config_changes(); | ||||
| 
 | ||||
|     // keep on truckin'
 | ||||
|     usb_grow_transfer(ep->current_transfer, 1); | ||||
|     usb_packet_done(ep); | ||||
| } | ||||
| 
 | ||||
| void __no_inline_not_in_flash_func(core1_entry)() { | ||||
|     uint8_t *userbuf = (uint8_t *) multicore_fifo_pop_blocking(); | ||||
|     uint32_t *userbuf = (uint32_t *) multicore_fifo_pop_blocking(); | ||||
|     int32_t *out = (int32_t *) userbuf; | ||||
|     int limit_counter = 100; | ||||
| 
 | ||||
|     // Signal that the thread has started
 | ||||
|     multicore_fifo_push_blocking(CORE1_READY); | ||||
| 
 | ||||
|     while (true) { | ||||
|         // Block until the userbuf is filled with data
 | ||||
|         uint32_t ready = multicore_fifo_pop_blocking(); | ||||
|         while (ready != CORE0_READY) | ||||
|             ready = multicore_fifo_pop_blocking(); | ||||
|          | ||||
|         int16_t *in = (int16_t *) multicore_fifo_pop_blocking(); | ||||
|         const uint32_t samples = multicore_fifo_pop_blocking(); | ||||
| 
 | ||||
|         if (preprocessing.reverse_stereo) { | ||||
|             in--; | ||||
|         } | ||||
|         for (int i = 1; i < samples; i += 2) { | ||||
|             // Preamp the sample
 | ||||
|             fix3_28_t x_f16 = fix16_mul(norm_fix3_28_from_s16sample((int16_t) in[i]), preprocessing.preamp); | ||||
| 
 | ||||
|             // Run the filters
 | ||||
|             for (int j = 0; j < filter_stages; j++) { | ||||
|                 x_f16 = bqf_transform(x_f16, &bqf_filters_right[j],  &bqf_filters_mem_right[j]); | ||||
|                 out[i] = bqf_transform(out[i], &bqf_filters_right[j], &bqf_filters_mem_right[j]); | ||||
|             } | ||||
|             // Convert back to sample
 | ||||
|             out[i] = (int32_t) norm_fix3_28_to_s16sample(x_f16); | ||||
|             out[i] = (int32_t) norm_fix3_28_to_s16sample(out[i]); | ||||
|         } | ||||
| 
 | ||||
|         // Update the volume and filter configs if required. We do this from
 | ||||
|         // core1 as core0 is more heavily loaded, doing this from core0 can
 | ||||
|         // lead to audio crackling.
 | ||||
|         // Use of a counter reduces the amount of crackling when changing
 | ||||
|         // volume.
 | ||||
|         if (limit_counter != 0) | ||||
|             limit_counter--; | ||||
|         else { | ||||
|             limit_counter = 100; | ||||
|             update_volume(); | ||||
|             apply_config_changes(); | ||||
|         } | ||||
| 
 | ||||
|         // Signal to core 0 that the data has all been transformed
 | ||||
|         multicore_fifo_push_blocking(CORE1_READY); | ||||
| 
 | ||||
|         i2s_stream_write(&i2s_write_obj, userbuf, samples * 4); | ||||
|         // Wait for Core 0 to finish running its filtering before we apply config updates
 | ||||
|         multicore_fifo_pop_blocking(); | ||||
|         i2s_stream_write(&i2s_write_obj, userbuf, samples); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 George Norton
						George Norton