diff --git a/shared-module/audiomixer/Mixer.c b/shared-module/audiomixer/Mixer.c index afa3a06323..4a72dab28d 100644 --- a/shared-module/audiomixer/Mixer.c +++ b/shared-module/audiomixer/Mixer.c @@ -101,198 +101,141 @@ void audiomixer_mixer_reset_buffer(audiomixer_mixer_obj_t* self, } } -uint32_t add8signed(uint32_t a, uint32_t b) { - #if (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1)) //Cortex-M4 w/FPU - return __SHADD8(a, b); - #else - uint32_t result = 0; - for (int8_t i = 0; i < 4; i++) { - int8_t ai = a >> (sizeof(int8_t) * 8 * i); - int8_t bi = b >> (sizeof(int8_t) * 8 * i); - int32_t intermediate = (int32_t) ai + bi / 2; - if (intermediate > CHAR_MAX) { - intermediate = CHAR_MAX; - } else if (intermediate < CHAR_MIN) { - intermediate = CHAR_MIN; - } - result |= ((uint32_t) intermediate & 0xff) << (sizeof(int8_t) * 8 * i); - } - return result; - #endif -} - -uint32_t add8unsigned(uint32_t a, uint32_t b) { - #if (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1)) //Cortex-M4 w/FPU - return __UHADD8(a, b); - #else - uint32_t result = 0; - for (int8_t i = 0; i < 4; i++) { - uint8_t ai = (a >> (sizeof(uint8_t) * 8 * i)); - uint8_t bi = (b >> (sizeof(uint8_t) * 8 * i)); - int32_t intermediate = (int32_t) (ai + bi) / 2; - if (intermediate > UCHAR_MAX) { - intermediate = UCHAR_MAX; - } - result |= ((uint32_t) intermediate & 0xff) << (sizeof(uint8_t) * 8 * i); - } - return result; - #endif -} - -uint32_t add16signed(uint32_t a, uint32_t b) { - #if (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1)) //Cortex-M4 w/FPU - return __SHADD16(a, b); - #else - uint32_t result = 0; - for (int8_t i = 0; i < 2; i++) { - int16_t ai = a >> (sizeof(int16_t) * 8 * i); - int16_t bi = b >> (sizeof(int16_t) * 8 * i); - int32_t intermediate = (int32_t) ai + bi / 2; - if (intermediate > SHRT_MAX) { - intermediate = SHRT_MAX; - } else if (intermediate < SHRT_MIN) { - intermediate = SHRT_MIN; - } - result |= (((uint32_t) intermediate) & 0xffff) << (sizeof(int16_t) * 8 * i); - } - return result; - #endif -} - -uint32_t add16unsigned(uint32_t a, uint32_t b) { - #if (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1)) //Cortex-M4 w/FPU - return __UHADD16(a, b); - #else - uint32_t result = 0; - for (int8_t i = 0; i < 2; i++) { - int16_t ai = (a >> (sizeof(uint16_t) * 8 * i)) - 0x8000; - int16_t bi = (b >> (sizeof(uint16_t) * 8 * i)) - 0x8000; - int32_t intermediate = (int32_t) ai + bi / 2; - if (intermediate > USHRT_MAX) { - intermediate = USHRT_MAX; - } - result |= ((uint32_t) intermediate & 0xffff) << (sizeof(int16_t) * 8 * i); - } - return result; - #endif -} - -static inline uint32_t mult8unsigned(uint32_t val, int32_t mul) { - // if mul == 0, no need in wasting cycles - if (mul == 0) { - return 0; - } - /* TODO: workout ARMv7 instructions - #if (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1)) //Cortex-M4 w/FPU - return val; - #else*/ - uint32_t result = 0; - float mod_mul = (float) mul / (float) ((1<<15)-1); - for (int8_t i = 0; i < 4; i++) { - uint8_t ai = val >> (sizeof(uint8_t) * 8 * i); - int32_t intermediate = ai * mod_mul; - if (intermediate > SHRT_MAX) { - intermediate = SHRT_MAX; - } - result |= ((uint32_t) intermediate & 0xff) << (sizeof(uint8_t) * 8 * i); - } - - return result; - //#endif -} - -static inline uint32_t mult8signed(uint32_t val, int32_t mul) { - // if mul == 0, no need in wasting cycles - if (mul == 0) { - return 0; - } - /* TODO: workout ARMv7 instructions - #if (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1)) //Cortex-M4 w/FPU - return val; - #else - */ - uint32_t result = 0; - float mod_mul = (float)mul / (float)((1<<15)-1); - for (int8_t i = 0; i < 4; i++) { - int16_t ai = val >> (sizeof(int8_t) * 8 * i); - int32_t intermediate = ai * mod_mul; - if (intermediate > CHAR_MAX) { - intermediate = CHAR_MAX; - } else if (intermediate < CHAR_MIN) { - intermediate = CHAR_MIN; - } - result |= (((uint32_t) intermediate) & 0xff) << (sizeof(int16_t) * 8 * i); - } - return result; - //#endif -} - -//TODO: -static inline uint32_t mult16unsigned(uint32_t val, int32_t mul) { - // if mul == 0, no need in wasting cycles - if (mul == 0) { - return 0; - } - /* TODO: the below ARMv7m instructions "work", but the amplitude is much higher/louder - #if (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1)) //Cortex-M4 w/FPU - // there is no unsigned equivalent to the 'SMULWx' ARMv7 Thumb function, - // so we have to do it by hand. - uint32_t lo = val & 0xffff; - uint32_t hi = val >> 16; - //mp_printf(&mp_plat_print, "pre-asm: (mul: %d)\n\tval: %x\tlo: %x\thi: %x\n", mul, val, lo, hi); - uint32_t val_lo; - asm volatile("mul %0, %1, %2" : "=r" (val_lo) : "r" (mul), "r" (lo)); - asm volatile("mla %0, %1, %2, %3" : "=r" (val) : "r" (mul), "r" (hi), "r" (val_lo)); - //mp_printf(&mp_plat_print, "post-asm:\n\tval: %x\tlo: %x\n\n", val, val_lo); - return val; - #else - */ - uint32_t result = 0; - float mod_mul = (float)mul / (float)((1<<15)-1); - for (int8_t i = 0; i < 2; i++) { - int16_t ai = (val >> (sizeof(uint16_t) * 8 * i)) - 0x8000; - int32_t intermediate = ai * mod_mul; - if (intermediate > SHRT_MAX) { - intermediate = SHRT_MAX; - } else if (intermediate < SHRT_MIN) { - intermediate = SHRT_MIN; - } - result |= (((uint32_t) intermediate) + 0x8000) << (sizeof(int16_t) * 8 * i); - } - return result; - //#endif +__attribute__((always_inline)) +static inline uint32_t add16signed(uint32_t a, uint32_t b) { + return __QADD16(a, b); } +__attribute__((always_inline)) static inline uint32_t mult16signed(uint32_t val, int32_t mul) { - // if mul == 0, no need in wasting cycles - if (mul == 0) { - return 0; - } - #if (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1)) //Cortex-M4 w/FPU + mul <<= 16; int32_t hi, lo; enum { bits = 16 }; // saturate to 16 bits - enum { shift = 0 }; // shift is done automatically + enum { shift = 15 }; // shift is done automatically asm volatile("smulwb %0, %1, %2" : "=r" (lo) : "r" (mul), "r" (val)); asm volatile("smulwt %0, %1, %2" : "=r" (hi) : "r" (mul), "r" (val)); asm volatile("ssat %0, %1, %2, asr %3" : "=r" (lo) : "I" (bits), "r" (lo), "I" (shift)); asm volatile("ssat %0, %1, %2, asr %3" : "=r" (hi) : "I" (bits), "r" (hi), "I" (shift)); asm volatile("pkhbt %0, %1, %2, lsl #16" : "=r" (val) : "r" (lo), "r" (hi)); // pack return val; - #else - uint32_t result = 0; - float mod_mul = (float)mul / (float)((1<<15)-1); - for (int8_t i = 0; i < 2; i++) { - int16_t ai = val >> (sizeof(int16_t) * 8 * i); - int32_t intermediate = ai * mod_mul; - if (intermediate > SHRT_MAX) { - intermediate = SHRT_MAX; - } else if (intermediate < SHRT_MIN) { - intermediate = SHRT_MIN; +} + +static inline uint32_t tounsigned8(uint32_t val) { + return __UADD8(val, 0x80808080); +} + +static inline uint32_t tounsigned16(uint32_t val) { + return __UADD16(val, 0x80008000); +} + +static inline uint32_t tosigned16(uint32_t val) { + return __UADD16(val, 0x80008000); +} + +static inline uint32_t unpack8(uint16_t val) { + return ((val & 0xff00) << 16) | ((val & 0x00ff) << 8); +} + +static inline uint32_t pack8(uint32_t val) { + return ((val & 0xff000000) >> 16) | ((val & 0xff00) >> 8); +} + +static void mix_down_one_voice(audiomixer_mixer_obj_t* self, + audiomixer_mixervoice_obj_t* voice, bool voices_active, + uint32_t* word_buffer, uint32_t length) { + bool voice_done = voice->sample == NULL; + while (!voice_done && length != 0) { + if (voice->buffer_length == 0) { + if (!voice->more_data) { + if (voice->loop) { + audiosample_reset_buffer(voice->sample, false, 0); + } else { + voice->sample = NULL; + voice_done = true; + break; + } + } + if (!voice_done) { + // Load another buffer + audioio_get_buffer_result_t result = audiosample_get_buffer(voice->sample, false, 0, (uint8_t**) &voice->remaining_buffer, &voice->buffer_length); + // Track length in terms of words. + voice->buffer_length /= sizeof(uint32_t); + voice->more_data = result == GET_BUFFER_MORE_DATA; + } + } + + uint32_t n = MIN(voice->buffer_length, length); + uint32_t *src = voice->remaining_buffer; + uint16_t level = voice->level; + + // First active voice gets copied over verbatim. + if (!voices_active) { + if (MP_LIKELY(self->bits_per_sample == 16)) { + if (MP_LIKELY(self->samples_signed)) { + for (uint32_t i = 0; isamples_signed)) { + word = tosigned16(word); + } + word = mult16signed(word, level); + hword_buffer[i] = pack8(word); + } + } + } else { + if (MP_LIKELY(self->bits_per_sample == 16)) { + if (MP_LIKELY(self->samples_signed)) { + for (uint32_t i = 0; isamples_signed)) { + word = tosigned16(word); + } + word = mult16signed(word, level); + word = add16signed(word, unpack8(hword_buffer[i])); + hword_buffer[i] = pack8(word); + } + } + } + length -= n; + word_buffer += n; + voice->remaining_buffer += n; + voice->buffer_length -= n; + } + + if (length && !voices_active) { + uint32_t sample_value = self->bits_per_sample == 8 + ? 0x80808080 : 0x80008000; + for (uint32_t i = 0; iuse_first_buffer = !self->use_first_buffer; bool voices_active = false; + uint32_t length = self->len / sizeof(uint32_t); + for (int32_t v = 0; v < self->voice_count; v++) { audiomixer_mixervoice_obj_t* voice = MP_OBJ_TO_PTR(self->voice[v]); - uint32_t j = 0; - bool voice_done = voice->sample == NULL; - for (uint32_t i = 0; i < self->len / sizeof(uint32_t); i++) { - if (!voice_done && j >= voice->buffer_length) { - if (!voice->more_data) { - if (voice->loop) { - audiosample_reset_buffer(voice->sample, false, 0); - } else { - voice->sample = NULL; - voice_done = true; - } - } - if (!voice_done) { - // Load another buffer - audioio_get_buffer_result_t result = audiosample_get_buffer(voice->sample, false, 0, (uint8_t**) &voice->remaining_buffer, &voice->buffer_length); - // Track length in terms of words. - voice->buffer_length /= sizeof(uint32_t); - voice->more_data = result == GET_BUFFER_MORE_DATA; - j = 0; - } - } - // First active voice gets copied over verbatim. - uint32_t sample_value; - if (voice_done) { - // Exit early if another voice already set all samples once. - if (voices_active) { - continue; - } - sample_value = 0; - if (!self->samples_signed) { - if (self->bits_per_sample == 8) { - sample_value = 0x7f7f7f7f; - } else { - sample_value = 0x7fff7fff; - } - } - } else { - sample_value = voice->remaining_buffer[j]; - } - - // apply the mixer level - if (!self->samples_signed) { - if (self->bits_per_sample == 8) { - sample_value = mult8unsigned(sample_value, voice->level); - } else { - sample_value = mult16unsigned(sample_value, voice->level); - } - } else { - if (self->bits_per_sample == 8) { - sample_value = mult8signed(sample_value, voice->level); - } else { - sample_value = mult16signed(sample_value, voice->level); - } - } - - if (!voices_active) { - word_buffer[i] = sample_value; - } else { - if (self->bits_per_sample == 8) { - if (self->samples_signed) { - word_buffer[i] = add8signed(word_buffer[i], sample_value); - } else { - word_buffer[i] = add8unsigned(word_buffer[i], sample_value); - } - } else { - if (self->samples_signed) { - word_buffer[i] = add16signed(word_buffer[i], sample_value); - } else { - word_buffer[i] = add16unsigned(word_buffer[i], sample_value); - } - } - } - j++; - } - voice->buffer_length -= j; - voice->remaining_buffer += j; - + mix_down_one_voice(self, voice, voices_active, word_buffer, length); voices_active = true; } + if (!self->samples_signed) { + if (self->bits_per_sample == 16) { + for (uint32_t i = 0; i < length; i++) { + word_buffer[i] = tounsigned16(word_buffer[i]); + } + } else { + for (uint32_t i = 0; i < length; i++) { + word_buffer[i] = tounsigned8(word_buffer[i]); + } + } + } + self->read_count += 1; } else if (!self->use_first_buffer) { *buffer = (uint8_t*) self->first_buffer; diff --git a/shared-module/audiomixer/MixerVoice.c b/shared-module/audiomixer/MixerVoice.c index ff05dc93e4..9be104afcf 100644 --- a/shared-module/audiomixer/MixerVoice.c +++ b/shared-module/audiomixer/MixerVoice.c @@ -34,7 +34,7 @@ void common_hal_audiomixer_mixervoice_construct(audiomixer_mixervoice_obj_t *self) { self->sample = NULL; - self->level = ((1 << 15) - 1); + self->level = 1 << 15; } void common_hal_audiomixer_mixervoice_set_parent(audiomixer_mixervoice_obj_t* self, audiomixer_mixer_obj_t *parent) { @@ -42,11 +42,11 @@ void common_hal_audiomixer_mixervoice_set_parent(audiomixer_mixervoice_obj_t* se } float common_hal_audiomixer_mixervoice_get_level(audiomixer_mixervoice_obj_t* self) { - return ((float) self->level / ((1 << 15) - 1)); + return ((float) self->level / (1 << 15)); } void common_hal_audiomixer_mixervoice_set_level(audiomixer_mixervoice_obj_t* self, float level) { - self->level = level * ((1 << 15)-1); + self->level = level * (1 << 15); } void common_hal_audiomixer_mixervoice_play(audiomixer_mixervoice_obj_t* self, mp_obj_t sample, bool loop) { diff --git a/shared-module/audiomixer/MixerVoice.h b/shared-module/audiomixer/MixerVoice.h index efac191565..a85316e3d0 100644 --- a/shared-module/audiomixer/MixerVoice.h +++ b/shared-module/audiomixer/MixerVoice.h @@ -39,7 +39,7 @@ typedef struct { bool more_data; uint32_t* remaining_buffer; uint32_t buffer_length; - int16_t level; + uint16_t level; } audiomixer_mixervoice_obj_t;