From 43650b689674a18b95d520921d10a91b67f64c84 Mon Sep 17 00:00:00 2001
From: Dan Halbert <halbert@halwitz.org>
Date: Wed, 13 Dec 2017 13:12:49 -0500
Subject: [PATCH] Fix PDMIn MEMS microphone support (#479)

Changes:

    * New faster filter loop, by @ladyada. New filter coefficients as well.
    * Turn on microphone clock when PDMIn object is created, and run it all the time, so the user code doesn't have to wait for microphone startup, which can be 10ms or even 100ms.
    * Wait for microphone startup when PDMIn is first created, based on new optional parameter startup_delay in seconds (takes a float).
    * record() returns number of samples actually recorded, so you can see if it's not keeping up.
    * Fix buffer overflow errors when buffer size was not a multiple of 16 or something like that.
    * Tweak a few peripheral settings.
    * Minimum sampling frequency is now about 16 kHz, because 8 kHz runs the microphone at only 0.5 MHz, which is too slow for many mics.

Note: I tried 128x oversampling instead of 64x, but the code cannot keep up at sampling rates of 24 kHz or above. 128x would reduce the high-frequency noise by 6 dB.
---
 atmel-samd/common-hal/audiobusio/PDMIn.c | 153 ++++++++++++++---------
 shared-bindings/audiobusio/PDMIn.c       |  53 +++++---
 2 files changed, 129 insertions(+), 77 deletions(-)

diff --git a/atmel-samd/common-hal/audiobusio/PDMIn.c b/atmel-samd/common-hal/audiobusio/PDMIn.c
index db671f213f..8263ac7d20 100644
--- a/atmel-samd/common-hal/audiobusio/PDMIn.c
+++ b/atmel-samd/common-hal/audiobusio/PDMIn.c
@@ -26,6 +26,7 @@
 
 #include <stdint.h>
 #include <string.h>
+#include <math.h>
 
 #include "py/gc.h"
 #include "py/mperrno.h"
@@ -42,6 +43,12 @@
 #include "shared_dma.h"
 #include "tick.h"
 
+#define OVERSAMPLING 64
+#define SAMPLES_PER_BUFFER 32
+
+// MEMS microphones must be clocked at 1MHz or faster.
+#define MIN_MIC_CLOCK 1000000
+
 void pdmin_reset(void) {
     while (I2S->SYNCBUSY.reg & I2S_SYNCBUSY_ENABLE) {}
     I2S->INTENCLR.reg = I2S_INTENCLR_MASK;
@@ -96,8 +103,8 @@ void common_hal_audiobusio_pdmin_construct(audiobusio_pdmin_obj_t* self,
         mp_raise_RuntimeError("Unable to allocate audio DMA block counter.");
     }
 
-    if (!(bit_depth == 16 || bit_depth == 8) || !mono || oversample != 64) {
-        mp_raise_NotImplementedError("Only 8 or 16 bit mono with 64 oversample is supported.");
+    if (!(bit_depth == 16 || bit_depth == 8) || !mono || oversample != OVERSAMPLING) {
+        mp_raise_NotImplementedError("Only 8 or 16 bit mono with " MP_STRINGIFY(OVERSAMPLING) "x oversampling is supported.");
     }
 
     // TODO(tannewt): Use the DPLL to get a more precise sampling rate.
@@ -112,12 +119,17 @@ void common_hal_audiobusio_pdmin_construct(audiobusio_pdmin_obj_t* self,
     config_clock_unit.clock.mck_out_enable = false;
 
     config_clock_unit.clock.sck_src = I2S_SERIAL_CLOCK_SOURCE_MCKDIV;
-    config_clock_unit.clock.sck_div = 8000000 / frequency / oversample;
-    self->frequency = 8000000 / config_clock_unit.clock.sck_div / oversample;
+    uint32_t clock_divisor = (uint32_t) roundf( 8000000.0f / frequency / oversample);
+    config_clock_unit.clock.sck_div = clock_divisor;
+    float mic_clock_freq = 8000000.0f / clock_divisor;
+    self->frequency =  mic_clock_freq / oversample;
+    if (mic_clock_freq <  MIN_MIC_CLOCK || clock_divisor == 0 || clock_divisor > 255) {
+        mp_raise_ValueError("sampling frequency out of range");
+    }
 
     config_clock_unit.frame.number_slots = 2;
     config_clock_unit.frame.slot_size = I2S_SLOT_SIZE_16_BIT;
-    config_clock_unit.frame.data_delay = I2S_DATA_DELAY_1;
+    config_clock_unit.frame.data_delay = I2S_DATA_DELAY_0;
 
     config_clock_unit.frame.frame_sync.width = I2S_FRAME_SYNC_WIDTH_SLOT;
 
@@ -141,6 +153,10 @@ void common_hal_audiobusio_pdmin_construct(audiobusio_pdmin_obj_t* self,
     i2s_serializer_set_config(&self->i2s_instance, self->serializer, &config_serializer);
     i2s_enable(&self->i2s_instance);
 
+    // Run the serializer all the time. This eliminates startup delay for the microphone.
+    i2s_clock_unit_enable(&self->i2s_instance, self->clock_unit);
+    i2s_serializer_enable(&self->i2s_instance, self->serializer);
+
     self->bytes_per_sample = oversample >> 3;
     self->bit_depth = bit_depth;
 }
@@ -154,6 +170,8 @@ void common_hal_audiobusio_pdmin_deinit(audiobusio_pdmin_obj_t* self) {
         return;
     }
     i2s_disable(&self->i2s_instance);
+    i2s_serializer_disable(&self->i2s_instance, self->serializer);
+    i2s_clock_unit_disable(&self->i2s_instance, self->clock_unit);
     i2s_reset(&self->i2s_instance);
     reset_pin(self->clock_pin->pin);
     reset_pin(self->data_pin->pin);
@@ -213,74 +231,82 @@ static void setup_dma(audiobusio_pdmin_obj_t* self, uint32_t length,
 void start_dma(audiobusio_pdmin_obj_t* self) {
     dma_start_transfer_job(&audio_dma);
     tc_start_counter(MP_STATE_VM(audiodma_block_counter));
-    i2s_clock_unit_enable(&self->i2s_instance, self->clock_unit);
-    i2s_serializer_enable(&self->i2s_instance, self->serializer);
     I2S->DATA[1].reg = I2S->DATA[1].reg;
 }
 
 void stop_dma(audiobusio_pdmin_obj_t* self) {
-    // Turn off the I2S clock and serializer. Peripheral is still enabled.
-    i2s_serializer_disable(&self->i2s_instance, self->serializer);
-    i2s_clock_unit_disable(&self->i2s_instance, self->clock_unit);
-
-    // Shutdown the DMA
+    // Shutdown the DMA: serializer keeps running.
     tc_stop_counter(MP_STATE_VM(audiodma_block_counter));
     dma_abort_job(&audio_dma);
 }
 
-static const uint16_t sinc_filter[64] = {
-    0, 1, 6, 16, 29, 49, 75, 108,
-    149, 200, 261, 334, 418, 514, 622, 742,
-    872, 1012, 1161, 1315, 1472, 1631, 1787, 1938,
-    2081, 2212, 2329, 2429, 2509, 2568, 2604, 2616,
-    2604, 2568, 2509, 2429, 2329, 2212, 2081, 1938,
-    1787, 1631, 1472, 1315, 1161, 1012, 872, 742,
-    622, 514, 418, 334, 261, 200, 149, 108,
-    75, 49, 29, 16, 6, 1, 0, 0
+// A windowed sinc filter for 44 kHz, 64 samples.
+//
+// This filter is good enough to use for lower sample rates as
+// well. It does not increase the noise enough to be a problem.
+//
+// In the long run we could use a fast filter like this to do the
+// decimation and initial filtering in real time, filtering to a
+// higher sample rate than specified.  Then after the audio is
+// recorded, a more expensive non-real-time filter could be
+// used to down-sample and low-pass.
+uint16_t sinc_filter [OVERSAMPLING] = {
+    0, 2, 9, 21, 39, 63, 94, 132,
+    179, 236, 302, 379, 467, 565, 674, 792,
+    920, 1055, 1196, 1341, 1487, 1633, 1776, 1913,
+    2042, 2159, 2263, 2352, 2422, 2474, 2506, 2516,
+    2506, 2474, 2422, 2352, 2263, 2159, 2042, 1913,
+    1776, 1633, 1487, 1341, 1196, 1055, 920, 792,
+    674, 565, 467, 379, 302, 236, 179, 132,
+    94, 63, 39, 21, 9, 2, 0, 0
 };
 
+#define REPEAT_16_TIMES(X) X X X X X X X X X X X X X X X X
+
 static uint16_t filter_sample(uint32_t pdm_samples[4]) {
-    uint16_t sample = 0;
-    for (uint8_t i = 0; i < 4; i++) {
-        uint16_t pdm = pdm_samples[i] & 0xffff;
-        for (uint8_t j = 0; j < 16; j++) {
-            if ((pdm & 0x8000) != 0) {
-                sample += sinc_filter[i * 16 + j];
+    uint16_t running_sum = 0;
+    const uint16_t *filter_ptr = sinc_filter;
+    for (uint8_t i = 0; i < OVERSAMPLING/16; i++) {
+        // The sample is 16-bits right channel in the upper two bytes and 16-bits left channel
+        // in the lower two bytes.
+        // We just ignore the upper bits
+        uint32_t pdm_sample = pdm_samples[i];
+        REPEAT_16_TIMES( {
+                if (pdm_sample & 0x8000) {
+                    running_sum += *filter_ptr++;
+                }
+                pdm_sample <<= 1;
             }
-            pdm <<= 1;
-        }
+            )
     }
-    return sample;
+    return running_sum;
 }
 
+// output_buffer may be a byte buffer or a halfword buffer.
+// output_buffer_length is the number of slots, not the number of bytes.
 uint32_t common_hal_audiobusio_pdmin_record_to_buffer(audiobusio_pdmin_obj_t* self,
-        uint16_t* output_buffer, uint32_t length) {
-    // Write the wave file header.
-
-    // We allocate two 256 byte buffers on the stack to use for double buffering.
-    // Our oversample rate is 64 (bits) so each buffer produces 32 samples.
-    // TODO(tannewt): Can the compiler optimize better if we fix the size of
-    // these buffers?
-    uint8_t samples_per_buffer = 32;
+        uint16_t* output_buffer, uint32_t output_buffer_length) {
+    // We allocate two buffers on the stack to use for double buffering.
+    const uint8_t samples_per_buffer = SAMPLES_PER_BUFFER;
     // For every word we record, we throw away 2 bytes of a phantom second channel.
-    uint8_t words_per_sample = self->bytes_per_sample / 2;
-    uint8_t words_per_buffer = samples_per_buffer * words_per_sample;
+    const uint8_t words_per_sample = self->bytes_per_sample / 2;
+    const uint8_t words_per_buffer = samples_per_buffer * words_per_sample;
     uint32_t first_buffer[words_per_buffer];
     uint32_t second_buffer[words_per_buffer];
 
     COMPILER_ALIGNED(16) DmacDescriptor second_descriptor;
 
-    setup_dma(self, length, &second_descriptor, words_per_buffer,
+    setup_dma(self, output_buffer_length, &second_descriptor, words_per_buffer,
        words_per_sample, first_buffer, second_buffer);
 
     start_dma(self);
 
     // Record
     uint32_t buffers_processed = 0;
-    uint32_t total_bytes = 0;
+    uint32_t values_output = 0;
 
-    uint64_t start_ticks = ticks_ms;
-    while (total_bytes < length) {
+    uint32_t remaining_samples_needed = output_buffer_length;
+    while (values_output < output_buffer_length) {
         // Wait for the next buffer to fill
         while (tc_get_count_value(MP_STATE_VM(audiodma_block_counter)) == buffers_processed) {
             #ifdef MICROPY_VM_HOOK_LOOP
@@ -290,40 +316,45 @@ uint32_t common_hal_audiobusio_pdmin_record_to_buffer(audiobusio_pdmin_obj_t* se
         if (tc_get_count_value(MP_STATE_VM(audiodma_block_counter)) != (buffers_processed + 1)) {
             break;
         }
-        // Throw away the first ~10ms of data because thats during mic start up.
-        if (ticks_ms - start_ticks < 10) {
-            buffers_processed++;
-            continue;
-        }
-        uint32_t* buffer = first_buffer;
+
+        // The mic is running all the time, so we don't need to wait the usual 10msec or 100msec
+        // for it to start up.
+
+        uint32_t *buffer = first_buffer;
         DmacDescriptor* descriptor = audio_dma.descriptor;
         if (buffers_processed % 2 == 1) {
             buffer = second_buffer;
             descriptor = &second_descriptor;
         }
         // Decimate and filter the last buffer
-        int32_t samples_gathered = descriptor->BTCNT.reg / words_per_sample;
-        for (uint16_t i = 0; i < samples_gathered; i++) {
+        uint32_t samples_gathered = descriptor->BTCNT.reg / words_per_sample;
+        // Don't run off the end of output buffer. Process only as many as needed.
+        uint32_t samples_to_process = min(remaining_samples_needed, samples_gathered);
+        for (uint32_t i = 0; i < samples_to_process; i++) {
+            // Call filter_sample just one place so it can be inlined.
+            uint16_t value = filter_sample(buffer + i * words_per_sample);
             if (self->bit_depth == 8) {
-                ((uint8_t*) output_buffer)[total_bytes] = filter_sample(buffer + i * words_per_sample) >> 8;
-                total_bytes += 1;
-            } else if (self->bit_depth == 16) {
-                output_buffer[total_bytes / 2] = filter_sample(buffer + i * words_per_sample);
-                total_bytes += 2;
+                // Truncate to 8 bits.
+                ((uint8_t*) output_buffer)[values_output] = value >> 8;
+            } else {
+                output_buffer[values_output] = value;
             }
+            values_output++;
         }
         buffers_processed++;
 
-        if (length - total_bytes < samples_per_buffer) {
-            descriptor->BTCNT.reg = (length - total_bytes) * words_per_sample;
-            descriptor->DSTADDR.reg = ((uint32_t) buffer) + (length - total_bytes) * self->bytes_per_sample;
+        // See if we need to transfer less than a full buffer for the remaining needed samples.
+        remaining_samples_needed = output_buffer_length - values_output;
+        if (remaining_samples_needed > 0 && remaining_samples_needed < samples_per_buffer) {
+            descriptor->BTCNT.reg = remaining_samples_needed;
+            descriptor->DSTADDR.reg = ((uint32_t) buffer) + remaining_samples_needed * words_per_sample;
             descriptor->DESCADDR.reg = 0;
         }
     }
 
     stop_dma(self);
 
-    return total_bytes;
+    return values_output;
 }
 
 void common_hal_audiobusio_pdmin_record_to_file(audiobusio_pdmin_obj_t* self, uint8_t* buffer, uint32_t length) {
diff --git a/shared-bindings/audiobusio/PDMIn.c b/shared-bindings/audiobusio/PDMIn.c
index e13e09d64e..c71f44679a 100644
--- a/shared-bindings/audiobusio/PDMIn.c
+++ b/shared-bindings/audiobusio/PDMIn.c
@@ -28,6 +28,7 @@
 
 #include "lib/utils/context_manager_helpers.h"
 #include "py/binary.h"
+#include "py/mphal.h"
 #include "py/objproperty.h"
 #include "py/runtime.h"
 #include "shared-bindings/microcontroller/Pin.h"
@@ -41,7 +42,7 @@
 //|
 //| PDMIn can be used to record an input audio signal on a given set of pins.
 //|
-//| .. class:: PDMIn(clock_pin, data_pin, \*, frequency=8000, bit_depth=8, mono=True, oversample=64)
+//| .. class:: PDMIn(clock_pin, data_pin, \*, frequency=16000, bit_depth=8, mono=True, oversample=64, startup_delay=0.11)
 //|
 //|   Create a PDMIn object associated with the given pins. This allows you to
 //|   record audio signals from the given pins. Individual ports may put further
@@ -49,11 +50,16 @@
 //|
 //|   :param ~microcontroller.Pin clock_pin: The pin to output the clock to
 //|   :param ~microcontroller.Pin data_pin: The pin to read the data from
-//|   :param int frequency: Target frequency of the resulting samples. Check `frequency` for real value.
+//|   :param int frequency: Target frequency of the resulting samples. Check `frequency` for actual value.
+//|     Minimum frequency is about 16000 Hz.
 //|   :param int bit_depth: Final number of bits per sample. Must be divisible by 8
 //|   :param bool mono: True when capturing a single channel of audio, captures two channels otherwise
 //|   :param int oversample: Number of single bit samples to decimate into a final sample. Must be divisible by 8
+//|   :param float startup_delay: seconds to wait after starting microphone clock
+//|    to allow microphone to turn on. Most require only 0.01s; some require 0.1s. Longer is safer.
+//|    Must be in range 0.0-1.0 seconds.
 //|
+
 //|   Record 8-bit unsigned samples to buffer::
 //|
 //|     import audiobusio
@@ -79,15 +85,19 @@
 //|         mic.record(b, len(b))
 //|
 STATIC mp_obj_t audiobusio_pdmin_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_kw, const mp_obj_t *pos_args) {
-    enum { ARG_frequency, ARG_bit_depth, ARG_mono, ARG_oversample };
+    enum { ARG_frequency, ARG_bit_depth, ARG_mono, ARG_oversample, ARG_startup_delay };
     mp_map_t kw_args;
     mp_map_init_fixed_table(&kw_args, n_kw, pos_args + n_args);
     static const mp_arg_t allowed_args[] = {
-        { MP_QSTR_frequency,  MP_ARG_INT, {.u_int = 8000} },
-        { MP_QSTR_bit_depth,  MP_ARG_INT, {.u_int = 8} },
-        { MP_QSTR_mono,       MP_ARG_BOOL,{.u_bool = true} },
-        { MP_QSTR_oversample, MP_ARG_INT, {.u_int = 64} },
+        { MP_QSTR_frequency,     MP_ARG_KW_ONLY | MP_ARG_INT, {.u_int = 16000} },
+        { MP_QSTR_bit_depth,     MP_ARG_KW_ONLY | MP_ARG_INT, {.u_int = 8} },
+        { MP_QSTR_mono,          MP_ARG_KW_ONLY | MP_ARG_BOOL,{.u_bool = true} },
+        { MP_QSTR_oversample,    MP_ARG_KW_ONLY | MP_ARG_INT, {.u_int = 64} },
+        { MP_QSTR_startup_delay, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_obj = MP_OBJ_NULL} },
     };
+    // Default microphone startup delay is 110msecs. Have seen mics that need 100 msecs plus a bit.
+    static const float STARTUP_DELAY_DEFAULT = 0.110F;
+
     mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
     mp_arg_parse_all(n_args - 2, pos_args + 2, &kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
 
@@ -116,8 +126,18 @@ STATIC mp_obj_t audiobusio_pdmin_make_new(const mp_obj_type_t *type, size_t n_ar
     }
     bool mono = args[ARG_mono].u_bool;
 
+    float startup_delay = (args[ARG_startup_delay].u_obj == MP_OBJ_NULL)
+        ? STARTUP_DELAY_DEFAULT
+        : mp_obj_get_float(args[ARG_startup_delay].u_obj);
+    if (startup_delay < 0.0 || startup_delay > 1.0) {
+        mp_raise_ValueError("Microphone startup delay must be in range 0.0 to 1.0");
+    }
+
     common_hal_audiobusio_pdmin_construct(self, clock_pin, data_pin, frequency,
-        bit_depth, mono, oversample);
+                                          bit_depth, mono, oversample);
+
+    // Wait for the microphone to start up. Some start in 10 msecs; some take as much as 100 msecs.
+    mp_hal_delay_ms(startup_delay * 1000);
 
     return MP_OBJ_FROM_PTR(self);
 }
@@ -160,11 +180,14 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(audiobusio_pdmin___exit___obj, 4, 4,
 //|     audio at the given rate. For internal flash, writing all 1s to the file
 //|     before recording is recommended to speed up writes.
 //|
+//|     :return: The number of samples recorded. If this is less than `destination_length`,
+//|       some samples were missed due to processing time.
+//|
 STATIC mp_obj_t audiobusio_pdmin_obj_record(mp_obj_t self_obj, mp_obj_t destination, mp_obj_t destination_length) {
     audiobusio_pdmin_obj_t *self = MP_OBJ_TO_PTR(self_obj);
     raise_error_if_deinited(common_hal_audiobusio_pdmin_deinited(self));
-    if (!MP_OBJ_IS_SMALL_INT(destination_length)) {
-        mp_raise_TypeError("destination_length must be int");
+    if (!MP_OBJ_IS_SMALL_INT(destination_length) || MP_OBJ_SMALL_INT_VALUE(destination_length) < 0) {
+        mp_raise_TypeError("destination_length must be an int >= 0");
     }
     uint32_t length = MP_OBJ_SMALL_INT_VALUE(destination_length);
 
@@ -172,8 +195,8 @@ STATIC mp_obj_t audiobusio_pdmin_obj_record(mp_obj_t self_obj, mp_obj_t destinat
     if (MP_OBJ_IS_TYPE(destination, &fatfs_type_fileio)) {
         mp_raise_NotImplementedError("");
     } else if (mp_get_buffer(destination, &bufinfo, MP_BUFFER_WRITE)) {
-        if (bufinfo.len < length) {
-            mp_raise_ValueError("Target buffer cannot hold destination_length bytes.");
+        if (bufinfo.len / mp_binary_get_size('@', bufinfo.typecode, NULL) < length) {
+            mp_raise_ValueError("Destination capacity is smaller than destination_length.");
         }
         uint8_t bit_depth = common_hal_audiobusio_pdmin_get_bit_depth(self);
         if (bufinfo.typecode != 'H' && bit_depth == 16) {
@@ -181,12 +204,10 @@ STATIC mp_obj_t audiobusio_pdmin_obj_record(mp_obj_t self_obj, mp_obj_t destinat
         } else if (bufinfo.typecode != 'B' && bufinfo.typecode != BYTEARRAY_TYPECODE && bit_depth == 8) {
             mp_raise_ValueError("destination buffer must be a bytearray or array of type 'B' for bit_depth = 8");
         }
-        length *= bit_depth / 8;
+        // length is the buffer length in slots, not bytes.
         uint32_t length_written =
             common_hal_audiobusio_pdmin_record_to_buffer(self, bufinfo.buf, length);
-        if (length_written != length) {
-            mp_printf(&mp_plat_print, "length mismatch %d %d\n", length_written, length);
-        }
+        return MP_OBJ_NEW_SMALL_INT(length_written);
     }
     return mp_const_none;
 }