Merge pull request #2363 from jepler/samd-neopixel
samd: neopixel: Fix neopixels after #2297
This commit is contained in:
commit
dda9ea09b1
|
@ -34,24 +34,62 @@
|
|||
#ifdef SAMD51
|
||||
#include "hri/hri_cmcc_d51.h"
|
||||
#include "hri/hri_nvmctrl_d51.h"
|
||||
|
||||
// This magical macro makes sure the delay isn't optimized out and is the
|
||||
// minimal three instructions.
|
||||
#define delay_cycles(cycles) \
|
||||
{ \
|
||||
uint32_t t; \
|
||||
asm volatile ( \
|
||||
"movs %[t], %[c]\n\t" \
|
||||
"loop%=:\n\t" \
|
||||
"subs %[t], #1\n\t" \
|
||||
"bne.n loop%=" : [t] "=r"(t) : [c] "I" (cycles)); \
|
||||
}
|
||||
#endif
|
||||
|
||||
// Ensure this code is compiled with -Os. Any other optimization level may change the timing of it
|
||||
// and break neopixels.
|
||||
#pragma GCC push_options
|
||||
#pragma GCC optimize ("Os")
|
||||
__attribute__((naked,noinline,aligned(16)))
|
||||
static void neopixel_send_buffer_core(volatile uint32_t *clraddr, uint32_t pinMask,
|
||||
const uint8_t *ptr, int numBytes);
|
||||
|
||||
static void neopixel_send_buffer_core(volatile uint32_t *clraddr, uint32_t pinMask,
|
||||
const uint8_t *ptr, int numBytes) {
|
||||
asm volatile(" push {r4, r5, r6, lr};"
|
||||
" add r3, r2, r3;"
|
||||
"loopLoad:"
|
||||
" ldrb r5, [r2, #0];" // r5 := *ptr
|
||||
" add r2, #1;" // ptr++
|
||||
" movs r4, #128;" // r4-mask, 0x80
|
||||
"loopBit:"
|
||||
" str r1, [r0, #4];" // set
|
||||
#ifdef SAMD21
|
||||
" movs r6, #3; d2: sub r6, #1; bne d2;" // delay 3
|
||||
#endif
|
||||
#ifdef SAMD51
|
||||
" movs r6, #3; d2: subs r6, #1; bne d2;" // delay 3
|
||||
#endif
|
||||
" tst r4, r5;" // mask&r5
|
||||
" bne skipclr;"
|
||||
" str r1, [r0, #0];" // clr
|
||||
"skipclr:"
|
||||
#ifdef SAMD21
|
||||
" movs r6, #6; d0: sub r6, #1; bne d0;" // delay 6
|
||||
#endif
|
||||
#ifdef SAMD51
|
||||
" movs r6, #6; d0: subs r6, #1; bne d0;" // delay 6
|
||||
#endif
|
||||
" str r1, [r0, #0];" // clr (possibly again, doesn't matter)
|
||||
#ifdef SAMD21
|
||||
" asr r4, r4, #1;" // mask >>= 1
|
||||
#endif
|
||||
#ifdef SAMD51
|
||||
" asrs r4, r4, #1;" // mask >>= 1
|
||||
#endif
|
||||
" beq nextbyte;"
|
||||
" uxtb r4, r4;"
|
||||
#ifdef SAMD21
|
||||
" movs r6, #2; d1: sub r6, #1; bne d1;" // delay 2
|
||||
#endif
|
||||
#ifdef SAMD51
|
||||
" movs r6, #2; d1: subs r6, #1; bne d1;" // delay 2
|
||||
#endif
|
||||
" b loopBit;"
|
||||
"nextbyte:"
|
||||
" cmp r2, r3;"
|
||||
" bcs neopixel_stop;"
|
||||
" b loopLoad;"
|
||||
"neopixel_stop:"
|
||||
" pop {r4, r5, r6, pc};"
|
||||
"");
|
||||
}
|
||||
|
||||
uint64_t next_start_tick_ms = 0;
|
||||
uint32_t next_start_tick_us = 1000;
|
||||
|
@ -59,7 +97,7 @@ uint32_t next_start_tick_us = 1000;
|
|||
void common_hal_neopixel_write(const digitalio_digitalinout_obj_t* digitalinout, uint8_t *pixels, uint32_t numBytes) {
|
||||
// This is adapted directly from the Adafruit NeoPixel library SAMD21G18A code:
|
||||
// https://github.com/adafruit/Adafruit_NeoPixel/blob/master/Adafruit_NeoPixel.cpp
|
||||
uint8_t *ptr, *end, p, bitMask;
|
||||
// and the asm version from https://github.com/microsoft/uf2-samdx1/blob/master/inc/neopixel.h
|
||||
uint32_t pinMask;
|
||||
PortGroup* port;
|
||||
|
||||
|
@ -71,21 +109,18 @@ void common_hal_neopixel_write(const digitalio_digitalinout_obj_t* digitalinout,
|
|||
mp_hal_disable_all_interrupts();
|
||||
|
||||
|
||||
#ifdef SAMD21
|
||||
// Make sure the NVM cache is consistently timed.
|
||||
NVMCTRL->CTRLB.bit.READMODE = NVMCTRL_CTRLB_READMODE_DETERMINISTIC_Val;
|
||||
#endif
|
||||
|
||||
#ifdef SAMD51
|
||||
// When this routine is positioned at certain addresses, the timing logic
|
||||
// below can be too fast by about 2.5x. This is some kind of (un)fortunate code
|
||||
// positiong with respect to a cache line.
|
||||
// positioning with respect to a cache line.
|
||||
// Theoretically we should turn on off the CMCC caches and the
|
||||
// NVM caches to ensure consistent timing. Testing shows the the NVMCTRL
|
||||
// cache disabling seems to make the difference. But turn both off to make sure.
|
||||
// It's difficult to test because additions to the code before the timing loop
|
||||
// below change instruction placement. Testing was done by adding cache changes
|
||||
// below the loop (so only the first time through is wrong).
|
||||
// below change instruction placement. (though this should be less true now that
|
||||
// the main code is in the cache-aligned function neopixel_send_buffer_core)
|
||||
// Testing was done by adding cache changes below the loop (so only the
|
||||
// first time through is wrong).
|
||||
//
|
||||
// Turn off instruction, data, and NVM caches to force consistent timing.
|
||||
// Invalidate existing cache entries.
|
||||
|
@ -93,78 +128,13 @@ void common_hal_neopixel_write(const digitalio_digitalinout_obj_t* digitalinout,
|
|||
hri_cmcc_write_MAINT0_reg(CMCC, CMCC_MAINT0_INVALL);
|
||||
hri_nvmctrl_set_CTRLA_CACHEDIS0_bit(NVMCTRL);
|
||||
hri_nvmctrl_set_CTRLA_CACHEDIS1_bit(NVMCTRL);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
uint32_t pin = digitalinout->pin->number;
|
||||
port = &PORT->Group[GPIO_PORT(pin)]; // Convert GPIO # to port register
|
||||
pinMask = (1UL << (pin % 32)); // From port_pin_set_output_level ASF code.
|
||||
ptr = pixels;
|
||||
end = ptr + numBytes;
|
||||
p = *ptr++;
|
||||
bitMask = 0x80;
|
||||
|
||||
volatile uint32_t *set = &(port->OUTSET.reg),
|
||||
*clr = &(port->OUTCLR.reg);
|
||||
|
||||
for(;;) {
|
||||
*set = pinMask;
|
||||
// This is the time where the line is always high regardless of the bit.
|
||||
// For the SK6812 its 0.3us +- 0.15us
|
||||
#ifdef SAMD21
|
||||
asm("nop; nop;");
|
||||
#endif
|
||||
#ifdef SAMD51
|
||||
delay_cycles(2);
|
||||
#endif
|
||||
if((p & bitMask) != 0) {
|
||||
// This is the high delay unique to a one bit.
|
||||
// For the SK6812 its 0.3us
|
||||
#ifdef SAMD21
|
||||
asm("nop; nop; nop; nop; nop; nop; nop;");
|
||||
#endif
|
||||
#ifdef SAMD51
|
||||
delay_cycles(3);
|
||||
#endif
|
||||
*clr = pinMask;
|
||||
} else {
|
||||
*clr = pinMask;
|
||||
// This is the low delay unique to a zero bit.
|
||||
// For the SK6812 its 0.3us
|
||||
#ifdef SAMD21
|
||||
asm("nop; nop;");
|
||||
#endif
|
||||
#ifdef SAMD51
|
||||
delay_cycles(2);
|
||||
#endif
|
||||
}
|
||||
if((bitMask >>= 1) != 0) {
|
||||
// This is the delay between bits in a byte and is the 1 code low
|
||||
// level time from the datasheet.
|
||||
// For the SK6812 its 0.6us +- 0.15us
|
||||
#ifdef SAMD21
|
||||
asm("nop; nop; nop; nop; nop;");
|
||||
#endif
|
||||
#ifdef SAMD51
|
||||
delay_cycles(4);
|
||||
#endif
|
||||
} else {
|
||||
if(ptr >= end) break;
|
||||
p = *ptr++;
|
||||
bitMask = 0x80;
|
||||
// This is the delay between bytes. It's similar to the other branch
|
||||
// in the if statement except its tuned to account for the time the
|
||||
// above operations take.
|
||||
// For the SK6812 its 0.6us +- 0.15us
|
||||
#ifdef SAMD51
|
||||
delay_cycles(3);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef SAMD21
|
||||
// Speed up! (But inconsistent timing.)
|
||||
NVMCTRL->CTRLB.bit.READMODE = NVMCTRL_CTRLB_READMODE_NO_MISS_PENALTY_Val;
|
||||
#endif
|
||||
volatile uint32_t *clr = &(port->OUTCLR.reg);
|
||||
neopixel_send_buffer_core(clr, pinMask, pixels, numBytes);
|
||||
|
||||
#ifdef SAMD51
|
||||
// Turn instruction, data, and NVM caches back on.
|
||||
|
@ -189,4 +159,3 @@ void common_hal_neopixel_write(const digitalio_digitalinout_obj_t* digitalinout,
|
|||
|
||||
}
|
||||
|
||||
#pragma GCC pop_options
|
||||
|
|
Loading…
Reference in New Issue