_pixelbuf: Increase performance of brightness-scaling

On the Pico, this increases the "fill rate" of
    pixels[:] = newvalues
considerably.  On a strip of 240 RGB LEDs, auto_write=False, the timings
are:

|| Brightness || Before || After || Improvement ||
|| 1.0        || 117 kpix/s || 307 kpix/s || 2.62x ||
|| 0.07       || 117 kpix/s || 273 kpix/s || 2.33x ||

It's worth noting that even the "before" rate is fast compared to the
time to transmit a single neopixel, but any time we can gain back
in the whole pipeline will let marginal animations work a little better.
Setting all the pixels in this way and then calling show() gives a pleasant
bump to the framerate, from about 108Hz to 124Hz (1.15x).

The main source of speed-up is using integer math instead of floating
point math for the calculation of the post-scaled pixel values.  A slight
secondary gain is achieved by avoiding the scaling altogether when
the scale factor is 1.0.

Because the math is not exactly the same, some scaled pixel values may
change by +- 1 RGBW "step".  In practice, this is unlikely to matter.

The gains are bigger on the Pico and other M0 microcontrollers than on M4
microcontrollers with floating point math in the hardware.

Happily, flash size is also improved a bit on the Pico build I did,
going from
> 542552 bytes used, 506024 bytes free in flash firmware space out of 1048576 bytes (1024.0kB).

to
> 542376 bytes used, 506200 bytes free in flash firmware space out of 1048576 bytes (1024.0kB).
This commit is contained in:
Jeff Epler 2021-02-24 09:29:59 -06:00
parent e41137c745
commit b7f5c277ad
2 changed files with 68 additions and 34 deletions

View File

@ -31,6 +31,7 @@
#include "py/runtime.h"
#include "shared-bindings/_pixelbuf/PixelBuf.h"
#include <string.h>
#include <math.h>
// Helper to ensure we have the native super class instead of a subclass.
static pixelbuf_pixelbuf_obj_t* native_pixelbuf(mp_obj_t pixelbuf_obj) {
@ -69,6 +70,7 @@ void common_hal__pixelbuf_pixelbuf_construct(pixelbuf_pixelbuf_obj_t *self, size
}
// Call set_brightness so that it can allocate a second buffer if needed.
self->brightness = 1.0;
self->scaled_brightness = 0x100;
common_hal__pixelbuf_pixelbuf_set_brightness(MP_OBJ_FROM_PTR(self), brightness);
// Turn on auto_write. We don't want to do it with the above brightness call.
@ -109,26 +111,31 @@ void common_hal__pixelbuf_pixelbuf_set_brightness(mp_obj_t self_in, mp_float_t b
pixelbuf_pixelbuf_obj_t* self = native_pixelbuf(self_in);
// Skip out if the brightness is already set. The default of self->brightness is 1.0. So, this
// also prevents the pre_brightness_buffer allocation when brightness is set to 1.0 again.
mp_float_t change = brightness - self->brightness;
if (-0.001 < change && change < 0.001) {
self->brightness = brightness;
uint16_t new_scaled_brightness = (int)roundf(brightness * 256);
if (new_scaled_brightness == self->scaled_brightness) {
return;
}
self->brightness = brightness;
self->scaled_brightness = new_scaled_brightness;
size_t pixel_len = self->pixel_count * self->bytes_per_pixel;
if (self->pre_brightness_buffer == NULL) {
self->pre_brightness_buffer = m_malloc(pixel_len, false);
memcpy(self->pre_brightness_buffer, self->post_brightness_buffer, pixel_len);
}
for (size_t i = 0; i < pixel_len; i++) {
// Don't adjust per-pixel luminance bytes in dotstar mode
if (self->byteorder.is_dotstar && i % 4 == 0) {
continue;
if (self->scaled_brightness == 0x100 && !self->pre_brightness_buffer) {
return;
} else {
if (self->pre_brightness_buffer == NULL) {
self->pre_brightness_buffer = m_malloc(pixel_len, false);
memcpy(self->pre_brightness_buffer, self->post_brightness_buffer, pixel_len);
}
for (size_t i = 0; i < pixel_len; i++) {
// Don't adjust per-pixel luminance bytes in dotstar mode
if (self->byteorder.is_dotstar && i % 4 == 0) {
continue;
}
self->post_brightness_buffer[i] = (self->pre_brightness_buffer[i] * self->scaled_brightness) >> 8;
}
self->post_brightness_buffer[i] = self->pre_brightness_buffer[i] * self->brightness;
}
if (self->auto_write) {
common_hal__pixelbuf_pixelbuf_show(self_in);
if (self->auto_write) {
common_hal__pixelbuf_pixelbuf_show(self_in);
}
}
}
@ -197,28 +204,34 @@ void _pixelbuf_set_pixel_color(pixelbuf_pixelbuf_obj_t* self, size_t index, uint
}
pixelbuf_rgbw_t *rgbw_order = &self->byteorder.byteorder;
size_t offset = index * self->bytes_per_pixel;
if (self->pre_brightness_buffer != NULL) {
uint8_t* pre_brightness_buffer = self->pre_brightness_buffer + offset;
if (self->bytes_per_pixel == 4) {
pre_brightness_buffer[rgbw_order->w] = w;
}
pre_brightness_buffer[rgbw_order->r] = r;
pre_brightness_buffer[rgbw_order->g] = g;
pre_brightness_buffer[rgbw_order->b] = b;
uint8_t *scaled_buffer, *unscaled_buffer;
if (self->pre_brightness_buffer) {
scaled_buffer = self->post_brightness_buffer + offset;
unscaled_buffer = self->pre_brightness_buffer + offset;
} else {
scaled_buffer = NULL;
unscaled_buffer = self->post_brightness_buffer + offset;
}
uint8_t* post_brightness_buffer = self->post_brightness_buffer + offset;
if (self->bytes_per_pixel == 4) {
// Only apply brightness if w is actually white (aka not DotStar.)
if (!self->byteorder.is_dotstar) {
w *= self->brightness;
}
post_brightness_buffer[rgbw_order->w] = w;
unscaled_buffer[rgbw_order->w] = w;
}
unscaled_buffer[rgbw_order->r] = r;
unscaled_buffer[rgbw_order->g] = g;
unscaled_buffer[rgbw_order->b] = b;
if (scaled_buffer) {
if (self->bytes_per_pixel == 4) {
if (!self->byteorder.is_dotstar) {
w = (w * self->scaled_brightness) >> 8;
}
scaled_buffer[rgbw_order->w] = w;
}
scaled_buffer[rgbw_order->r] = (r * self->scaled_brightness) >> 8;
scaled_buffer[rgbw_order->g] = (g * self->scaled_brightness) >> 8;
scaled_buffer[rgbw_order->b] = (b * self->scaled_brightness) >> 8;
}
post_brightness_buffer[rgbw_order->r] = r * self->brightness;
post_brightness_buffer[rgbw_order->g] = g * self->brightness;
post_brightness_buffer[rgbw_order->b] = b * self->brightness;
}
void _pixelbuf_set_pixel(pixelbuf_pixelbuf_obj_t* self, size_t index, mp_obj_t value) {
@ -318,3 +331,23 @@ void common_hal__pixelbuf_pixelbuf_fill(mp_obj_t self_in, mp_obj_t fill_color) {
common_hal__pixelbuf_pixelbuf_show(self_in);
}
}
// Buffer-protocol hook: fill `bufinfo` so the pixel data can be viewed as
// a flat array of unsigned bytes.
//
// self_in: the pixelbuf object (resolved through native_pixelbuf()).
// bufinfo: receives the buffer pointer, its length in bytes and the
//          typecode 'B'.
// flags:   access requested by the caller; MP_BUFFER_WRITE asks for a
//          writable view.
//
// Returns 0 once `bufinfo` has been filled in. Returns 1 when a writable
// view is requested while a separate pre-brightness buffer exists.
mp_int_t common_hal__pixelbuf_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, mp_uint_t flags) {
    pixelbuf_pixelbuf_obj_t *self = native_pixelbuf(self_in);

    // Start with the unscaled copy; it is the one to expose whenever it exists.
    bufinfo->buf = self->pre_brightness_buffer;

    // A separate unscaled copy means brightness scaling is in effect. Writes
    // made through the buffer could not trigger the brightness conversion
    // into the scaled copy, so only read access is granted in that case.
    if (self->pre_brightness_buffer != NULL && (flags & MP_BUFFER_WRITE) != 0) {
        return 1;
    }

    // Without an unscaled copy, the post-brightness buffer is the only pixel
    // storage, so expose it directly.
    if (self->pre_brightness_buffer == NULL) {
        bufinfo->buf = self->post_brightness_buffer;
    }

    bufinfo->typecode = 'B';
    bufinfo->len = self->bytes_per_pixel * common_hal__pixelbuf_pixelbuf_get_len(self_in);
    return 0;
}

View File

@ -49,7 +49,8 @@ typedef struct {
typedef struct {
mp_obj_base_t base;
size_t pixel_count;
size_t bytes_per_pixel;
uint16_t bytes_per_pixel;
uint16_t scaled_brightness;
pixelbuf_byteorder_details_t byteorder;
mp_float_t brightness;
mp_obj_t transmit_buffer_obj;