bit_transpose: Support from 2 to 7 strands, not just 8

This commit is contained in:
Jeff Epler 2021-02-18 11:33:13 -06:00
parent 9cf7d73c6c
commit c284728621
4 changed files with 99 additions and 17 deletions

View File

@ -1093,7 +1093,8 @@ msgid "Initialization failed due to lack of memory"
msgstr ""
#: shared-bindings/_bit_transpose/__init__.c
msgid "Input buffer must be a multiple of 8 bytes"
#, c-format
msgid "Input buffer length (%d) must be a multiple of the strand count (%d)"
msgstr ""
#: ports/atmel-samd/common-hal/pulseio/PulseIn.c
@ -1664,7 +1665,8 @@ msgid "Out of sockets"
msgstr ""
#: shared-bindings/_bit_transpose/__init__.c
msgid "Output buffer must be at least as big as input buffer"
#, c-format
msgid "Output buffer must be at least %d bytes"
msgstr ""
#: shared-bindings/audiobusio/PDMIn.c
@ -3473,6 +3475,10 @@ msgstr ""
msgid "not enough arguments for format string"
msgstr ""
#: shared-bindings/_bit_transpose/__init__.c
msgid "num_strands must be from 2 to 8 (inclusive)"
msgstr ""
#: extmod/ulab/code/ulab_create.c
msgid "number of points must be at least 2"
msgstr ""

View File

@ -43,9 +43,10 @@
//| ...
//|
STATIC mp_obj_t bit_transpose(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
enum { ARG_input, ARG_output };
enum { ARG_input, ARG_num_strands, ARG_output };
static const mp_arg_t allowed_args[] = {
{ MP_QSTR_input, MP_ARG_OBJ | MP_ARG_REQUIRED, {} },
{ MP_QSTR_num_strands, MP_ARG_INT | MP_ARG_KW_ONLY, { .u_int = 8 } },
{ MP_QSTR_output, MP_ARG_OBJ | MP_ARG_KW_ONLY, { .u_obj = mp_const_none } },
};
mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
@ -55,21 +56,28 @@ STATIC mp_obj_t bit_transpose(size_t n_args, const mp_obj_t *pos_args, mp_map_t
mp_buffer_info_t output_bufinfo;
mp_get_buffer_raise(args[ARG_input].u_obj, &input_bufinfo, MP_BUFFER_READ);
int n = input_bufinfo.len;
if (n % 8 != 0) {
mp_raise_ValueError(translate("Input buffer must be a multiple of 8 bytes"));
int num_strands = args[ARG_num_strands].u_int;
if (num_strands < 2 || num_strands > 8) {
mp_raise_ValueError(translate("num_strands must be from 2 to 8 (inclusive)"));
}
int inlen = input_bufinfo.len;
if (inlen % num_strands != 0) {
mp_raise_ValueError_varg(translate("Input buffer length (%d) must be a multiple of the strand count (%d)"), inlen, num_strands);
}
mp_obj_t output = args[ARG_output].u_obj;
int outlen = 8 * (inlen / num_strands);
if (!output || output == mp_const_none) {
output = mp_obj_new_bytearray_of_zeros(n);
output = mp_obj_new_bytearray_of_zeros(outlen);
}
mp_get_buffer_raise(output, &output_bufinfo, MP_BUFFER_WRITE);
int m = output_bufinfo.len;
if (m < n) {
mp_raise_ValueError(translate("Output buffer must be at least as big as input buffer"));
int avail = output_bufinfo.len;
if (avail < outlen) {
mp_raise_ValueError_varg(translate("Output buffer must be at least %d bytes"), outlen);
}
common_hal_bit_transpose_bit_transpose(output_bufinfo.buf, input_bufinfo.buf, input_bufinfo.len);
common_hal_bit_transpose_bit_transpose(output_bufinfo.buf, input_bufinfo.buf, inlen, num_strands);
return output;
}
STATIC MP_DEFINE_CONST_FUN_OBJ_KW(bit_transpose_bit_transpose_obj, 1, bit_transpose);

View File

@ -29,4 +29,4 @@
#include <stdint.h>
#include <stdlib.h>
void common_hal_bit_transpose_bit_transpose(uint8_t *result, const uint8_t *src, size_t n);
void common_hal_bit_transpose_bit_transpose(uint8_t *result, const uint8_t *src, size_t inlen, size_t num_strands);

View File

@ -30,6 +30,12 @@
#include <stdlib.h>
#include <string.h>
// FALLTHROUGH marks a switch case that deliberately continues into the next
// case.  Use it as a statement: `FALLTHROUGH;`.
// GNU-compatible compilers get the `fallthrough` statement attribute; any
// other compiler gets a no-op expression so the same spelling still compiles.
#ifdef __GNUC__
#define FALLTHROUGH __attribute__((fallthrough))
#else
#define FALLTHROUGH ((void)0) /* FALLTHROUGH */
#endif
// adapted from "Hacker's Delight" - Figure 7-2 Transposing an 8x8-bit matrix
// basic idea is:
// > First, treat the 8x8-bit matrix as 16 2x2-bit matrices, and transpose each
@ -40,7 +46,57 @@
// > illustrated below.
// We want a different definition of bit/byte order, deal with strides differently, etc.
// so the code is heavily re-worked compared to the original.
static void transpose8(uint32_t *result, const uint8_t *src, int src_stride) {
// Bit-transpose one byte from each of `num_strands` strands (2 to 7 inclusive).
//
// Strand k is read from src[k * src_stride].  The two words stored to
// `result`, viewed as 8 consecutive bytes in memory, hold one byte per bit
// position: byte i carries bit (7 - i) of every strand, with strand k's bit
// placed in bit k of that byte.  Bits belonging to absent strands are zero.
//
// For any other value of num_strands no input is read and both result words
// are written as zero.
static void transpose_var(uint32_t *result, const uint8_t *src, int src_stride, int num_strands) {
    uint32_t x = 0, y = 0, t;

    // Start at the highest strand and walk backwards, so that each case can
    // fall through into the cases that load the lower-numbered strands.
    src += (num_strands - 1) * src_stride;

    // Every byte is widened to uint32_t *before* shifting: a plain `*src << 24`
    // would be evaluated in (signed) int and is undefined for bytes >= 0x80.
    switch (num_strands) {
        case 7:
            x |= (uint32_t)*src << 16;
            src -= src_stride;
            FALLTHROUGH;
        case 6:
            x |= (uint32_t)*src << 8;
            src -= src_stride;
            FALLTHROUGH;
        case 5:
            x |= (uint32_t)*src;
            src -= src_stride;
            FALLTHROUGH;
        case 4:
            y |= (uint32_t)*src << 24;
            src -= src_stride;
            FALLTHROUGH;
        case 3:
            y |= (uint32_t)*src << 16;
            src -= src_stride;
            FALLTHROUGH;
        case 2:
            y |= (uint32_t)*src << 8;
            src -= src_stride;
            y |= (uint32_t)*src;
            break;
        default:
            // Unsupported strand count: leave x and y zero.
            break;
    }

    // Transpose the 2x2 bit blocks inside each word ...
    t = (x ^ (x >> 7)) & 0x00AA00AA;
    x = x ^ t ^ (t << 7);
    t = (y ^ (y >> 7)) & 0x00AA00AA;
    y = y ^ t ^ (t << 7);

    // ... then the 2x2 blocks of those blocks ...
    t = (x ^ (x >> 14)) & 0x0000CCCC;
    x = x ^ t ^ (t << 14);
    t = (y ^ (y >> 14)) & 0x0000CCCC;
    y = y ^ t ^ (t << 14);

    // ... and finally exchange the 4x4 quadrants between the two words.
    t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F);
    y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F);
    x = t;

    // Make the in-memory byte order of the result the same on either endianness.
    #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    x = __builtin_bswap32(x);
    y = __builtin_bswap32(y);
    #endif
    result[0] = x;
    result[1] = y;
}
static void transpose_8(uint32_t *result, const uint8_t *src, int src_stride) {
uint32_t x, y, t;
y = *src; src += src_stride;
@ -70,14 +126,26 @@ static void transpose8(uint32_t *result, const uint8_t *src, int src_stride) {
result[1] = y;
}
static void bit_transpose(uint32_t *result, const uint8_t *src, size_t src_stride, size_t n) {
static void bit_transpose_8(uint32_t *result, const uint8_t *src, size_t src_stride, size_t n) {
for(size_t i=0; i<n; i++) {
transpose8(result, src, src_stride);
transpose_8(result, src, src_stride);
result += 2;
src += 1;
}
}
void common_hal_bit_transpose_bit_transpose(uint8_t *result, const uint8_t *src, size_t n) {
bit_transpose((uint32_t*)(void*)result, src, n/8, n/8);
// Run transpose_var once for each of the `n` byte positions in a strand.
// Position `col` reads its strand bytes starting at src[col] (spaced
// `src_stride` apart) and stores its two output words at result[2 * col].
static void bit_transpose_var(uint32_t *result, const uint8_t *src, size_t src_stride, size_t n, int num_strands) {
    for (size_t col = 0; col < n; col++) {
        transpose_var(&result[2 * col], &src[col], src_stride, num_strands);
    }
}
// Bit-transpose `inlen` bytes of `src`, laid out as `num_strands` equal-length
// strands stored one after another, into `result`.  Two 32-bit words
// (8 bytes) are written for every byte position in a strand.
//
// A strand count of exactly 8 goes to the dedicated 8-strand routine; every
// other count goes to the variable-strand routine.
//
// NOTE(review): `result` is reinterpreted as uint32_t*; presumably callers
// supply a buffer that is suitably aligned for 32-bit stores - confirm.
// NOTE(review): num_strands is used as a divisor and is not checked here;
// callers are expected to have validated it.
void common_hal_bit_transpose_bit_transpose(uint8_t *result, const uint8_t *src, size_t inlen, size_t num_strands) {
    uint32_t *words = (uint32_t *)(void *)result;
    // Bytes per strand: both the distance between strands and the column count.
    size_t strand_len = inlen / num_strands;

    if (num_strands != 8) {
        bit_transpose_var(words, src, strand_len, strand_len, num_strands);
        return;
    }
    bit_transpose_8(words, src, strand_len, strand_len);
}