bit_transpose: Support from 2 to 7 strands, not just 8
This commit is contained in:
parent
9cf7d73c6c
commit
c284728621
@ -1093,7 +1093,8 @@ msgid "Initialization failed due to lack of memory"
|
||||
msgstr ""
|
||||
|
||||
#: shared-bindings/_bit_transpose/__init__.c
|
||||
msgid "Input buffer must be a multiple of 8 bytes"
|
||||
#, c-format
|
||||
msgid "Input buffer length (%d) must be a multiple of the strand count (%d)"
|
||||
msgstr ""
|
||||
|
||||
#: ports/atmel-samd/common-hal/pulseio/PulseIn.c
|
||||
@ -1664,7 +1665,8 @@ msgid "Out of sockets"
|
||||
msgstr ""
|
||||
|
||||
#: shared-bindings/_bit_transpose/__init__.c
|
||||
msgid "Output buffer must be at least as big as input buffer"
|
||||
#, c-format
|
||||
msgid "Output buffer must be at least %d bytes"
|
||||
msgstr ""
|
||||
|
||||
#: shared-bindings/audiobusio/PDMIn.c
|
||||
@ -3473,6 +3475,10 @@ msgstr ""
|
||||
msgid "not enough arguments for format string"
|
||||
msgstr ""
|
||||
|
||||
#: shared-bindings/_bit_transpose/__init__.c
|
||||
msgid "num_strands must be from 2 to 8 (inclusive)"
|
||||
msgstr ""
|
||||
|
||||
#: extmod/ulab/code/ulab_create.c
|
||||
msgid "number of points must be at least 2"
|
||||
msgstr ""
|
||||
|
@ -43,9 +43,10 @@
|
||||
//| ...
|
||||
//|
|
||||
STATIC mp_obj_t bit_transpose(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
|
||||
enum { ARG_input, ARG_output };
|
||||
enum { ARG_input, ARG_num_strands, ARG_output };
|
||||
static const mp_arg_t allowed_args[] = {
|
||||
{ MP_QSTR_input, MP_ARG_OBJ | MP_ARG_REQUIRED, {} },
|
||||
{ MP_QSTR_num_strands, MP_ARG_INT | MP_ARG_KW_ONLY, { .u_int = 8 } },
|
||||
{ MP_QSTR_output, MP_ARG_OBJ | MP_ARG_KW_ONLY, { .u_obj = mp_const_none } },
|
||||
};
|
||||
mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
|
||||
@ -55,21 +56,28 @@ STATIC mp_obj_t bit_transpose(size_t n_args, const mp_obj_t *pos_args, mp_map_t
|
||||
mp_buffer_info_t output_bufinfo;
|
||||
|
||||
mp_get_buffer_raise(args[ARG_input].u_obj, &input_bufinfo, MP_BUFFER_READ);
|
||||
int n = input_bufinfo.len;
|
||||
if (n % 8 != 0) {
|
||||
mp_raise_ValueError(translate("Input buffer must be a multiple of 8 bytes"));
|
||||
int num_strands = args[ARG_num_strands].u_int;
|
||||
|
||||
if (num_strands < 2 || num_strands > 8) {
|
||||
mp_raise_ValueError(translate("num_strands must be from 2 to 8 (inclusive)"));
|
||||
}
|
||||
|
||||
int inlen = input_bufinfo.len;
|
||||
if (inlen % num_strands != 0) {
|
||||
mp_raise_ValueError_varg(translate("Input buffer length (%d) must be a multiple of the strand count (%d)"), inlen, num_strands);
|
||||
}
|
||||
mp_obj_t output = args[ARG_output].u_obj;
|
||||
|
||||
int outlen = 8 * (inlen / num_strands);
|
||||
if (!output || output == mp_const_none) {
|
||||
output = mp_obj_new_bytearray_of_zeros(n);
|
||||
output = mp_obj_new_bytearray_of_zeros(outlen);
|
||||
}
|
||||
mp_get_buffer_raise(output, &output_bufinfo, MP_BUFFER_WRITE);
|
||||
int m = output_bufinfo.len;
|
||||
if (m < n) {
|
||||
mp_raise_ValueError(translate("Output buffer must be at least as big as input buffer"));
|
||||
int avail = output_bufinfo.len;
|
||||
if (avail < outlen) {
|
||||
mp_raise_ValueError_varg(translate("Output buffer must be at least %d bytes"), outlen);
|
||||
}
|
||||
common_hal_bit_transpose_bit_transpose(output_bufinfo.buf, input_bufinfo.buf, input_bufinfo.len);
|
||||
common_hal_bit_transpose_bit_transpose(output_bufinfo.buf, input_bufinfo.buf, inlen, num_strands);
|
||||
return output;
|
||||
}
|
||||
STATIC MP_DEFINE_CONST_FUN_OBJ_KW(bit_transpose_bit_transpose_obj, 1, bit_transpose);
|
||||
|
@ -29,4 +29,4 @@
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
void common_hal_bit_transpose_bit_transpose(uint8_t *result, const uint8_t *src, size_t n);
|
||||
void common_hal_bit_transpose_bit_transpose(uint8_t *result, const uint8_t *src, size_t inlen, size_t num_strands);
|
||||
|
@ -30,6 +30,12 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define FALLTHROUGH __attribute__((fallthrough))
|
||||
#else
|
||||
#define FALLTHROUGH ((void)0) /* FALLTHROUGH */
|
||||
#endif
|
||||
|
||||
// adapted from "Hacker's Delight" - Figure 7-2 Transposing an 8x8-bit matrix
|
||||
// basic idea is:
|
||||
// > First, treat the 8x8-bit matrix as 16 2x2-bit matrices, and transpose each
|
||||
@ -40,7 +46,57 @@
|
||||
// > illustrated below.
|
||||
// We want a different definition of bit/byte order, deal with strides differently, etc.
|
||||
// so the code is heavily re-worked compared to the original.
|
||||
static void transpose8(uint32_t *result, const uint8_t *src, int src_stride) {
|
||||
// Transpose up to eight one-byte rows ("strands") into eight output bytes.
//
//   result      - receives two 32-bit words (8 bytes of output)
//   src         - address of the byte belonging to row 0
//   src_stride  - distance, in bytes, between the same column of adjacent rows
//   num_strands - number of rows to read; rows beyond the eighth are ignored
//                 and rows that are not supplied are treated as zero
//
// Viewed as bytes in memory, output byte j (0..7) holds bit (7 - j) of every
// row, with row r stored in bit r of that byte.
static void transpose_var(uint32_t *result, const uint8_t *src, int src_stride, int num_strands) {
    uint32_t x = 0, y = 0, t;

    // Pack rows 0..3 into y and rows 4..7 into x, one row per byte, lowest row
    // in the least significant byte.  Each byte is widened to uint32_t before
    // shifting: shifting the promoted (signed) int left by 24 is undefined
    // behaviour whenever the row byte is 0x80 or larger.
    for (int row = 0; row < num_strands && row < 8; row++) {
        uint32_t bits = (uint32_t)src[row * src_stride] << (8 * (row & 3));
        if (row < 4) {
            y |= bits;
        } else {
            x |= bits;
        }
    }

    // Transpose every 2x2 bit block inside each word.
    t = (x ^ (x >> 7)) & 0x00AA00AA; x = x ^ t ^ (t << 7);
    t = (y ^ (y >> 7)) & 0x00AA00AA; y = y ^ t ^ (t << 7);

    // Transpose the 2x2 blocks inside every 4x4 block.
    t = (x ^ (x >> 14)) & 0x0000CCCC; x = x ^ t ^ (t << 14);
    t = (y ^ (y >> 14)) & 0x0000CCCC; y = y ^ t ^ (t << 14);

    // Exchange nibbles between the two words to finish the 8x8 transpose:
    // x keeps its high nibbles and takes y's high nibbles as its low ones,
    // y keeps its low nibbles and takes x's low nibbles as its high ones.
    t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F);
    y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F);
    x = t;

    // Byte-swap on little-endian targets so the in-memory byte order of the
    // result is the same regardless of host endianness.
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    x = __builtin_bswap32(x);
    y = __builtin_bswap32(y);
#endif
    result[0] = x;
    result[1] = y;
}
|
||||
|
||||
static void transpose_8(uint32_t *result, const uint8_t *src, int src_stride) {
|
||||
uint32_t x, y, t;
|
||||
|
||||
y = *src; src += src_stride;
|
||||
@ -70,14 +126,26 @@ static void transpose8(uint32_t *result, const uint8_t *src, int src_stride) {
|
||||
result[1] = y;
|
||||
}
|
||||
|
||||
static void bit_transpose(uint32_t *result, const uint8_t *src, size_t src_stride, size_t n) {
|
||||
static void bit_transpose_8(uint32_t *result, const uint8_t *src, size_t src_stride, size_t n) {
|
||||
for(size_t i=0; i<n; i++) {
|
||||
transpose8(result, src, src_stride);
|
||||
transpose_8(result, src, src_stride);
|
||||
result += 2;
|
||||
src += 1;
|
||||
}
|
||||
}
|
||||
|
||||
void common_hal_bit_transpose_bit_transpose(uint8_t *result, const uint8_t *src, size_t n) {
|
||||
bit_transpose((uint32_t*)(void*)result, src, n/8, n/8);
|
||||
// Run transpose_var once per column.
//
// For each column index in [0, n) the source byte at src + column is used as
// the start of row 0 (rows are src_stride bytes apart) and the two resulting
// 32-bit words are stored at result + 2 * column.
static void bit_transpose_var(uint32_t *result, const uint8_t *src, size_t src_stride, size_t n, int num_strands) {
    for (size_t column = 0; column < n; column++) {
        transpose_var(result + 2 * column, src + column, src_stride, num_strands);
    }
}
|
||||
|
||||
// Bit-transpose `inlen` bytes of `src`, laid out as `num_strands` equal-length
// rows, into `result`.
//
// The row length (inlen / num_strands) is used both as the stride between rows
// and as the number of columns to process.  Eight strands go through the
// dedicated 8-row routine; any other count goes through the variable-row one.
//
// NOTE(review): `result` is written through a uint32_t pointer, so this
// presumably expects a 4-byte-aligned output buffer - confirm with callers.
void common_hal_bit_transpose_bit_transpose(uint8_t *result, const uint8_t *src, size_t inlen, size_t num_strands) {
    uint32_t *words = (uint32_t*)(void*)result;

    if (num_strands != 8) {
        size_t row_len = inlen / num_strands;
        bit_transpose_var(words, src, row_len, row_len, num_strands);
        return;
    }

    size_t row_len = inlen / 8;
    bit_transpose_8(words, src, row_len, row_len);
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user