Add string formatting support for longlong and mpz.

This commit is contained in:
Dave Hylands 2014-04-07 11:19:51 -07:00
parent 6827f9fc55
commit c4029e5079
11 changed files with 303 additions and 95 deletions

View File

@ -1216,15 +1216,27 @@ uint mpz_as_str_size(const mpz_t *i, uint base) {
return i->len * DIG_SIZE / log_base2_floor[base] + 2 + 1; // +1 for null byte termination
}
// Returns an upper bound on the buffer size needed to format i in the given
// base, including room for an optional prefix string, optional separator
// characters between groups of three digits, a sign and the terminating
// null byte. Returns 0 when base is outside the range 2..32.
uint mpz_as_str_size_formatted(const mpz_t *i, uint base, const char *prefix, char comma) {
    if (!(base >= 2 && base <= 32)) {
        return 0;
    }

    // Digit estimate: total bits held by the mpz divided by the (rounded
    // down) number of bits per digit in this base, plus one.
    uint digits = i->len * DIG_SIZE / log_base2_floor[base] + 1;

    uint total = digits + 2; // +1 for sign, +1 for null byte
    if (comma) {
        // At most one separator for every three digits.
        total += digits / 3;
    }
    if (prefix) {
        total += (uint)strlen(prefix);
    }
    return total;
}
char *mpz_as_str(const mpz_t *i, uint base) {
char *s = m_new(char, mpz_as_str_size(i, base));
mpz_as_str_inpl(i, base, s);
mpz_as_str_inpl(i, base, "", 'a', 0, s);
return s;
}
// assumes enough space as calculated by mpz_as_str_size, or by
// mpz_as_str_size_formatted when a prefix or comma separator is used
// returns length of string, not including null byte
uint mpz_as_str_inpl(const mpz_t *i, uint base, char *str) {
uint mpz_as_str_inpl(const mpz_t *i, uint base, const char *prefix, char base_char, char comma, char *str) {
if (str == NULL || base < 2 || base > 32) {
str[0] = 0;
return 0;
@ -1232,10 +1244,15 @@ uint mpz_as_str_inpl(const mpz_t *i, uint base, char *str) {
uint ilen = i->len;
char *s = str;
if (ilen == 0) {
str[0] = '0';
str[1] = 0;
return 1;
if (prefix) {
while (*prefix)
*s++ = *prefix++;
}
*s++ = '0';
*s = '\0';
return s - str;
}
// make a copy of mpz digits
@ -1243,7 +1260,7 @@ uint mpz_as_str_inpl(const mpz_t *i, uint base, char *str) {
memcpy(dig, i->dig, ilen * sizeof(mpz_dig_t));
// convert
char *s = str;
char *last_comma = str;
bool done;
do {
mpz_dig_t *d = dig + ilen;
@ -1259,7 +1276,7 @@ uint mpz_as_str_inpl(const mpz_t *i, uint base, char *str) {
// convert to character
a += '0';
if (a > '9') {
a += 'a' - '9' - 1;
a += base_char - '9' - 1;
}
*s++ = a;
@ -1271,8 +1288,19 @@ uint mpz_as_str_inpl(const mpz_t *i, uint base, char *str) {
break;
}
}
} while (!done);
if (comma && (s - last_comma) == 3) {
*s++ = comma;
last_comma = s;
}
}
while (!done);
if (prefix) {
const char *p = &prefix[strlen(prefix)];
while (p > prefix) {
*s++ = *--p;
}
}
if (i->neg != 0) {
*s++ = '-';
}
@ -1284,7 +1312,7 @@ uint mpz_as_str_inpl(const mpz_t *i, uint base, char *str) {
*v = temp;
}
s[0] = 0; // null termination
*s = '\0'; // null termination
return s - str;
}

View File

@ -76,5 +76,6 @@ bool mpz_as_int_checked(const mpz_t *z, machine_int_t *value);
mp_float_t mpz_as_float(const mpz_t *z);
#endif
uint mpz_as_str_size(const mpz_t *z, uint base);
uint mpz_as_str_size_formatted(const mpz_t *i, uint base, const char *prefix, char comma);
char *mpz_as_str(const mpz_t *z, uint base);
uint mpz_as_str_inpl(const mpz_t *z, uint base, char *str);
uint mpz_as_str_inpl(const mpz_t *z, uint base, const char *prefix, char base_char, char comma, char *str);

View File

@ -1,6 +1,7 @@
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <string.h>
#include "nlr.h"
#include "misc.h"
@ -53,14 +54,129 @@ STATIC mp_obj_t mp_obj_int_make_new(mp_obj_t type_in, uint n_args, uint n_kw, co
}
}
#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE
void mp_obj_int_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
if (MP_OBJ_IS_SMALL_INT(self_in)) {
print(env, INT_FMT, MP_OBJ_SMALL_INT_VALUE(self_in));
// The size of this buffer is rather arbitrary. If it's not large
// enough, a dynamic one will be allocated.
char stack_buf[sizeof(machine_int_t) * 4];
char *buf = stack_buf;
int buf_size = sizeof(stack_buf);
int fmt_size;
char *str = mp_obj_int_formatted(&buf, &buf_size, &fmt_size, self_in, 10, NULL, '\0', '\0');
print(env, "%s", str);
if (buf != stack_buf) {
m_free(buf, buf_size);
}
}
#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE || MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG
#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG
typedef mp_longint_impl_t fmt_int_t;
#else
typedef mp_small_int_t fmt_int_t;
#endif
// Lookup table indexed by numeric base (0..32) whose entries are
// floor(log2(base)), i.e. a lower bound on the number of bits that a single
// digit in that base represents. It is used to estimate how many digits are
// needed to print an integer of a given bit width.
// The entries for bases 0 and 1 are 0; int_as_str_size_formatted rejects
// those bases before indexing the table, so they never end up as a divisor
// there.
static const uint log_base2_floor[] = {
0, // base 0
0, 1, 1, 2, // bases 1..4
2, 2, 2, 3, // bases 5..8
3, 3, 3, 3, // bases 9..12
3, 3, 3, 4, // bases 13..16
4, 4, 4, 4, // bases 17..20
4, 4, 4, 4, // bases 21..24
4, 4, 4, 4, // bases 25..28
4, 4, 4, 5 // bases 29..32
};
// Returns an upper bound on the buffer size needed to format any fmt_int_t
// value in the given base, including room for an optional prefix string,
// optional separator characters between groups of three digits, a sign and
// the terminating null byte. Returns 0 when base is outside the range 2..32.
uint int_as_str_size_formatted(uint base, const char *prefix, char comma) {
    if (!(base >= 2 && base <= 32)) {
        return 0;
    }

    // Digit estimate: bit width of fmt_int_t divided by the (rounded down)
    // number of bits per digit in this base, plus one.
    uint digits = sizeof(fmt_int_t) * 8 / log_base2_floor[base] + 1;

    uint total = digits + 2; // +1 for sign, +1 for null byte
    if (comma) {
        // At most one separator for every three digits.
        total += digits / 3;
    }
    if (prefix) {
        total += (uint)strlen(prefix);
    }
    return total;
}
// Formats the integer object self_in as text in the given base.
//
// The caller passes a scratch buffer and its size in *buf and *buf_size. If
// that buffer is too small, a new one is allocated with m_new and stored in
// *buf and *buf_size; it is the caller's responsibility to free such an
// allocated buffer (compare *buf against the buffer originally passed in).
//
// The text is generated backwards from the end of the buffer, so the
// returned pointer is the start of the formatted string somewhere inside
// *buf, not necessarily *buf itself. The length of the string, excluding
// the terminating null byte, is stored in *fmt_size.
//
// prefix (may be NULL) is emitted between the sign and the digits, base_char
// is the character used for digit value 10 (e.g. 'a' or 'A'), and comma, if
// non-zero, is inserted as a separator between groups of three digits.
//
// If self_in is not an int, or base is not in 2..32, an empty string is
// stored at the start of *buf, *fmt_size is set to 0 and *buf is returned.
// This assumes the caller's buffer holds at least one byte.
char *mp_obj_int_formatted(char **buf, int *buf_size, int *fmt_size, mp_obj_t self_in,
                           int base, const char *prefix, char base_char, char comma) {
    if (!MP_OBJ_IS_INT(self_in)) {
        // Write the terminator into the buffer itself. Assigning to buf[0]
        // would instead replace the caller's buffer pointer with NULL.
        **buf = '\0';
        *fmt_size = 0;
        return *buf;
    }

    fmt_int_t num = mp_obj_get_int(self_in);
    char sign = '\0';
    if (num < 0) {
        // NOTE(review): negating the most negative fmt_int_t value overflows;
        // not handled here.
        num = -num;
        sign = '-';
    }

    uint needed_size = int_as_str_size_formatted(base, prefix, comma);
    if (needed_size == 0) {
        // Unsupported base: there is no room to format anything, and writing
        // the terminator below would land before the start of the buffer.
        **buf = '\0';
        *fmt_size = 0;
        return *buf;
    }
    // Compare in the unsigned domain only once *buf_size is known to be
    // non-negative, otherwise a negative size would look huge.
    if (*buf_size < 0 || needed_size > (uint)*buf_size) {
        *buf = m_new(char, needed_size);
        *buf_size = needed_size;
    }
    char *str = *buf;

    // Fill the buffer from the end towards the start.
    char *b = str + needed_size;
    *(--b) = '\0';
    char *last_comma = b;

    if (num == 0) {
        *(--b) = '0';
    } else {
        do {
            int c = num % base;
            num /= base;
            if (c >= 10) {
                c += base_char - 10;
            } else {
                c += '0';
            }
            *(--b) = c;
            // Insert a separator after every third digit, but only when
            // more digits follow.
            if (comma && num != 0 && b > str && (last_comma - b) == 3) {
                *(--b) = comma;
                last_comma = b;
            }
        } while (b > str && num != 0);
    }

    if (prefix) {
        size_t prefix_len = strlen(prefix);
        char *p = b - prefix_len;
        // Only emit the prefix when it fits in front of the digits.
        if (p > str) {
            b = p;
            while (*prefix) {
                *p++ = *prefix++;
            }
        }
    }
    if (sign && b > str) {
        *(--b) = sign;
    }
    *fmt_size = *buf + needed_size - b - 1;

    return b;
}
// Returns true when the integer value of self_in, as obtained through
// mp_obj_get_int, is greater than or equal to zero.
bool mp_obj_int_is_positive(mp_obj_t self_in) {
    if (mp_obj_get_int(self_in) < 0) {
        return false;
    }
    return true;
}
#endif // LONGLONG or NONE
#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE
// This is called for operations on SMALL_INT that are not handled by mp_unary_op
mp_obj_t mp_obj_int_unary_op(int op, mp_obj_t o_in) {
return MP_OBJ_NULL;

View File

@ -8,6 +8,9 @@ typedef struct _mp_obj_int_t {
} mp_obj_int_t;
void mp_obj_int_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind);
char *mp_obj_int_formatted(char **buf, int *buf_size, int *fmt_size, mp_obj_t self_in,
int base, const char *prefix, char base_char, char comma);
bool mp_obj_int_is_positive(mp_obj_t self_in);
mp_obj_t mp_obj_int_unary_op(int op, mp_obj_t o_in);
mp_obj_t mp_obj_int_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in);
mp_obj_t mp_obj_int_binary_op_extra_cases(int op, mp_obj_t lhs_in, mp_obj_t rhs_in);

View File

@ -1,5 +1,6 @@
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "nlr.h"
#include "misc.h"
@ -21,15 +22,6 @@
#define SUFFIX ""
#endif
// Prints an int object through the supplied print callback.
// A small int is printed with INT_FMT; any other object is treated as an
// mp_obj_int_t and its val field is printed with "%lld" followed by SUFFIX.
// The kind argument is not used here.
void mp_obj_int_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
if (MP_OBJ_IS_SMALL_INT(self_in)) {
print(env, INT_FMT, MP_OBJ_SMALL_INT_VALUE(self_in));
} else {
mp_obj_int_t *self = self_in;
print(env, "%lld" SUFFIX, self->val);
}
}
mp_obj_t mp_obj_int_unary_op(int op, mp_obj_t o_in) {
mp_obj_int_t *o = o_in;
switch (op) {

View File

@ -22,16 +22,47 @@ STATIC mp_obj_int_t *mp_obj_int_new_mpz(void) {
return o;
}
void mp_obj_int_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
// This routine expects you to pass in a buffer and size (in *buf and *buf_size).
// If, for some reason, this buffer is too small, then it will allocate a
// buffer and return the allocated buffer and size in *buf and *buf_size. It
// is the caller's responsibility to free this allocated buffer.
//
// The resulting formatted string will be returned from this function and the
// formatted size will be in *fmt_size.
char *mp_obj_int_formatted(char **buf, int *buf_size, int *fmt_size, mp_obj_t self_in,
int base, const char *prefix, char base_char, char comma) {
mpz_t small_mpz;
mpz_t *mpz;
mpz_dig_t small_dig[(sizeof(mp_small_int_t) * 8 + MPZ_DIG_SIZE - 1) / MPZ_DIG_SIZE];
if (MP_OBJ_IS_SMALL_INT(self_in)) {
print(env, INT_FMT, MP_OBJ_SMALL_INT_VALUE(self_in));
mpz_init_fixed_from_int(&small_mpz, small_dig,
sizeof(small_dig) / sizeof(small_dig[0]),
MP_OBJ_SMALL_INT_VALUE(self_in));
mpz = &small_mpz;
} else {
// TODO would rather not allocate memory to print...
mp_obj_int_t *self = self_in;
char *str = mpz_as_str(&self->mpz, 10);
print(env, "%s", str);
m_free(str, 0);
mpz = &self->mpz;
}
uint needed_size = mpz_as_str_size_formatted(mpz, base, prefix, comma);
if (needed_size > *buf_size) {
*buf = m_new(char, needed_size);
*buf_size = needed_size;
}
char *str = *buf;
*fmt_size = mpz_as_str_inpl(mpz, base, prefix, base_char, comma, str);
return str;
}
// Returns true when self_in holds a value greater than or equal to zero.
// A small int is tested directly; any other object is treated as an
// mp_obj_int_t and the neg flag of its mpz is consulted.
bool mp_obj_int_is_positive(mp_obj_t self_in) {
    if (!MP_OBJ_IS_SMALL_INT(self_in)) {
        mp_obj_int_t *big = self_in;
        return !big->mpz.neg;
    }
    return MP_OBJ_SMALL_INT_VALUE(self_in) >= 0;
}
mp_obj_t mp_obj_int_unary_op(int op, mp_obj_t o_in) {

View File

@ -562,13 +562,17 @@ static bool arg_looks_numeric(mp_obj_t arg) {
;
}
static machine_int_t arg_as_int(mp_obj_t arg) {
static mp_obj_t arg_as_int(mp_obj_t arg) {
#if MICROPY_ENABLE_FLOAT
if (MP_OBJ_IS_TYPE(arg, &mp_type_float)) {
return mp_obj_get_float(arg);
// TODO: Needs a way to construct an mpz integer from a float
mp_small_int_t num = mp_obj_get_float(arg);
return MP_OBJ_NEW_SMALL_INT(num);
}
#endif
return mp_obj_get_int(arg);
return arg;
}
mp_obj_t str_format(uint n_args, const mp_obj_t *args) {
@ -788,7 +792,7 @@ mp_obj_t str_format(uint n_args, const mp_obj_t *args) {
if (arg_looks_integer(arg)) {
switch (type) {
case 'b':
pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 2, 'a', flags, fill, width);
pfenv_print_int(&pfenv_vstr, arg, 1, 2, 'a', flags, fill, width);
continue;
case 'c':
@ -801,19 +805,23 @@ mp_obj_t str_format(uint n_args, const mp_obj_t *args) {
case '\0': // No explicit format type implies 'd'
case 'n': // I don't think we support locales in uPy so use 'd'
case 'd':
pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 10, 'a', flags, fill, width);
pfenv_print_int(&pfenv_vstr, arg, 1, 10, 'a', flags, fill, width);
continue;
case 'o':
pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 8, 'a', flags, fill, width);
if (flags & PF_FLAG_SHOW_PREFIX) {
flags |= PF_FLAG_SHOW_OCTAL_LETTER;
}
pfenv_print_int(&pfenv_vstr, arg, 1, 8, 'a', flags, fill, width);
continue;
case 'x':
pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 16, 'a', flags, fill, width);
pfenv_print_int(&pfenv_vstr, arg, 1, 16, 'a', flags, fill, width);
continue;
case 'X':
pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 16, 'A', flags, fill, width);
pfenv_print_int(&pfenv_vstr, arg, 1, 16, 'A', flags, fill, width);
continue;
case 'e':
@ -1050,7 +1058,7 @@ STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, uint n_args, const mp_obj_t
case 'o':
if (alt) {
flags |= PF_FLAG_SHOW_PREFIX;
flags |= (PF_FLAG_SHOW_PREFIX | PF_FLAG_SHOW_OCTAL_LETTER);
}
pfenv_print_int(&pfenv_vstr, arg_as_int(arg), 1, 8, 'a', flags, fill, width);
break;

View File

@ -5,6 +5,8 @@
#include "mpconfig.h"
#include "qstr.h"
#include "obj.h"
#include "mpz.h"
#include "objint.h"
#include "pfenv.h"
#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
@ -79,71 +81,88 @@ int pfenv_print_strn(const pfenv_t *pfenv, const char *str, unsigned int len, in
// We can use 16 characters for 32-bit and 32 characters for 64-bit
#define INT_BUF_SIZE (sizeof(machine_int_t) * 4)
int pfenv_print_int(const pfenv_t *pfenv, machine_uint_t x, int sgn, int base, int base_char, int flags, char fill, int width) {
char sign = 0;
if (sgn) {
if ((machine_int_t)x < 0) {
sign = '-';
x = -x;
} else if (flags & PF_FLAG_SHOW_SIGN) {
sign = '+';
int pfenv_print_int(const pfenv_t *pfenv, mp_obj_t x, int sgn, int base, int base_char, int flags, char fill, int width) {
if (!MP_OBJ_IS_INT(x)) {
// This will convert booleans to int, or raise an error for
// non-integer types.
x = MP_OBJ_NEW_SMALL_INT(mp_obj_get_int(x));
}
char prefix_buf[4];
char *prefix = prefix_buf;
if (mp_obj_int_is_positive(x)) {
if (flags & PF_FLAG_SHOW_SIGN) {
*prefix++ = '+';
} else if (flags & PF_FLAG_SPACE_SIGN) {
sign = ' ';
*prefix++ = ' ';
}
}
char buf[INT_BUF_SIZE];
char *b = buf + INT_BUF_SIZE;
if (x == 0) {
*(--b) = '0';
} else {
do {
int c = x % base;
x /= base;
if (c >= 10) {
c += base_char - 10;
} else {
c += '0';
}
*(--b) = c;
} while (b > buf && x != 0);
}
char prefix_char = '\0';
if (flags & PF_FLAG_SHOW_PREFIX) {
if (base == 2) {
prefix_char = base_char + 'b' - 'a';
*prefix++ = '0';
*prefix++ = base_char + 'b' - 'a';
} else if (base == 8) {
prefix_char = base_char + 'o' - 'a';
} else if (base == 16) {
prefix_char = base_char + 'x' - 'a';
*prefix++ = '0';
if (flags & PF_FLAG_SHOW_OCTAL_LETTER) {
*prefix++ = base_char + 'o' - 'a';
}
} else if (base == 16) {
*prefix++ = '0';
*prefix++ = base_char + 'x' - 'a';
}
}
*prefix = '\0';
int prefix_len = prefix - prefix_buf;
prefix = prefix_buf;
char comma = '\0';
if (flags & PF_FLAG_SHOW_COMMA) {
comma = ',';
}
// The size of this buffer is rather arbitrary. If it's not large
// enough, a dynamic one will be allocated.
char stack_buf[sizeof(machine_int_t) * 4];
char *buf = stack_buf;
int buf_size = sizeof(stack_buf);
int fmt_size = 0;
char *str;
char sign = '\0';
if (flags & PF_FLAG_PAD_AFTER_SIGN) {
// We add the pad in this function, so since the pad goes after
// the sign & prefix, we format without a prefix
str = mp_obj_int_formatted(&buf, &buf_size, &fmt_size,
x, base, NULL, base_char, comma);
if (*str == '-') {
sign = *str++;
fmt_size--;
}
} else {
str = mp_obj_int_formatted(&buf, &buf_size, &fmt_size,
x, base, prefix, base_char, comma);
}
int len = 0;
if (flags & PF_FLAG_PAD_AFTER_SIGN) {
// pad after sign implies pad after prefix as well.
if (sign) {
len += pfenv_print_strn(pfenv, &sign, 1, flags, fill, 1);
len += pfenv_print_strn(pfenv, &sign, 1, 0, 0, 1);
width--;
}
if (prefix_char) {
len += pfenv_print_strn(pfenv, "0", 1, flags, fill, 1);
len += pfenv_print_strn(pfenv, &prefix_char, 1, flags, fill, 1);
width -= 2;
}
} else {
if (prefix_char && b > &buf[1]) {
*(--b) = prefix_char;
*(--b) = '0';
}
if (sign && b > buf) {
*(--b) = sign;
if (prefix_len) {
len += pfenv_print_strn(pfenv, prefix, prefix_len, 0, 0, 1);
width -= prefix_len;
}
}
len += pfenv_print_strn(pfenv, b, buf + INT_BUF_SIZE - b, flags, fill, width);
len += pfenv_print_strn(pfenv, str, fmt_size, flags, fill, width);
if (buf != stack_buf) {
m_free(buf, buf_size);
}
return len;
}

View File

@ -8,6 +8,7 @@
#define PF_FLAG_CENTER_ADJUST (0x080)
#define PF_FLAG_ADD_PERCENT (0x100)
#define PF_FLAG_PAD_NAN_INF (0x200)
#define PF_FLAG_SHOW_OCTAL_LETTER (0x400)
typedef struct _pfenv_t {
void *data;
@ -17,7 +18,7 @@ typedef struct _pfenv_t {
void pfenv_vstr_add_strn(void *data, const char *str, unsigned int len);
int pfenv_print_strn(const pfenv_t *pfenv, const char *str, unsigned int len, int flags, char fill, int width);
int pfenv_print_int(const pfenv_t *pfenv, machine_uint_t x, int sgn, int base, int base_char, int flags, char fill, int width);
int pfenv_print_int(const pfenv_t *pfenv, mp_obj_t x, int sgn, int base, int base_char, int flags, char fill, int width);
#if MICROPY_ENABLE_FLOAT
int pfenv_print_float(const pfenv_t *pfenv, mp_float_t f, char fmt, int flags, char fill, int width, int prec);
#endif

View File

@ -53,6 +53,7 @@ print("%X" % 18)
print("%X" % 18.0)
print("%#x" % 18)
print("%#X" % 18)
print("%#6o" % 18)
print("%#6x" % 18)
print("%#06x" % 18)
print("%e" % 1.23456)

View File

@ -24,11 +24,19 @@ test("{:4o}", 123)
test("{:4x}", 123)
test("{:4X}", 123)
test("{:4,d}", 12345678)
test("{:#4b}", 10)
test("{:#4o}", 123)
test("{:#4x}", 123)
test("{:#4X}", 123)
test("{:#4d}", 0)
test("{:#4b}", 0)
test("{:#4o}", 0)
test("{:#4x}", 0)
test("{:#4X}", 0)
test("{:<6s}", "ab")
test("{:>6s}", "ab")
test("{:^6s}", "ab")