diff --git a/py/mpz.c b/py/mpz.c index 21b390996a..a6b024ca87 100644 --- a/py/mpz.c +++ b/py/mpz.c @@ -1216,15 +1216,27 @@ uint mpz_as_str_size(const mpz_t *i, uint base) { return i->len * DIG_SIZE / log_base2_floor[base] + 2 + 1; // +1 for null byte termination } +uint mpz_as_str_size_formatted(const mpz_t *i, uint base, const char *prefix, char comma) { + if (base < 2 || base > 32) { + return 0; + } + + uint num_digits = i->len * DIG_SIZE / log_base2_floor[base] + 1; + uint num_commas = comma ? num_digits / 3: 0; + uint prefix_len = prefix ? strlen(prefix) : 0; + + return num_digits + num_commas + prefix_len + 2; // +1 for sign, +1 for null byte +} + char *mpz_as_str(const mpz_t *i, uint base) { char *s = m_new(char, mpz_as_str_size(i, base)); - mpz_as_str_inpl(i, base, s); + mpz_as_str_inpl(i, base, "", 'a', 0, s); return s; } // assumes enough space as calculated by mpz_as_str_size // returns length of string, not including null byte -uint mpz_as_str_inpl(const mpz_t *i, uint base, char *str) { +uint mpz_as_str_inpl(const mpz_t *i, uint base, const char *prefix, char base_char, char comma, char *str) { if (str == NULL || base < 2 || base > 32) { str[0] = 0; return 0; @@ -1232,10 +1244,15 @@ uint mpz_as_str_inpl(const mpz_t *i, uint base, char *str) { uint ilen = i->len; + char *s = str; if (ilen == 0) { - str[0] = '0'; - str[1] = 0; - return 1; + if (prefix) { + while (*prefix) + *s++ = *prefix++; + } + *s++ = '0'; + *s = '\0'; + return s - str; } // make a copy of mpz digits @@ -1243,7 +1260,7 @@ uint mpz_as_str_inpl(const mpz_t *i, uint base, char *str) { memcpy(dig, i->dig, ilen * sizeof(mpz_dig_t)); // convert - char *s = str; + char *last_comma = str; bool done; do { mpz_dig_t *d = dig + ilen; @@ -1259,7 +1276,7 @@ uint mpz_as_str_inpl(const mpz_t *i, uint base, char *str) { // convert to character a += '0'; if (a > '9') { - a += 'a' - '9' - 1; + a += base_char - '9' - 1; } *s++ = a; @@ -1271,8 +1288,19 @@ uint mpz_as_str_inpl(const mpz_t *i, uint base, 
char *str) { break; } } - } while (!done); + if (comma && !done && (s - last_comma) == 3) { // no separator after the final digit, else 123 prints as ,123 + *s++ = comma; + last_comma = s; + } + } + while (!done); + if (prefix) { + const char *p = &prefix[strlen(prefix)]; + while (p > prefix) { + *s++ = *--p; + } + } if (i->neg != 0) { *s++ = '-'; } @@ -1284,7 +1312,7 @@ uint mpz_as_str_inpl(const mpz_t *i, uint base, char *str) { *v = temp; } - s[0] = 0; // null termination + *s = '\0'; // null termination return s - str; } diff --git a/py/mpz.h b/py/mpz.h index cbe60eb8d0..04b06b8790 100644 --- a/py/mpz.h +++ b/py/mpz.h @@ -76,5 +76,6 @@ bool mpz_as_int_checked(const mpz_t *z, machine_int_t *value); mp_float_t mpz_as_float(const mpz_t *z); #endif uint mpz_as_str_size(const mpz_t *z, uint base); +uint mpz_as_str_size_formatted(const mpz_t *i, uint base, const char *prefix, char comma); char *mpz_as_str(const mpz_t *z, uint base); -uint mpz_as_str_inpl(const mpz_t *z, uint base, char *str); +uint mpz_as_str_inpl(const mpz_t *z, uint base, const char *prefix, char base_char, char comma, char *str); diff --git a/py/objint.c b/py/objint.c index a1d3924a21..e1b67a16b3 100644 --- a/py/objint.c +++ b/py/objint.c @@ -1,6 +1,7 @@ #include #include #include +#include #include "nlr.h" #include "misc.h" @@ -53,14 +54,139 @@ STATIC mp_obj_t mp_obj_int_make_new(mp_obj_t type_in, uint n_args, uint n_kw, co } } -#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE - void mp_obj_int_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) { - if (MP_OBJ_IS_SMALL_INT(self_in)) { - print(env, INT_FMT, MP_OBJ_SMALL_INT_VALUE(self_in)); + // The size of this buffer is rather arbitrary. If it's not large + // enough, a dynamic one will be allocated. 
+ char stack_buf[sizeof(machine_int_t) * 4]; + char *buf = stack_buf; + int buf_size = sizeof(stack_buf); + int fmt_size; + + char *str = mp_obj_int_formatted(&buf, &buf_size, &fmt_size, self_in, 10, NULL, '\0', '\0'); + print(env, "%s", str); + + if (buf != stack_buf) { + m_free(buf, buf_size); } } +#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE || MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG + +#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG +typedef mp_longint_impl_t fmt_int_t; +#else +typedef mp_small_int_t fmt_int_t; +#endif + +static const uint log_base2_floor[] = { + 0, + 0, 1, 1, 2, + 2, 2, 2, 3, + 3, 3, 3, 3, + 3, 3, 3, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 5 +}; + +uint int_as_str_size_formatted(uint base, const char *prefix, char comma) { + if (base < 2 || base > 32) { + return 0; + } + + uint num_digits = sizeof(fmt_int_t) * 8 / log_base2_floor[base] + 1; + uint num_commas = comma ? num_digits / 3: 0; + uint prefix_len = prefix ? strlen(prefix) : 0; + return num_digits + num_commas + prefix_len + 2; // +1 for sign, +1 for null byte +} + +// This routine expects you to pass in a buffer and size (in *buf and *buf_size). +// If, for some reason, this buffer is too small, then it will allocate a +// buffer and return the allocated buffer and size in *buf and *buf_size. It +// is the caller's responsibility to free this allocated buffer. +// +// The resulting formatted string will be returned from this function and the +// formatted size will be in *fmt_size. 
+char *mp_obj_int_formatted(char **buf, int *buf_size, int *fmt_size, mp_obj_t self_in, + int base, const char *prefix, char base_char, char comma) { + if (!MP_OBJ_IS_INT(self_in)) { + (*buf)[0] = '\0'; // terminate the caller's buffer; buf[0] = '\0' nulled the pointer itself + *fmt_size = 0; + return *buf; + } + fmt_int_t num; +#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG + mp_obj_int_t *self = self_in; + if (MP_OBJ_IS_TYPE(self_in, &mp_type_int)) { + // mp_obj_get_int truncates to machine_int_t + num = self->val; + } else +#endif + { + num = mp_obj_get_int(self_in); + } + char sign = '\0'; + if (num < 0) { + num = -num; + sign = '-'; + } + + uint needed_size = int_as_str_size_formatted(base, prefix, comma); + if (needed_size > *buf_size) { + *buf = m_new(char, needed_size); + *buf_size = needed_size; + } + char *str = *buf; + + char *b = str + needed_size; + *(--b) = '\0'; + char *last_comma = b; + + if (num == 0) { + *(--b) = '0'; + } else { + do { + int c = num % base; + num /= base; + if (c >= 10) { + c += base_char - 10; + } else { + c += '0'; + } + *(--b) = c; + if (comma && num != 0 && b > str && (last_comma - b) == 3) { + *(--b) = comma; + last_comma = b; + } + } + while (b > str && num != 0); + } + if (prefix) { + size_t prefix_len = strlen(prefix); + char *p = b - prefix_len; + if (p > str) { + b = p; + while (*prefix) { + *p++ = *prefix++; + } + } + } + if (sign && b > str) { + *(--b) = sign; + } + *fmt_size = *buf + needed_size - b - 1; + + return b; +} + +bool mp_obj_int_is_positive(mp_obj_t self_in) { + return mp_obj_get_int(self_in) >= 0; +} +#endif // LONGLONG or NONE + +#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE + // This is called for operations on SMALL_INT that are not handled by mp_unary_op mp_obj_t mp_obj_int_unary_op(int op, mp_obj_t o_in) { return MP_OBJ_NULL; diff --git a/py/objint.h b/py/objint.h index fe7f60a2d4..7ee476269c 100644 --- a/py/objint.h +++ b/py/objint.h @@ -8,6 +8,9 @@ typedef struct _mp_obj_int_t { } mp_obj_int_t; void mp_obj_int_print(void (*print)(void *env, const char 
*fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind); +char *mp_obj_int_formatted(char **buf, int *buf_size, int *fmt_size, mp_obj_t self_in, + int base, const char *prefix, char base_char, char comma); +bool mp_obj_int_is_positive(mp_obj_t self_in); mp_obj_t mp_obj_int_unary_op(int op, mp_obj_t o_in); mp_obj_t mp_obj_int_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in); mp_obj_t mp_obj_int_binary_op_extra_cases(int op, mp_obj_t lhs_in, mp_obj_t rhs_in); diff --git a/py/objint_longlong.c b/py/objint_longlong.c index 02389d6730..332f0bbb8a 100644 --- a/py/objint_longlong.c +++ b/py/objint_longlong.c @@ -1,5 +1,6 @@ #include #include +#include #include "nlr.h" #include "misc.h" @@ -21,15 +22,6 @@ #define SUFFIX "" #endif -void mp_obj_int_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) { - if (MP_OBJ_IS_SMALL_INT(self_in)) { - print(env, INT_FMT, MP_OBJ_SMALL_INT_VALUE(self_in)); - } else { - mp_obj_int_t *self = self_in; - print(env, "%lld" SUFFIX, self->val); - } -} - mp_obj_t mp_obj_int_unary_op(int op, mp_obj_t o_in) { mp_obj_int_t *o = o_in; switch (op) { diff --git a/py/objint_mpz.c b/py/objint_mpz.c index 4ecc4017b4..6410ecc64d 100644 --- a/py/objint_mpz.c +++ b/py/objint_mpz.c @@ -22,16 +22,47 @@ STATIC mp_obj_int_t *mp_obj_int_new_mpz(void) { return o; } -void mp_obj_int_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) { +// This routine expects you to pass in a buffer and size (in *buf and *buf_size). +// If, for some reason, this buffer is too small, then it will allocate a +// buffer and return the allocated buffer and size in *buf and *buf_size. It +// is the caller's responsibility to free this allocated buffer. +// +// The resulting formatted string will be returned from this function and the +// formatted size will be in *fmt_size. 
+char *mp_obj_int_formatted(char **buf, int *buf_size, int *fmt_size, mp_obj_t self_in, + int base, const char *prefix, char base_char, char comma) { + mpz_t small_mpz; + mpz_t *mpz; + mpz_dig_t small_dig[(sizeof(mp_small_int_t) * 8 + MPZ_DIG_SIZE - 1) / MPZ_DIG_SIZE]; + if (MP_OBJ_IS_SMALL_INT(self_in)) { - print(env, INT_FMT, MP_OBJ_SMALL_INT_VALUE(self_in)); + mpz_init_fixed_from_int(&small_mpz, small_dig, + sizeof(small_dig) / sizeof(small_dig[0]), + MP_OBJ_SMALL_INT_VALUE(self_in)); + mpz = &small_mpz; } else { - // TODO would rather not allocate memory to print... mp_obj_int_t *self = self_in; - char *str = mpz_as_str(&self->mpz, 10); - print(env, "%s", str); - m_free(str, 0); + mpz = &self->mpz; } + + uint needed_size = mpz_as_str_size_formatted(mpz, base, prefix, comma); + if (needed_size > *buf_size) { + *buf = m_new(char, needed_size); + *buf_size = needed_size; + } + char *str = *buf; + + *fmt_size = mpz_as_str_inpl(mpz, base, prefix, base_char, comma, str); + + return str; +} + +bool mp_obj_int_is_positive(mp_obj_t self_in) { + if (MP_OBJ_IS_SMALL_INT(self_in)) { + return MP_OBJ_SMALL_INT_VALUE(self_in) >= 0; + } + mp_obj_int_t *self = self_in; + return !self->mpz.neg; } mp_obj_t mp_obj_int_unary_op(int op, mp_obj_t o_in) { diff --git a/py/objstr.c b/py/objstr.c index 4395757a46..07134687d1 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -562,13 +562,17 @@ static bool arg_looks_numeric(mp_obj_t arg) { ; } -static machine_int_t arg_as_int(mp_obj_t arg) { +static mp_obj_t arg_as_int(mp_obj_t arg) { #if MICROPY_ENABLE_FLOAT if (MP_OBJ_IS_TYPE(arg, &mp_type_float)) { - return mp_obj_get_float(arg); + + // TODO: Needs a way to construct an mpz integer from a float + + mp_small_int_t num = mp_obj_get_float(arg); + return MP_OBJ_NEW_SMALL_INT(num); } #endif - return mp_obj_get_int(arg); + return arg; } mp_obj_t str_format(uint n_args, const mp_obj_t *args) { @@ -788,7 +792,7 @@ mp_obj_t str_format(uint n_args, const mp_obj_t *args) { if (arg_looks_integer(arg)) 
{ switch (type) { case 'b': - pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 2, 'a', flags, fill, width); + pfenv_print_int(&pfenv_vstr, arg, 1, 2, 'a', flags, fill, width); continue; case 'c': @@ -801,19 +805,23 @@ mp_obj_t str_format(uint n_args, const mp_obj_t *args) { case '\0': // No explicit format type implies 'd' case 'n': // I don't think we support locales in uPy so use 'd' case 'd': - pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 10, 'a', flags, fill, width); + pfenv_print_int(&pfenv_vstr, arg, 1, 10, 'a', flags, fill, width); continue; case 'o': - pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 8, 'a', flags, fill, width); + if (flags & PF_FLAG_SHOW_PREFIX) { + flags |= PF_FLAG_SHOW_OCTAL_LETTER; + } + + pfenv_print_int(&pfenv_vstr, arg, 1, 8, 'a', flags, fill, width); continue; case 'x': - pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 16, 'a', flags, fill, width); + pfenv_print_int(&pfenv_vstr, arg, 1, 16, 'a', flags, fill, width); continue; case 'X': - pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 16, 'A', flags, fill, width); + pfenv_print_int(&pfenv_vstr, arg, 1, 16, 'A', flags, fill, width); continue; case 'e': @@ -1050,7 +1058,7 @@ STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, uint n_args, const mp_obj_t case 'o': if (alt) { - flags |= PF_FLAG_SHOW_PREFIX; + flags |= (PF_FLAG_SHOW_PREFIX | PF_FLAG_SHOW_OCTAL_LETTER); } pfenv_print_int(&pfenv_vstr, arg_as_int(arg), 1, 8, 'a', flags, fill, width); break; diff --git a/py/pfenv.c b/py/pfenv.c index 0d6fab3c48..ab45328ad7 100644 --- a/py/pfenv.c +++ b/py/pfenv.c @@ -5,6 +5,8 @@ #include "mpconfig.h" #include "qstr.h" #include "obj.h" +#include "mpz.h" +#include "objint.h" #include "pfenv.h" #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE @@ -79,71 +81,88 @@ int pfenv_print_strn(const pfenv_t *pfenv, const char *str, unsigned int len, in // We can use 16 characters for 32-bit and 32 characters for 64-bit #define INT_BUF_SIZE (sizeof(machine_int_t) * 4) -int 
pfenv_print_int(const pfenv_t *pfenv, machine_uint_t x, int sgn, int base, int base_char, int flags, char fill, int width) { - char sign = 0; - if (sgn) { - if ((machine_int_t)x < 0) { - sign = '-'; - x = -x; - } else if (flags & PF_FLAG_SHOW_SIGN) { - sign = '+'; +int pfenv_print_int(const pfenv_t *pfenv, mp_obj_t x, int sgn, int base, int base_char, int flags, char fill, int width) { + if (!MP_OBJ_IS_INT(x)) { + // This will convert booleans to int, or raise an error for + // non-integer types. + x = MP_OBJ_NEW_SMALL_INT(mp_obj_get_int(x)); + } + + char prefix_buf[4]; + char *prefix = prefix_buf; + + if (mp_obj_int_is_positive(x)) { + if (flags & PF_FLAG_SHOW_SIGN) { + *prefix++ = '+'; } else if (flags & PF_FLAG_SPACE_SIGN) { - sign = ' '; + *prefix++ = ' '; } } - char buf[INT_BUF_SIZE]; - char *b = buf + INT_BUF_SIZE; - - if (x == 0) { - *(--b) = '0'; - } else { - do { - int c = x % base; - x /= base; - if (c >= 10) { - c += base_char - 10; - } else { - c += '0'; - } - *(--b) = c; - } while (b > buf && x != 0); - } - - char prefix_char = '\0'; - if (flags & PF_FLAG_SHOW_PREFIX) { if (base == 2) { - prefix_char = base_char + 'b' - 'a'; + *prefix++ = '0'; + *prefix++ = base_char + 'b' - 'a'; } else if (base == 8) { - prefix_char = base_char + 'o' - 'a'; + *prefix++ = '0'; + if (flags & PF_FLAG_SHOW_OCTAL_LETTER) { + *prefix++ = base_char + 'o' - 'a'; + } } else if (base == 16) { - prefix_char = base_char + 'x' - 'a'; + *prefix++ = '0'; + *prefix++ = base_char + 'x' - 'a'; } } + *prefix = '\0'; + int prefix_len = prefix - prefix_buf; + prefix = prefix_buf; + + char comma = '\0'; + if (flags & PF_FLAG_SHOW_COMMA) { + comma = ','; + } + + // The size of this buffer is rather arbitrary. If it's not large + // enough, a dynamic one will be allocated. 
+ char stack_buf[sizeof(machine_int_t) * 4]; + char *buf = stack_buf; + int buf_size = sizeof(stack_buf); + int fmt_size = 0; + char *str; + + char sign = '\0'; + if (flags & PF_FLAG_PAD_AFTER_SIGN) { + // We add the pad in this function, so since the pad goes after + // the sign & prefix, we format without a prefix + str = mp_obj_int_formatted(&buf, &buf_size, &fmt_size, + x, base, NULL, base_char, comma); + if (*str == '-') { + sign = *str++; + fmt_size--; + } + } else { + str = mp_obj_int_formatted(&buf, &buf_size, &fmt_size, + x, base, prefix, base_char, comma); + } int len = 0; if (flags & PF_FLAG_PAD_AFTER_SIGN) { + // pad after sign implies pad after prefix as well. if (sign) { - len += pfenv_print_strn(pfenv, &sign, 1, flags, fill, 1); + len += pfenv_print_strn(pfenv, &sign, 1, 0, 0, 1); width--; } - if (prefix_char) { - len += pfenv_print_strn(pfenv, "0", 1, flags, fill, 1); - len += pfenv_print_strn(pfenv, &prefix_char, 1, flags, fill, 1); - width -= 2; - } - } else { - if (prefix_char && b > &buf[1]) { - *(--b) = prefix_char; - *(--b) = '0'; - } - if (sign && b > buf) { - *(--b) = sign; + if (prefix_len) { + len += pfenv_print_strn(pfenv, prefix, prefix_len, 0, 0, 1); + width -= prefix_len; } } - len += pfenv_print_strn(pfenv, b, buf + INT_BUF_SIZE - b, flags, fill, width); + len += pfenv_print_strn(pfenv, str, fmt_size, flags, fill, width); + + if (buf != stack_buf) { + m_free(buf, buf_size); + } return len; } diff --git a/py/pfenv.h b/py/pfenv.h index 36b452b91c..32ecc159ff 100644 --- a/py/pfenv.h +++ b/py/pfenv.h @@ -1,13 +1,14 @@ -#define PF_FLAG_LEFT_ADJUST (0x001) -#define PF_FLAG_SHOW_SIGN (0x002) -#define PF_FLAG_SPACE_SIGN (0x004) -#define PF_FLAG_NO_TRAILZ (0x008) -#define PF_FLAG_SHOW_PREFIX (0x010) -#define PF_FLAG_SHOW_COMMA (0x020) -#define PF_FLAG_PAD_AFTER_SIGN (0x040) -#define PF_FLAG_CENTER_ADJUST (0x080) -#define PF_FLAG_ADD_PERCENT (0x100) -#define PF_FLAG_PAD_NAN_INF (0x200) +#define PF_FLAG_LEFT_ADJUST (0x001) +#define 
PF_FLAG_SHOW_SIGN (0x002) +#define PF_FLAG_SPACE_SIGN (0x004) +#define PF_FLAG_NO_TRAILZ (0x008) +#define PF_FLAG_SHOW_PREFIX (0x010) +#define PF_FLAG_SHOW_COMMA (0x020) +#define PF_FLAG_PAD_AFTER_SIGN (0x040) +#define PF_FLAG_CENTER_ADJUST (0x080) +#define PF_FLAG_ADD_PERCENT (0x100) +#define PF_FLAG_PAD_NAN_INF (0x200) +#define PF_FLAG_SHOW_OCTAL_LETTER (0x400) typedef struct _pfenv_t { void *data; @@ -17,7 +18,7 @@ typedef struct _pfenv_t { void pfenv_vstr_add_strn(void *data, const char *str, unsigned int len); int pfenv_print_strn(const pfenv_t *pfenv, const char *str, unsigned int len, int flags, char fill, int width); -int pfenv_print_int(const pfenv_t *pfenv, machine_uint_t x, int sgn, int base, int base_char, int flags, char fill, int width); +int pfenv_print_int(const pfenv_t *pfenv, mp_obj_t x, int sgn, int base, int base_char, int flags, char fill, int width); #if MICROPY_ENABLE_FLOAT int pfenv_print_float(const pfenv_t *pfenv, mp_float_t f, char fmt, int flags, char fill, int width, int prec); #endif diff --git a/tests/basics/string-format-modulo.py b/tests/basics/string-format-modulo.py index b736e2a73f..8e58be18c8 100644 --- a/tests/basics/string-format-modulo.py +++ b/tests/basics/string-format-modulo.py @@ -53,6 +53,7 @@ print("%X" % 18) print("%X" % 18.0) print("%#x" % 18) print("%#X" % 18) +print("%#6o" % 18) print("%#6x" % 18) print("%#06x" % 18) print("%e" % 1.23456) diff --git a/tests/basics/string-format.py b/tests/basics/string-format.py index 8049c6f73b..2d6d0cc721 100644 --- a/tests/basics/string-format.py +++ b/tests/basics/string-format.py @@ -24,11 +24,19 @@ test("{:4o}", 123) test("{:4x}", 123) test("{:4X}", 123) +test("{:4,d}", 12345678) + test("{:#4b}", 10) test("{:#4o}", 123) test("{:#4x}", 123) test("{:#4X}", 123) +test("{:#4d}", 0) +test("{:#4b}", 0) +test("{:#4o}", 0) +test("{:#4x}", 0) +test("{:#4X}", 0) + test("{:<6s}", "ab") test("{:>6s}", "ab") test("{:^6s}", "ab")