Merge pull request #6722 from dhalbert/micropython-float-print-fix

py/formatfloat: Format all whole-number floats exactly.
This commit is contained in:
Scott Shawcroft 2022-08-10 09:32:28 -07:00 committed by GitHub
commit 741a5c2bec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 154 additions and 55 deletions

View File

@ -25,6 +25,7 @@
*/ */
#include "py/mpconfig.h" #include "py/mpconfig.h"
#include "py/misc.h"
#if MICROPY_FLOAT_IMPL != MICROPY_FLOAT_IMPL_NONE #if MICROPY_FLOAT_IMPL != MICROPY_FLOAT_IMPL_NONE
#include <assert.h> #include <assert.h>
@ -96,7 +97,16 @@ static inline int fp_isless1(float x) {
#define fp_iszero(x) (x == 0) #define fp_iszero(x) (x == 0)
#define fp_isless1(x) (x < 1.0) #define fp_isless1(x) (x < 1.0)
#endif #endif // MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT/DOUBLE
static inline int fp_ge_eps(FPTYPE x, FPTYPE y) {
mp_float_union_t fb_y = {y};
// Back off 2 eps.
// This is valid for almost all values, but in practice
// it's only used when y = 1eX for X>=0.
fb_y.i -= 2;
return x >= fb_y.f;
}
static const FPTYPE g_pos_pow[] = { static const FPTYPE g_pos_pow[] = {
#if FPDECEXP > 32 #if FPDECEXP > 32
@ -173,6 +183,7 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
int num_digits = 0; int num_digits = 0;
const FPTYPE *pos_pow = g_pos_pow; const FPTYPE *pos_pow = g_pos_pow;
const FPTYPE *neg_pow = g_neg_pow; const FPTYPE *neg_pow = g_neg_pow;
int signed_e = 0;
if (fp_iszero(f)) { if (fp_iszero(f)) {
e = 0; e = 0;
@ -192,31 +203,24 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
} }
} }
} else if (fp_isless1(f)) { } else if (fp_isless1(f)) {
// We need to figure out what an integer digit will be used FPTYPE f_mod = f;
// in case 'f' is used (or we revert other format to it below).
// As we just tested number to be <1, this is obviously 0,
// but we can round it up to 1 below.
char first_dig = '0';
if (f >= FPROUND_TO_ONE) {
first_dig = '1';
}
// Build negative exponent // Build negative exponent
for (e = 0, e1 = FPDECEXP; e1; e1 >>= 1, pos_pow++, neg_pow++) { for (e = 0, e1 = FPDECEXP; e1; e1 >>= 1, pos_pow++, neg_pow++) {
if (*neg_pow > f) { if (*neg_pow > f_mod) {
e += e1; e += e1;
f *= *pos_pow; f_mod *= *pos_pow;
} }
} }
char e_sign_char = '-'; char e_sign_char = '-';
if (fp_isless1(f) && f >= FPROUND_TO_ONE) { if (fp_isless1(f_mod) && f_mod >= FPROUND_TO_ONE) {
f = FPCONST(1.0); f_mod = FPCONST(1.0);
if (e == 0) { if (e == 0) {
e_sign_char = '+'; e_sign_char = '+';
} }
} else if (fp_isless1(f)) { } else if (fp_isless1(f_mod)) {
e++; e++;
f *= FPCONST(10.0); f_mod *= FPCONST(10.0);
} }
// If the user specified 'g' format, and e is <= 4, then we'll switch // If the user specified 'g' format, and e is <= 4, then we'll switch
@ -224,8 +228,7 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
if (fmt == 'f' || (fmt == 'g' && e <= 4)) { if (fmt == 'f' || (fmt == 'g' && e <= 4)) {
fmt = 'f'; fmt = 'f';
dec = -1; dec = 0;
*s++ = first_dig;
if (org_fmt == 'g') { if (org_fmt == 'g') {
prec += (e - 1); prec += (e - 1);
@ -237,13 +240,8 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
} }
num_digits = prec; num_digits = prec;
if (num_digits) { signed_e = 0;
*s++ = '.'; ++num_digits;
while (--e && num_digits) {
*s++ = '0';
num_digits--;
}
}
} else { } else {
// For e & g formats, we'll be printing the exponent, so set the // For e & g formats, we'll be printing the exponent, so set the
// sign. // sign.
@ -256,22 +254,29 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
prec++; prec++;
} }
} }
signed_e = -e;
} }
} else { } else {
// Build positive exponent // Build positive exponent.
for (e = 0, e1 = FPDECEXP; e1; e1 >>= 1, pos_pow++, neg_pow++) { // We don't modify f at this point to avoid innaccuracies from
if (*pos_pow <= f) { // scaling it. Instead, we find the product of powers of 10
// that is not greater than it, and use that to start the
// mantissa.
FPTYPE u_base = FPCONST(1.0);
for (e = 0, e1 = FPDECEXP; e1; e1 >>= 1, pos_pow++) {
FPTYPE next_u = u_base * *pos_pow;
// fp_ge_eps performs "f >= (next_u - 2eps)" so that if, for
// numerical reasons, f is very close to a power of ten but
// not strictly equal, we still treat it as that power of 10.
// The comparison was failing for maybe 10% of 1eX values, but
// although rounding fixed many of them, there were still some
// rendering as 9.99999998e(X-1).
if (fp_ge_eps(f, next_u)) {
u_base = next_u;
e += e1; e += e1;
f *= *neg_pow;
} }
} }
// It can be that f was right on the edge of an entry in pos_pow needs to be reduced
if ((int)f >= 10) {
e += 1;
f *= FPCONST(0.1);
}
// If the user specified fixed format (fmt == 'f') and e makes the // If the user specified fixed format (fmt == 'f') and e makes the
// number too big to fit into the available buffer, then we'll // number too big to fit into the available buffer, then we'll
// switch to the 'e' format. // switch to the 'e' format.
@ -310,15 +315,15 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
} else { } else {
e_sign = '+'; e_sign = '+';
} }
signed_e = e;
} }
if (prec < 0) { if (prec < 0) {
// This can happen when the prec is trimmed to prevent buffer overflow // This can happen when the prec is trimmed to prevent buffer overflow
prec = 0; prec = 0;
} }
// We now have num.f as a floating point number between >= 1 and < 10 // At this point e contains the absolute value of the power of 10 exponent.
// (or equal to zero), and e contains the absolute value of the power of // (dec + 1) == the number of dgits before the decimal.
// 10 exponent. and (dec + 1) == the number of dgits before the decimal.
// For e, prec is # digits after the decimal // For e, prec is # digits after the decimal
// For f, prec is # digits after the decimal // For f, prec is # digits after the decimal
@ -336,25 +341,63 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
num_digits = prec; num_digits = prec;
} }
// Print the digits of the mantissa if (signed_e < 0) {
for (int i = 0; i < num_digits; ++i, --dec) { // The algorithm below treats numbers smaller than 1 by scaling them
int32_t d = (int32_t)f; // repeatedly by 10 to bring the new digit to the top. Our input number
if (d < 0) { // was smaller than 1, so scale it up to be 1 <= f < 10.
*s++ = '0'; FPTYPE u_base = FPCONST(1.0);
} else { const FPTYPE *pow_u = g_pos_pow;
*s++ = '0' + d; for (int m = FPDECEXP; m; m >>= 1, pow_u++) {
if (m & e) {
u_base *= *pow_u;
} }
}
f *= u_base;
}
int d = 0;
int num_digits_left = num_digits;
for (int digit_index = signed_e; num_digits_left >= 0; --digit_index) {
FPTYPE u_base = FPCONST(1.0);
if (digit_index > 0) {
// Generate 10^digit_index for positive digit_index.
const FPTYPE *pow_u = g_pos_pow;
int target_index = digit_index;
for (int m = FPDECEXP; m; m >>= 1, pow_u++) {
if (m & target_index) {
u_base *= *pow_u;
}
}
}
for (d = 0; d < 9; ++d) {
// This is essentially "if (f < u_base)", but with 2eps margin
// so that if f is just a tiny bit smaller, we treat it as
// equal (and accept the additional digit value).
if (!fp_ge_eps(f, u_base)) {
break;
}
f -= u_base;
}
// We calculate one more digit than we display, to use in rounding
// below. So only emit the digit if it's one that we display.
if (num_digits_left > 0) {
// Emit this number (the leading digit).
*s++ = '0' + d;
if (dec == 0 && prec > 0) { if (dec == 0 && prec > 0) {
*s++ = '.'; *s++ = '.';
} }
f -= (FPTYPE)d; }
--dec;
--num_digits_left;
if (digit_index <= 0) {
// Once we get below 1.0, we scale up f instead of calculting
// negative powers of 10 in u_base. This provides better
// renditions of exact decimals like 1/16 etc.
f *= FPCONST(10.0); f *= FPCONST(10.0);
} }
}
// Round // Rounding. If the next digit to print is >= 5, round up.
// If we print non-exponential format (i.e. 'f'), but a digit we're going if (d >= 5) {
// to round by (e) is too far away, then there's nothing to round.
if ((org_fmt != 'f' || e <= num_digits) && f >= FPCONST(5.0)) {
char *rs = s; char *rs = s;
rs--; rs--;
while (1) { while (1) {
@ -394,8 +437,11 @@ int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, ch
} }
} else { } else {
// Need at extra digit at the end to make room for the leading '1' // Need at extra digit at the end to make room for the leading '1'
// but if we're at the buffer size limit, just drop the final digit.
if ((size_t)(s + 1 - buf) < buf_size) {
s++; s++;
} }
}
char *ss = s; char *ss = s;
while (ss > rs) { while (ss > rs) {
*ss = ss[-1]; *ss = ss[-1];

View File

@ -0,0 +1,4 @@
# check a case where rounding was suppressed inappropriately when "f" was
# promoted to "e" for large numbers.
v = 8.888e32
print("%.2f" % v) # '%.2f' format with e32 becomes '%.2e', expect 8.89e+32.

View File

@ -0,0 +1 @@
8.89e+32

View File

@ -0,0 +1,31 @@
# Test that integers format to exact values.
for b in [13, 123, 457, 23456]:
for r in range(1, 10):
e_fmt = "{:." + str(r) + "e}"
f_fmt = "{:." + str(r) + "f}"
g_fmt = "{:." + str(r) + "g}"
for e in range(0, 5):
f = b * (10**e)
title = str(b) + " x 10^" + str(e)
print(title, "with format", e_fmt, "gives", e_fmt.format(f))
print(title, "with format", f_fmt, "gives", f_fmt.format(f))
print(title, "with format", g_fmt, "gives", g_fmt.format(f))
# Check that powers of 10 (that fit in float32) format correctly.
for i in range(31):
# It works to 12 digits on all platforms *except* qemu-arm, where
# 10^11 comes out as 10000000820 or something.
print("{:.7g}".format(float("1e" + str(i))))
# 16777215 is 2^24 - 1, the largest integer that can be completely held
# in a float32.
print("{:f}".format(16777215))
# 4294967040 = 16777215 * 128 is the largest integer that is exactly
# represented by a float32 and that will also fit within a (signed) int32.
# The upper bound of our integer-handling code is actually double this,
# but that constant might cause trouble on systems using 32 bit ints.
print("{:f}".format(2147483520))
# Very large positive integers can be a test for precision and resolution.
# This is a weird way to represent 1e38 (largest power of 10 for float32).
print("{:.6e}".format(float("9" * 30 + "e8")))

View File

@ -0,0 +1,15 @@
# Test formatting of very large ints.
# Relies on double-precision floats.
import array
import sys
# Challenging way to express 1e200 and 1e100.
print("{:.12e}".format(float("9" * 400 + "e-200")))
print("{:.12e}".format(float("9" * 400 + "e-300")))
# These correspond to the binary representation of 1e200 in float64s:
v1 = 0x54B249AD2594C37D # 1e100
v2 = 0x6974E718D7D7625A # 1e200
print("{:.12e}".format(array.array("d", v1.to_bytes(8, sys.byteorder))[0]))
print("{:.12e}".format(array.array("d", v2.to_bytes(8, sys.byteorder))[0]))

View File

@ -426,6 +426,7 @@ def run_tests(pyb, tests, args, result_dir, num_threads=1):
if upy_float_precision < 64: if upy_float_precision < 64:
skip_tests.add("float/float_divmod.py") # tested by float/float_divmod_relaxed.py instead skip_tests.add("float/float_divmod.py") # tested by float/float_divmod_relaxed.py instead
skip_tests.add("float/float2int_doubleprec_intbig.py") skip_tests.add("float/float2int_doubleprec_intbig.py")
skip_tests.add("float/float_format_ints_doubleprec.py")
skip_tests.add("float/float_parse_doubleprec.py") skip_tests.add("float/float_parse_doubleprec.py")
if not has_complex: if not has_complex:

View File

@ -100,6 +100,7 @@ exclude_tests = (
"float/float_divmod.py", "float/float_divmod.py",
# requires double precision floating point to work # requires double precision floating point to work
"float/float2int_doubleprec_intbig.py", "float/float2int_doubleprec_intbig.py",
"float/float_format_ints_doubleprec.py",
"float/float_parse_doubleprec.py", "float/float_parse_doubleprec.py",
# inline asm FP tests (require Cortex-M4) # inline asm FP tests (require Cortex-M4)
"inlineasm/asmfpaddsub.py", "inlineasm/asmfpaddsub.py",