py/parsenum: Improve parsing of floating point numbers.

This patch improves parsing of floating point numbers by converting all the
digits (integer and fractional) together into a number 1 or greater, and
then applying the correct power of 10 at the very end.  In particular the
multiple "multiply by 0.1" operations to build a fraction are now combined
together and applied at the same time as the exponent, at the very end.

This helps to retain precision during parsing of floats, and also includes
a check that the number doesn't overflow during the parsing.  One benefit
is that a float will have the same value no matter where the decimal point
is located, as long as the exponent compensates, e.g. 1.23 == 123e-2.
This commit is contained in:
Damien George 2017-11-27 12:51:52 +11:00
parent f59c6b48ae
commit 84895f1a21
4 changed files with 60 additions and 6 deletions

View File

@ -170,6 +170,14 @@ typedef enum {
mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool force_complex, mp_lexer_t *lex) { mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool force_complex, mp_lexer_t *lex) {
#if MICROPY_PY_BUILTINS_FLOAT #if MICROPY_PY_BUILTINS_FLOAT
// DEC_VAL_MAX only needs to be rough and is used to retain precision while not overflowing
#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
#define DEC_VAL_MAX 1e20F
#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
#define DEC_VAL_MAX 1e200
#endif
const char *top = str + len; const char *top = str + len;
mp_float_t dec_val = 0; mp_float_t dec_val = 0;
bool dec_neg = false; bool dec_neg = false;
@ -214,8 +222,8 @@ mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool
// string should be a decimal number // string should be a decimal number
parse_dec_in_t in = PARSE_DEC_IN_INTG; parse_dec_in_t in = PARSE_DEC_IN_INTG;
bool exp_neg = false; bool exp_neg = false;
mp_float_t frac_mult = 0.1;
mp_int_t exp_val = 0; mp_int_t exp_val = 0;
mp_int_t exp_extra = 0;
while (str < top) { while (str < top) {
mp_uint_t dig = *str++; mp_uint_t dig = *str++;
if ('0' <= dig && dig <= '9') { if ('0' <= dig && dig <= '9') {
@ -223,11 +231,18 @@ mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool
if (in == PARSE_DEC_IN_EXP) { if (in == PARSE_DEC_IN_EXP) {
exp_val = 10 * exp_val + dig; exp_val = 10 * exp_val + dig;
} else { } else {
if (in == PARSE_DEC_IN_FRAC) { if (dec_val < DEC_VAL_MAX) {
dec_val += dig * frac_mult; // dec_val won't overflow so keep accumulating
frac_mult *= MICROPY_FLOAT_CONST(0.1);
} else {
dec_val = 10 * dec_val + dig; dec_val = 10 * dec_val + dig;
if (in == PARSE_DEC_IN_FRAC) {
--exp_extra;
}
} else {
// dec_val might overflow and we anyway can't represent more digits
// of precision, so ignore the digit and just adjust the exponent
if (in == PARSE_DEC_IN_INTG) {
++exp_extra;
}
} }
} }
} else if (in == PARSE_DEC_IN_INTG && dig == '.') { } else if (in == PARSE_DEC_IN_INTG && dig == '.') {
@ -261,7 +276,7 @@ mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool
} }
// apply the exponent // apply the exponent
dec_val *= MICROPY_FLOAT_C_FUN(pow)(10, exp_val); dec_val *= MICROPY_FLOAT_C_FUN(pow)(10, exp_val + exp_extra);
} }
// negate value if needed // negate value if needed

View File

@ -0,0 +1,22 @@
# test parsing of floats
# Each case prints its result; the script itself asserts nothing.
# NOTE(review): presumably the test runner compares the printed output with a reference -- confirm.
# NOTE(review): `inf` is not used by any statement visible in this script -- confirm it is still needed.
inf = float('inf')
# it shouldn't matter where the decimal point is if the exponent balances the value
# (both operands of each subtraction spell the same decimal value: 1234 and 1.015625)
print(float('1234') - float('0.1234e4'))
print(float('1.015625') - float('1015625e-6'))
# very large integer part with a very negative exponent should cancel out
# ('9' * 60 is a 60-digit integer just under 1e60, so these literals are
# mathematically just under 1, 1e20 and 1e40 respectively)
print(float('9' * 60 + 'e-60'))
print(float('9' * 60 + 'e-40'))
# printed as a comparison rather than as a value; 1e40 is above the largest
# single-precision float (~3.4e38)
# NOTE(review): presumably done so the output does not depend on float width -- confirm
print(float('9' * 60 + 'e-20') == float('1e40'))
# many fractional digits
# ('.' + '9' * 70 is mathematically just under 1; the exponent then scales it)
print(float('.' + '9' * 70))
print(float('.' + '9' * 70 + 'e20'))
# 1e-50 is below the smallest single-precision subnormal (~1.4e-45)
# NOTE(review): presumably why this case is a comparison -- confirm
print(float('.' + '9' * 70 + 'e-50') == float('1e-50'))
# tiny fraction with large exponent
# ('.' + '0' * 60 + '1' is 1e-61, so with e10 the literal is 1e-51;
# '.' + '0' * 60 + '9' is 9e-61, so e25 and e40 give 9e-36 and 9e-21)
print(float('.' + '0' * 60 + '1e10') == float('1e-51'))
print(float('.' + '0' * 60 + '9e25'))
print(float('.' + '0' * 60 + '9e40'))

View File

@ -0,0 +1,16 @@
# test parsing of floats, requiring double-precision
# Each case prints its result; the script itself asserts nothing.
# NOTE(review): presumably the test runner compares the printed output with a reference -- confirm.
# very large integer part with a very negative exponent should cancel out
# ('9' * 400 is a 400-digit integer just under 1e400, which by itself exceeds
# the largest double (~1.8e308); with the exponent applied the literals are
# mathematically just under 1e300, 1e200 and 1 respectively)
print(float('9' * 400 + 'e-100'))
print(float('9' * 400 + 'e-200'))
print(float('9' * 400 + 'e-400'))
# many fractional digits
# ('.' + '9' * 400 is mathematically just under 1; the exponents scale it to
# just under 1e100 and 1e-100)
print(float('.' + '9' * 400))
print(float('.' + '9' * 400 + 'e100'))
print(float('.' + '9' * 400 + 'e-100'))
# tiny fraction with large exponent
# ('.' + '0' * 400 + '9' is 9e-401, which by itself is below the smallest
# double subnormal (~4.9e-324); with the exponent applied the literals are
# mathematically 9e-301, 9e-201 and 0.9 respectively)
print(float('.' + '0' * 400 + '9e100'))
print(float('.' + '0' * 400 + '9e200'))
print(float('.' + '0' * 400 + '9e400'))

View File

@ -271,6 +271,7 @@ def run_tests(pyb, tests, args, base_path="."):
if upy_float_precision < 64: if upy_float_precision < 64:
skip_tests.add('float/float_divmod.py') # tested by float/float_divmod_relaxed.py instead skip_tests.add('float/float_divmod.py') # tested by float/float_divmod_relaxed.py instead
skip_tests.add('float/float2int_doubleprec_intbig.py') skip_tests.add('float/float2int_doubleprec_intbig.py')
skip_tests.add('float/float_parse_doubleprec.py')
if not has_complex: if not has_complex:
skip_tests.add('float/complex1.py') skip_tests.add('float/complex1.py')