From 20773971186151b53426bd607908546598036a16 Mon Sep 17 00:00:00 2001 From: Damien George Date: Sat, 22 Feb 2014 18:12:43 +0000 Subject: [PATCH] py: Put number parsing code together in parsenum.c. --- py/objfloat.c | 9 ++- py/objint.c | 6 +- py/parse.h | 2 +- py/parsenum.c | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++ py/parsenum.h | 2 + py/py.mk | 2 +- py/runtime.c | 68 ++------------------ py/strtonum.c | 98 ----------------------------- py/strtonum.h | 1 - 9 files changed, 187 insertions(+), 170 deletions(-) create mode 100644 py/parsenum.c create mode 100644 py/parsenum.h delete mode 100644 py/strtonum.c delete mode 100644 py/strtonum.h diff --git a/py/objfloat.c b/py/objfloat.c index 9d7b796895..268bc2bde5 100644 --- a/py/objfloat.c +++ b/py/objfloat.c @@ -8,6 +8,7 @@ #include "mpconfig.h" #include "qstr.h" #include "obj.h" +#include "parsenum.h" #include "runtime0.h" #if MICROPY_ENABLE_FLOAT @@ -32,8 +33,12 @@ STATIC mp_obj_t float_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const m return mp_obj_new_float(0); case 1: - // TODO allow string as arg and parse it - if (MP_OBJ_IS_TYPE(args[0], &float_type)) { + if (MP_OBJ_IS_STR(args[0])) { + // a string, parse it + uint l; + const char *s = mp_obj_str_get_data(args[0], &l); + return mp_parse_num_decimal(s, l); + } else if (MP_OBJ_IS_TYPE(args[0], &float_type)) { return args[0]; } else { return mp_obj_new_float(mp_obj_get_float(args[0])); diff --git a/py/objint.c b/py/objint.c index 82bab9ea18..0caaab649b 100644 --- a/py/objint.c +++ b/py/objint.c @@ -5,10 +5,10 @@ #include "nlr.h" #include "misc.h" -#include "strtonum.h" #include "mpconfig.h" #include "qstr.h" #include "obj.h" +#include "parsenum.h" #include "objint.h" // This dispatcher function is expected to be independent of the implementation @@ -25,7 +25,7 @@ STATIC mp_obj_t int_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_ // a string, parse it uint l; const char *s = mp_obj_str_get_data(args[0], &l); - return MP_OBJ_NEW_SMALL_INT(mp_strtonum(s, 0)); + return mp_parse_num_integer(s, l, 0); } else { return MP_OBJ_NEW_SMALL_INT(mp_obj_get_int(args[0])); } @@ -36,7 +36,7 @@ STATIC mp_obj_t int_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_ // TODO proper error checking of argument types uint l; const char *s = mp_obj_str_get_data(args[0], &l); - return MP_OBJ_NEW_SMALL_INT(mp_strtonum(s, mp_obj_get_int(args[1]))); + return mp_parse_num_integer(s, l, mp_obj_get_int(args[1])); } default: diff --git a/py/parse.h b/py/parse.h index 6492f4d9e0..135de47d13 100644 --- a/py/parse.h +++ b/py/parse.h @@ -8,7 +8,7 @@ struct _mp_lexer_t; // - xx...x00110: an integer; bits 5 and above are the qstr holding the value // - xx...x01010: a decimal; bits 5 and above are the qstr holding the value // - xx...x01110: a string; bits 5 and above are the qstr holding the value -// - xx...x10010: a string with triple quotes; bits 5 and above are the qstr holding the value +// - xx...x10010: a string of bytes; bits 5 and above are the qstr holding the value // - xx...x10110: a token; bits 5 and above are mp_token_kind_t // TODO: these can now be unified with MP_OBJ_FITS_SMALL_INT(x) diff --git a/py/parsenum.c b/py/parsenum.c new file mode 100644 index 0000000000..64594cd1b4 --- /dev/null +++ b/py/parsenum.c @@ -0,0 +1,169 @@ +#include + +#include "misc.h" +#include "mpconfig.h" +#include "qstr.h" +#include "nlr.h" +#include "obj.h" +#include "parsenum.h" + +#if defined(UNIX) + +#include +#include + +mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) { + // TODO at the moment we ignore len; we should honour it! + // TODO detect integer overflow and return bignum + + int c, neg = 0; + const char *p = str; + char *num; + long found; + + // check radix base + if ((base != 0 && base < 2) || base > 36) { + nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "ValueError: int() arg 2 must be >=2 and <= 36")); + } + // skip surrounded whitespace + while (isspace((c = *(p++)))); + if (c == 0) { + goto value_error; + } + // preced sign + if (c == '+' || c == '-') { + neg = - (c == '-'); + c = *(p++); + } + + // find real radix base, and strip preced '0x', '0o' and '0b' + // TODO somehow merge with similar code in parse.c + if ((base == 0 || base == 16) && c == '0') { + c = *(p++); + if ((c | 32) == 'x') { + base = 16; + } else if (base == 0 && (c | 32) == 'o') { + base = 8; + } else if (base == 0 && (c | 32) == 'b') { + base = 2; + } else { + base = 10; + p -= 2; + } + } else if (base == 8 && c == '0') { + c = *(p++); + if ((c | 32) != 'o') { + p -= 2; + } + } else if (base == 2 && c == '0') { + c = *(p++); + if ((c | 32) != 'b') { + p -= 2; + } + } else { + if (base == 0) base = 10; + p--; + } + + errno = 0; + found = strtol(p, &num, base); + if (errno) { + goto value_error; + } else if (found && *(num) == 0) { + goto done; + } else if (found || num != p) { + goto check_tail_space; + } else { + goto value_error; + } + +check_tail_space: + if (*(num) != 0) { + while (isspace((c = *(num++)))); + if (c != 0) { + goto value_error; + } + } + +done: + return MP_OBJ_NEW_SMALL_INT((found ^ neg) - neg); + +value_error: + nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "invalid literal for int() with base %d: '%s'", base, str)); +} + +#else /* defined(UNIX) */ + +mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) { + // TODO port strtol to stm + return MP_OBJ_NEW_SMALL_INT(0); +} + +#endif /* defined(UNIX) */ + +#define PARSE_DEC_IN_INTG (1) +#define PARSE_DEC_IN_FRAC (2) +#define PARSE_DEC_IN_EXP (3) + +mp_obj_t mp_parse_num_decimal(const char *str, uint len) { +#if MICROPY_ENABLE_FLOAT + int in = PARSE_DEC_IN_INTG; + mp_float_t dec_val = 0; + bool exp_neg = false; + int exp_val = 0; + int exp_extra = 0; + bool imag = false; + const char *top = str + len; + for (; str < top; str++) { + int dig = *str; + if ('0' <= dig && dig <= '9') { + dig -= '0'; + if (in == PARSE_DEC_IN_EXP) { + exp_val = 10 * exp_val + dig; + } else { + dec_val = 10 * dec_val + dig; + if (in == PARSE_DEC_IN_FRAC) { + exp_extra -= 1; + } + } + } else if (in == PARSE_DEC_IN_INTG && dig == '.') { + in = PARSE_DEC_IN_FRAC; + } else if (in != PARSE_DEC_IN_EXP && (dig == 'E' || dig == 'e')) { + in = PARSE_DEC_IN_EXP; + if (str[1] == '+') { + str++; + } else if (str[1] == '-') { + str++; + exp_neg = true; + } + } else if (dig == 'J' || dig == 'j') { + str++; + imag = true; + break; + } else { + // unknown character + break; + } + } + if (*str != 0) { + nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number")); + } + if (exp_neg) { + exp_val = -exp_val; + } + exp_val += exp_extra; + for (; exp_val > 0; exp_val--) { + dec_val *= 10; + } + for (; exp_val < 0; exp_val++) { + dec_val *= 0.1; + } + if (imag) { + return mp_obj_new_complex(0, dec_val); + } else { + return mp_obj_new_float(dec_val); + } +#else + nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "decimal numbers not supported")); +#endif +} diff --git a/py/parsenum.h b/py/parsenum.h new file mode 100644 index 0000000000..5a2e42da50 --- /dev/null +++ b/py/parsenum.h @@ -0,0 +1,2 @@ +mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base); +mp_obj_t mp_parse_num_decimal(const char *str, uint len); diff --git a/py/py.mk b/py/py.mk index cbb63b4bfc..199c3aadc8 100644 --- a/py/py.mk +++ b/py/py.mk @@ -31,9 +31,9 @@ PY_O_BASENAME = \ asmthumb.o \ emitnthumb.o \ emitinlinethumb.o \ + parsenum.o \ runtime.o \ map.o \ - strtonum.o \ obj.o \ objarray.o \ objbool.o \ diff --git a/py/runtime.c b/py/runtime.c index b08ae3d4e7..20bfa0b20f 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -13,6 +13,7 @@ #include "mpconfig.h" #include "qstr.h" #include "obj.h" +#include "parsenum.h" #include "runtime0.h" #include "runtime.h" #include "map.h" @@ -371,72 +372,11 @@ mp_obj_t rt_list_append(mp_obj_t self_in, mp_obj_t arg) { return mp_obj_list_append(self_in, arg); } -#define PARSE_DEC_IN_INTG (1) -#define PARSE_DEC_IN_FRAC (2) -#define PARSE_DEC_IN_EXP (3) - mp_obj_t rt_load_const_dec(qstr qstr) { -#if MICROPY_ENABLE_FLOAT DEBUG_OP_printf("load '%s'\n", qstr_str(qstr)); - const char *s = qstr_str(qstr); - int in = PARSE_DEC_IN_INTG; - mp_float_t dec_val = 0; - bool exp_neg = false; - int exp_val = 0; - int exp_extra = 0; - bool imag = false; - for (; *s; s++) { - int dig = *s; - if ('0' <= dig && dig <= '9') { - dig -= '0'; - if (in == PARSE_DEC_IN_EXP) { - exp_val = 10 * exp_val + dig; - } else { - dec_val = 10 * dec_val + dig; - if (in == PARSE_DEC_IN_FRAC) { - exp_extra -= 1; - } - } - } else if (in == PARSE_DEC_IN_INTG && dig == '.') { - in = PARSE_DEC_IN_FRAC; - } else if (in != PARSE_DEC_IN_EXP && (dig == 'E' || dig == 'e')) { - in = PARSE_DEC_IN_EXP; - if (s[1] == '+') { - s++; - } else if (s[1] == '-') { - s++; - exp_neg = true; - } - } else if (dig == 'J' || dig == 'j') { - s++; - imag = true; - break; - } else { - // unknown character - break; - } - } - if (*s != 0) { - nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number")); - } - if (exp_neg) { - exp_val = -exp_val; - } - exp_val += exp_extra; - for (; exp_val > 0; exp_val--) { - dec_val *= 10; - } - for (; exp_val < 0; exp_val++) { - dec_val *= 0.1; - } - if (imag) { - return mp_obj_new_complex(0, dec_val); - } else { - return mp_obj_new_float(dec_val); - } -#else - nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "decimal numbers not supported")); -#endif + uint len; + const byte* data = qstr_data(qstr, &len); + return mp_parse_num_decimal((const char*)data, len); } mp_obj_t rt_load_const_str(qstr qstr) { diff --git a/py/strtonum.c b/py/strtonum.c deleted file mode 100644 index 4a62756512..0000000000 --- a/py/strtonum.c +++ /dev/null @@ -1,98 +0,0 @@ -#if defined(UNIX) - -#include -#include -#include - -#include "misc.h" -#include "strtonum.h" -#include "mpconfig.h" -#include "qstr.h" -#include "nlr.h" -#include "obj.h" - -long mp_strtonum(const char *restrict s, int base) { - int c, neg = 0; - const char *p = s; - char *num; - long found; - - // check radix base - if ((base != 0 && base < 2) || base > 36) { - nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "ValueError: int() arg 2 must be >=2 and <= 36")); - } - // skip surrounded whitespace - while (isspace((c = *(p++)))); - if (c == 0) { - goto value_error; - } - // preced sign - if (c == '+' || c == '-') { - neg = - (c == '-'); - c = *(p++); - } - - // find real radix base, and strip preced '0x', '0o' and '0b' - // TODO somehow merge with similar code in parse.c - if ((base == 0 || base == 16) && c == '0') { - c = *(p++); - if ((c | 32) == 'x') { - base = 16; - } else if (base == 0 && (c | 32) == 'o') { - base = 8; - } else if (base == 0 && (c | 32) == 'b') { - base = 2; - } else { - base = 10; - p -= 2; - } - } else if (base == 8 && c == '0') { - c = *(p++); - if ((c | 32) != 'o') { - p -= 2; - } - } else if (base == 2 && c == '0') { - c = *(p++); - if ((c | 32) != 'b') { - p -= 2; - } - } else { - if (base == 0) base = 10; - p--; - } - - errno = 0; - found = strtol(p, &num, base); - if (errno) { - goto value_error; - } else if (found && *(num) == 0) { - goto done; - } else if (found || num != p) { - goto check_tail_space; - } else { - goto value_error; - } - -check_tail_space: - if (*(num) != 0) { - while (isspace((c = *(num++)))); - if (c != 0) { - goto value_error; - } - } - -done: - return (found ^ neg) - neg; - -value_error: - nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "invalid literal for int() with base %d: '%s'", base, s)); -} - -#else /* defined(UNIX) */ - -long mp_strtonum(const char *restrict s, int base) { - // TODO port strtol to stm - return 0; -} - -#endif /* defined(UNIX) */ diff --git a/py/strtonum.h b/py/strtonum.h deleted file mode 100644 index 10b6732edb..0000000000 --- a/py/strtonum.h +++ /dev/null @@ -1 +0,0 @@ -long mp_strtonum(const char *restrict s, int base);