py: Put number parsing code together in parsenum.c.

This commit is contained in:
Damien George 2014-02-22 18:12:43 +00:00
parent 2613ffde43
commit 2077397118
9 changed files with 187 additions and 170 deletions

View File

@ -8,6 +8,7 @@
#include "mpconfig.h"
#include "qstr.h"
#include "obj.h"
#include "parsenum.h"
#include "runtime0.h"
#if MICROPY_ENABLE_FLOAT
@ -32,8 +33,12 @@ STATIC mp_obj_t float_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const m
return mp_obj_new_float(0);
case 1:
// TODO allow string as arg and parse it
if (MP_OBJ_IS_TYPE(args[0], &float_type)) {
if (MP_OBJ_IS_STR(args[0])) {
// a string, parse it
uint l;
const char *s = mp_obj_str_get_data(args[0], &l);
return mp_parse_num_decimal(s, l);
} else if (MP_OBJ_IS_TYPE(args[0], &float_type)) {
return args[0];
} else {
return mp_obj_new_float(mp_obj_get_float(args[0]));

View File

@ -5,10 +5,10 @@
#include "nlr.h"
#include "misc.h"
#include "strtonum.h"
#include "mpconfig.h"
#include "qstr.h"
#include "obj.h"
#include "parsenum.h"
#include "objint.h"
// This dispatcher function is expected to be independent of the implementation
@ -25,7 +25,7 @@ STATIC mp_obj_t int_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_
// a string, parse it
uint l;
const char *s = mp_obj_str_get_data(args[0], &l);
return MP_OBJ_NEW_SMALL_INT(mp_strtonum(s, 0));
return mp_parse_num_integer(s, l, 0);
} else {
return MP_OBJ_NEW_SMALL_INT(mp_obj_get_int(args[0]));
}
@ -36,7 +36,7 @@ STATIC mp_obj_t int_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_
// TODO proper error checking of argument types
uint l;
const char *s = mp_obj_str_get_data(args[0], &l);
return MP_OBJ_NEW_SMALL_INT(mp_strtonum(s, mp_obj_get_int(args[1])));
return mp_parse_num_integer(s, l, mp_obj_get_int(args[1]));
}
default:

View File

@ -8,7 +8,7 @@ struct _mp_lexer_t;
// - xx...x00110: an integer; bits 5 and above are the qstr holding the value
// - xx...x01010: a decimal; bits 5 and above are the qstr holding the value
// - xx...x01110: a string; bits 5 and above are the qstr holding the value
// - xx...x10010: a string with triple quotes; bits 5 and above are the qstr holding the value
// - xx...x10010: a string of bytes; bits 5 and above are the qstr holding the value
// - xx...x10110: a token; bits 5 and above are mp_token_kind_t
// TODO: these can now be unified with MP_OBJ_FITS_SMALL_INT(x)

169
py/parsenum.c Normal file
View File

@ -0,0 +1,169 @@
#include <stdlib.h>
#include "misc.h"
#include "mpconfig.h"
#include "qstr.h"
#include "nlr.h"
#include "obj.h"
#include "parsenum.h"
#if defined(UNIX)
#include <ctype.h>
#include <errno.h>
// Parse an integer literal from the NUL-terminated string str and return it
// as a small-int object.
//
//  str  - text to parse; leading and trailing whitespace is permitted
//  len  - length of str (currently unused, see TODO below)
//  base - radix in the range 2..36, or 0 to detect it from a '0x', '0o' or
//         '0b' prefix (falling back to 10 when there is no prefix)
//
// Raises ValueError (via nlr_jump) if base is out of range, if the text is
// not a valid literal for the chosen base, or if strtol reports an error.
mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
    // TODO at the moment we ignore len; we should honour it!
    // TODO detect integer overflow and return bignum

    int c, neg = 0;
    const char *p = str;
    char *num;
    long found;

    // check radix base
    if ((base != 0 && base < 2) || base > 36) {
        nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "ValueError: int() arg 2 must be >=2 and <= 36"));
    }

    // skip leading whitespace; the cast keeps negative char values away from
    // isspace, which is only defined for unsigned char values and EOF
    while (isspace((unsigned char)(c = *(p++))));
    if (c == 0) {
        goto value_error;
    }

    // optional preceding sign
    if (c == '+' || c == '-') {
        neg = (c == '-');
        c = *(p++);
    }

    // find real radix base, and strip preceding '0x', '0o' and '0b'
    // TODO somehow merge with similar code in parse.c
    if ((base == 0 || base == 16) && c == '0') {
        c = *(p++);
        if ((c | 32) == 'x') {
            base = 16;
        } else if (base == 0 && (c | 32) == 'o') {
            base = 8;
        } else if (base == 0 && (c | 32) == 'b') {
            base = 2;
        } else {
            // no prefix: an explicitly requested base 16 must be kept, only
            // the auto-detect case falls back to decimal
            if (base == 0) {
                base = 10;
            }
            p -= 2;
        }
    } else if (base == 8 && c == '0') {
        c = *(p++);
        if ((c | 32) != 'o') {
            p -= 2;
        }
    } else if (base == 2 && c == '0') {
        c = *(p++);
        if ((c | 32) != 'b') {
            p -= 2;
        }
    } else {
        if (base == 0) {
            base = 10;
        }
        p--;
    }

    // strtol would itself skip whitespace and accept another sign, so insist
    // that the digits start right here (rejects e.g. "+-5" and "- 5")
    if (!isalnum((unsigned char)*p)) {
        goto value_error;
    }
    // strtol with base 16 accepts its own '0x' prefix; any prefix has already
    // been stripped above, so a second one is malformed (e.g. "0x0x10")
    if (base == 16 && p[0] == '0' && (p[1] | 32) == 'x') {
        goto value_error;
    }

    errno = 0;
    found = strtol(p, &num, base);
    if (errno || num == p) {
        // conversion error, or no digits were consumed
        goto value_error;
    }

    // only whitespace may follow the digits
    while (isspace((unsigned char)*num)) {
        num++;
    }
    if (*num != 0) {
        goto value_error;
    }

    // found is non-negative here because strtol was never given a sign
    return MP_OBJ_NEW_SMALL_INT(neg ? -found : found);

value_error:
    nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "invalid literal for int() with base %d: '%s'", base, str));
}
#else /* defined(UNIX) */
// Placeholder used on non-UNIX builds: no parsing is performed and every
// input yields the small int 0.
mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
    // TODO port strtol to stm
    (void)str;
    (void)len;
    (void)base;
    return MP_OBJ_NEW_SMALL_INT(0);
}
#endif /* defined(UNIX) */
#define PARSE_DEC_IN_INTG (1)
#define PARSE_DEC_IN_FRAC (2)
#define PARSE_DEC_IN_EXP (3)
// Parse a decimal literal (digits, optional '.', optional exponent, optional
// trailing 'j'/'J') from the first len bytes of str.
//
// Returns a float object, or a complex object with zero real part when the
// literal ends in 'j'/'J'.  Raises SyntaxError (via nlr_jump) if any of the
// len bytes cannot be consumed, or if float support is compiled out.
//
// Only bytes in [str, str + len) are read; str need not be NUL-terminated.
// Note: no leading sign or whitespace is accepted, and an empty input
// evaluates to 0.
mp_obj_t mp_parse_num_decimal(const char *str, uint len) {
#if MICROPY_ENABLE_FLOAT
    int in = PARSE_DEC_IN_INTG;
    mp_float_t dec_val = 0;
    bool exp_neg = false;
    int exp_val = 0;
    int exp_extra = 0; // decimal exponent adjustment for digits after the '.'
    bool imag = false;
    const char *top = str + len;
    for (; str < top; str++) {
        int dig = *str;
        if ('0' <= dig && dig <= '9') {
            dig -= '0';
            if (in == PARSE_DEC_IN_EXP) {
                exp_val = 10 * exp_val + dig;
            } else {
                dec_val = 10 * dec_val + dig;
                if (in == PARSE_DEC_IN_FRAC) {
                    exp_extra -= 1;
                }
            }
        } else if (in == PARSE_DEC_IN_INTG && dig == '.') {
            in = PARSE_DEC_IN_FRAC;
        } else if (in != PARSE_DEC_IN_EXP && (dig == 'E' || dig == 'e')) {
            in = PARSE_DEC_IN_EXP;
            // peek at the exponent sign only if it lies inside the buffer
            if (str + 1 < top) {
                if (str[1] == '+') {
                    str++;
                } else if (str[1] == '-') {
                    str++;
                    exp_neg = true;
                }
            }
        } else if (dig == 'J' || dig == 'j') {
            // imaginary suffix must be the last character
            str++;
            imag = true;
            break;
        } else {
            // unknown character
            break;
        }
    }

    // every one of the len bytes must have been consumed; compare against
    // top rather than dereferencing str, which may point one past the buffer
    if (str != top) {
        nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number"));
    }

    if (exp_neg) {
        exp_val = -exp_val;
    }
    exp_val += exp_extra;

    // scale the mantissa by 10**exp_val
    for (; exp_val > 0; exp_val--) {
        dec_val *= 10;
    }
    for (; exp_val < 0; exp_val++) {
        dec_val *= 0.1;
    }

    if (imag) {
        return mp_obj_new_complex(0, dec_val);
    } else {
        return mp_obj_new_float(dec_val);
    }
#else
    nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "decimal numbers not supported"));
#endif
}

2
py/parsenum.h Normal file
View File

@ -0,0 +1,2 @@
mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base);
mp_obj_t mp_parse_num_decimal(const char *str, uint len);

View File

@ -31,9 +31,9 @@ PY_O_BASENAME = \
asmthumb.o \
emitnthumb.o \
emitinlinethumb.o \
parsenum.o \
runtime.o \
map.o \
strtonum.o \
obj.o \
objarray.o \
objbool.o \

View File

@ -13,6 +13,7 @@
#include "mpconfig.h"
#include "qstr.h"
#include "obj.h"
#include "parsenum.h"
#include "runtime0.h"
#include "runtime.h"
#include "map.h"
@ -371,72 +372,11 @@ mp_obj_t rt_list_append(mp_obj_t self_in, mp_obj_t arg) {
return mp_obj_list_append(self_in, arg);
}
#define PARSE_DEC_IN_INTG (1)
#define PARSE_DEC_IN_FRAC (2)
#define PARSE_DEC_IN_EXP (3)
mp_obj_t rt_load_const_dec(qstr qstr) {
#if MICROPY_ENABLE_FLOAT
DEBUG_OP_printf("load '%s'\n", qstr_str(qstr));
const char *s = qstr_str(qstr);
int in = PARSE_DEC_IN_INTG;
mp_float_t dec_val = 0;
bool exp_neg = false;
int exp_val = 0;
int exp_extra = 0;
bool imag = false;
for (; *s; s++) {
int dig = *s;
if ('0' <= dig && dig <= '9') {
dig -= '0';
if (in == PARSE_DEC_IN_EXP) {
exp_val = 10 * exp_val + dig;
} else {
dec_val = 10 * dec_val + dig;
if (in == PARSE_DEC_IN_FRAC) {
exp_extra -= 1;
}
}
} else if (in == PARSE_DEC_IN_INTG && dig == '.') {
in = PARSE_DEC_IN_FRAC;
} else if (in != PARSE_DEC_IN_EXP && (dig == 'E' || dig == 'e')) {
in = PARSE_DEC_IN_EXP;
if (s[1] == '+') {
s++;
} else if (s[1] == '-') {
s++;
exp_neg = true;
}
} else if (dig == 'J' || dig == 'j') {
s++;
imag = true;
break;
} else {
// unknown character
break;
}
}
if (*s != 0) {
nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number"));
}
if (exp_neg) {
exp_val = -exp_val;
}
exp_val += exp_extra;
for (; exp_val > 0; exp_val--) {
dec_val *= 10;
}
for (; exp_val < 0; exp_val++) {
dec_val *= 0.1;
}
if (imag) {
return mp_obj_new_complex(0, dec_val);
} else {
return mp_obj_new_float(dec_val);
}
#else
nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "decimal numbers not supported"));
#endif
uint len;
const byte* data = qstr_data(qstr, &len);
return mp_parse_num_decimal((const char*)data, len);
}
mp_obj_t rt_load_const_str(qstr qstr) {

View File

@ -1,98 +0,0 @@
#if defined(UNIX)
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include "misc.h"
#include "strtonum.h"
#include "mpconfig.h"
#include "qstr.h"
#include "nlr.h"
#include "obj.h"
// Parse an integer literal from the NUL-terminated string s and return its
// value.
//
//  s    - text to parse; leading and trailing whitespace is permitted
//  base - radix in the range 2..36, or 0 to detect it from a '0x', '0o' or
//         '0b' prefix (falling back to 10 when there is no prefix)
//
// Raises ValueError (via nlr_jump) if base is out of range, if the text is
// not a valid literal for the chosen base, or if strtol reports an error.
long mp_strtonum(const char *restrict s, int base) {
    int c, neg = 0;
    const char *p = s;
    char *num;
    long found;

    // check radix base
    if ((base != 0 && base < 2) || base > 36) {
        nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "ValueError: int() arg 2 must be >=2 and <= 36"));
    }

    // skip leading whitespace; the cast keeps negative char values away from
    // isspace, which is only defined for unsigned char values and EOF
    while (isspace((unsigned char)(c = *(p++))));
    if (c == 0) {
        goto value_error;
    }

    // optional preceding sign
    if (c == '+' || c == '-') {
        neg = (c == '-');
        c = *(p++);
    }

    // find real radix base, and strip preceding '0x', '0o' and '0b'
    // TODO somehow merge with similar code in parse.c
    if ((base == 0 || base == 16) && c == '0') {
        c = *(p++);
        if ((c | 32) == 'x') {
            base = 16;
        } else if (base == 0 && (c | 32) == 'o') {
            base = 8;
        } else if (base == 0 && (c | 32) == 'b') {
            base = 2;
        } else {
            // no prefix: an explicitly requested base 16 must be kept, only
            // the auto-detect case falls back to decimal
            if (base == 0) {
                base = 10;
            }
            p -= 2;
        }
    } else if (base == 8 && c == '0') {
        c = *(p++);
        if ((c | 32) != 'o') {
            p -= 2;
        }
    } else if (base == 2 && c == '0') {
        c = *(p++);
        if ((c | 32) != 'b') {
            p -= 2;
        }
    } else {
        if (base == 0) {
            base = 10;
        }
        p--;
    }

    // strtol would itself skip whitespace and accept another sign, so insist
    // that the digits start right here (rejects e.g. "+-5" and "- 5")
    if (!isalnum((unsigned char)*p)) {
        goto value_error;
    }
    // strtol with base 16 accepts its own '0x' prefix; any prefix has already
    // been stripped above, so a second one is malformed (e.g. "0x0x10")
    if (base == 16 && p[0] == '0' && (p[1] | 32) == 'x') {
        goto value_error;
    }

    errno = 0;
    found = strtol(p, &num, base);
    if (errno || num == p) {
        // conversion error, or no digits were consumed
        goto value_error;
    }

    // only whitespace may follow the digits
    while (isspace((unsigned char)*num)) {
        num++;
    }
    if (*num != 0) {
        goto value_error;
    }

    // found is non-negative here because strtol was never given a sign
    return neg ? -found : found;

value_error:
    nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "invalid literal for int() with base %d: '%s'", base, s));
}
#else /* defined(UNIX) */
// Placeholder used on non-UNIX builds: no parsing is performed and every
// input yields 0.
long mp_strtonum(const char *restrict s, int base) {
    // TODO port strtol to stm
    (void)s;
    (void)base;
    return 0;
}
#endif /* defined(UNIX) */

View File

@ -1 +0,0 @@
long mp_strtonum(const char *restrict s, int base);