Add basic implementation of bytes type, piggybacking on str.

This reuses as much of the str implementation as possible; from here we
can make the two types more separate as needed.
This commit is contained in:
Paul Sokolovsky 2014-01-24 22:50:40 +02:00
parent 2b2cb7b7f4
commit 91fb1c9b13
6 changed files with 96 additions and 24 deletions

View File

@ -208,6 +208,7 @@ mp_obj_t mp_obj_new_int(machine_int_t value);
mp_obj_t mp_obj_new_int_from_uint(machine_uint_t value); mp_obj_t mp_obj_new_int_from_uint(machine_uint_t value);
mp_obj_t mp_obj_new_int_from_long_str(const char *s); mp_obj_t mp_obj_new_int_from_long_str(const char *s);
mp_obj_t mp_obj_new_str(const byte* data, uint len, bool make_qstr_if_not_already); mp_obj_t mp_obj_new_str(const byte* data, uint len, bool make_qstr_if_not_already);
mp_obj_t mp_obj_new_bytes(const byte* data, uint len);
#if MICROPY_ENABLE_FLOAT #if MICROPY_ENABLE_FLOAT
mp_obj_t mp_obj_new_float(mp_float_t val); mp_obj_t mp_obj_new_float(mp_float_t val);
mp_obj_t mp_obj_new_complex(mp_float_t real, mp_float_t imag); mp_obj_t mp_obj_new_complex(mp_float_t real, mp_float_t imag);
@ -280,7 +281,7 @@ void mp_obj_exception_get_traceback(mp_obj_t self_in, machine_uint_t *n, machine
// str // str
extern const mp_obj_type_t str_type; extern const mp_obj_type_t str_type;
mp_obj_t mp_obj_str_builder_start(uint len, byte **data); mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data);
mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in); mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in);
bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2); bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2);
uint mp_obj_str_get_hash(mp_obj_t self_in); uint mp_obj_str_get_hash(mp_obj_t self_in);
@ -288,6 +289,9 @@ uint mp_obj_str_get_len(mp_obj_t self_in);
const char *mp_obj_str_get_str(mp_obj_t self_in); // use this only if you need the string to be null terminated const char *mp_obj_str_get_str(mp_obj_t self_in); // use this only if you need the string to be null terminated
const byte *mp_obj_str_get_data(mp_obj_t self_in, uint *len); const byte *mp_obj_str_get_data(mp_obj_t self_in, uint *len);
// bytes
extern const mp_obj_type_t bytes_type;
#if MICROPY_ENABLE_FLOAT #if MICROPY_ENABLE_FLOAT
// float // float
extern const mp_obj_type_t float_type; extern const mp_obj_type_t float_type;

View File

@ -28,16 +28,21 @@ typedef struct _mp_obj_str_t {
// use this macro to extract the string data and length // use this macro to extract the string data and length
#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) const byte *str_data; uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; } #define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) const byte *str_data; uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; }
static mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, int cur); static mp_obj_t mp_obj_new_str_iterator(mp_obj_t str);
static mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str);
/******************************************************************************/ /******************************************************************************/
/* str */ /* str */
void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) { void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
GET_STR_DATA_LEN(self_in, str_data, str_len); GET_STR_DATA_LEN(self_in, str_data, str_len);
if (kind == PRINT_STR) { bool is_bytes = MP_OBJ_IS_TYPE(self_in, &bytes_type);
if (kind == PRINT_STR && !is_bytes) {
print(env, "%.*s", str_len, str_data); print(env, "%.*s", str_len, str_data);
} else { } else {
if (is_bytes) {
print(env, "b");
}
// TODO need to escape chars etc // TODO need to escape chars etc
print(env, "'%.*s'", str_len, str_data); print(env, "'%.*s'", str_len, str_data);
} }
@ -71,7 +76,11 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
// ["no", "yes"][1 == 2] is common idiom // ["no", "yes"][1 == 2] is common idiom
if (MP_OBJ_IS_SMALL_INT(rhs_in)) { if (MP_OBJ_IS_SMALL_INT(rhs_in)) {
uint index = mp_get_index(mp_obj_get_type(lhs_in), lhs_len, rhs_in); uint index = mp_get_index(mp_obj_get_type(lhs_in), lhs_len, rhs_in);
return mp_obj_new_str(lhs_data + index, 1, true); if (MP_OBJ_IS_TYPE(lhs_in, &bytes_type)) {
return MP_OBJ_NEW_SMALL_INT(lhs_data[index]);
} else {
return mp_obj_new_str(lhs_data + index, 1, true);
}
#if MICROPY_ENABLE_SLICE #if MICROPY_ENABLE_SLICE
} else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) { } else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) {
machine_int_t start, stop, step; machine_int_t start, stop, step;
@ -120,7 +129,7 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
// code for non-qstr // code for non-qstr
byte *data; byte *data;
mp_obj_t s = mp_obj_str_builder_start(alloc_len, &data); mp_obj_t s = mp_obj_str_builder_start(mp_obj_get_type(lhs_in), alloc_len, &data);
memcpy(data, lhs_data, lhs_len); memcpy(data, lhs_data, lhs_len);
memcpy(data + lhs_len, rhs_data, rhs_len); memcpy(data + lhs_len, rhs_data, rhs_len);
return mp_obj_str_builder_end(s); return mp_obj_str_builder_end(s);
@ -143,7 +152,7 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
} }
int n = MP_OBJ_SMALL_INT_VALUE(rhs_in); int n = MP_OBJ_SMALL_INT_VALUE(rhs_in);
byte *data; byte *data;
mp_obj_t s = mp_obj_str_builder_start(lhs_len * n, &data); mp_obj_t s = mp_obj_str_builder_start(mp_obj_get_type(lhs_in), lhs_len * n, &data);
mp_seq_multiply(lhs_data, sizeof(*lhs_data), lhs_len, n, data); mp_seq_multiply(lhs_data, sizeof(*lhs_data), lhs_len, n, data);
return mp_obj_str_builder_end(s); return mp_obj_str_builder_end(s);
} }
@ -152,10 +161,6 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
return MP_OBJ_NULL; // op not supported return MP_OBJ_NULL; // op not supported
} }
static mp_obj_t str_getiter(mp_obj_t o_in) {
return mp_obj_new_str_iterator(o_in, 0);
}
mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) { mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
assert(MP_OBJ_IS_STR(self_in)); assert(MP_OBJ_IS_STR(self_in));
@ -188,7 +193,7 @@ mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
// make joined string // make joined string
byte *data; byte *data;
mp_obj_t joined_str = mp_obj_str_builder_start(required_len, &data); mp_obj_t joined_str = mp_obj_str_builder_start(mp_obj_get_type(self_in), required_len, &data);
for (int i = 0; i < seq_len; i++) { for (int i = 0; i < seq_len; i++) {
if (i > 0) { if (i > 0) {
memcpy(data, sep_str, sep_len); memcpy(data, sep_str, sep_len);
@ -393,13 +398,23 @@ const mp_obj_type_t str_type = {
"str", "str",
.print = str_print, .print = str_print,
.binary_op = str_binary_op, .binary_op = str_binary_op,
.getiter = str_getiter, .getiter = mp_obj_new_str_iterator,
.methods = str_type_methods, .methods = str_type_methods,
}; };
mp_obj_t mp_obj_str_builder_start(uint len, byte **data) { // Reuses most of methods from str
// Type object for bytes. The print, binary_op and methods slots are the same
// ones str_type uses; only the iterator constructor is bytes-specific.
const mp_obj_type_t bytes_type = {
{ &mp_const_type },
"bytes",
// str_print checks for bytes_type and emits the b'...' form
.print = str_print,
// str_binary_op returns a small int when a bytes object is indexed
.binary_op = str_binary_op,
// iteration yields each byte as a small int rather than a 1-char str
.getiter = mp_obj_new_bytes_iterator,
.methods = str_type_methods,
};
mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data) {
mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1); mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1);
o->base.type = &str_type; o->base.type = type;
o->len = len; o->len = len;
*data = o->data; *data = o->data;
return o; return o;
@ -413,6 +428,16 @@ mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) {
return o; return o;
} }
// Allocate a fresh heap object of the given type (callers pass &str_type or
// &bytes_type) and fill it with a copy of the len bytes starting at data.
// The hash is computed from the source bytes, and one extra byte is
// allocated so the copy can be null terminated.
static mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len) {
    mp_obj_str_t *result = m_new_obj_var(mp_obj_str_t, byte, len + 1);

    // header fields
    result->base.type = type;
    result->len = len;
    result->hash = qstr_compute_hash(data, len);

    // payload, followed by a terminating null kept for now for
    // compatibility with C ASCIIZ strings
    memcpy(result->data, data, len * sizeof(byte));
    result->data[len] = '\0';

    return result;
}
mp_obj_t mp_obj_new_str(const byte* data, uint len, bool make_qstr_if_not_already) { mp_obj_t mp_obj_new_str(const byte* data, uint len, bool make_qstr_if_not_already) {
qstr q = qstr_find_strn(data, len); qstr q = qstr_find_strn(data, len);
if (q != MP_QSTR_NULL) { if (q != MP_QSTR_NULL) {
@ -423,16 +448,14 @@ mp_obj_t mp_obj_new_str(const byte* data, uint len, bool make_qstr_if_not_alread
return MP_OBJ_NEW_QSTR(qstr_from_strn((const char*)data, len)); return MP_OBJ_NEW_QSTR(qstr_from_strn((const char*)data, len));
} else { } else {
// no existing qstr, don't make one // no existing qstr, don't make one
mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1); return str_new(&str_type, data, len);
o->base.type = &str_type;
o->hash = qstr_compute_hash(data, len);
o->len = len;
memcpy(o->data, data, len * sizeof(byte));
o->data[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
return o;
} }
} }
// Create a new bytes object holding a copy of the len bytes at data.
// Unlike mp_obj_new_str, no qstr lookup is attempted here: the object is
// always built through str_new with bytes_type.
mp_obj_t mp_obj_new_bytes(const byte* data, uint len) {
    mp_obj_t bytes_obj = str_new(&bytes_type, data, len);
    return bytes_obj;
}
bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2) { bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2) {
if (MP_OBJ_IS_QSTR(s1) && MP_OBJ_IS_QSTR(s2)) { if (MP_OBJ_IS_QSTR(s1) && MP_OBJ_IS_QSTR(s2)) {
return s1 == s2; return s1 == s2;
@ -522,10 +545,36 @@ static const mp_obj_type_t str_it_type = {
.iternext = str_it_iternext, .iternext = str_it_iternext,
}; };
mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, int cur) { mp_obj_t bytes_it_iternext(mp_obj_t self_in) {
mp_obj_str_it_t *self = self_in;
GET_STR_DATA_LEN(self->str, str, len);
if (self->cur < len) {
mp_obj_t o_out = MP_OBJ_NEW_SMALL_INT(str[self->cur]);
self->cur += 1;
return o_out;
} else {
return mp_const_stop_iteration;
}
}
// Type object for the iterator created by mp_obj_new_bytes_iterator.
// Its iternext slot, bytes_it_iternext, returns one byte per call as a
// small int.
static const mp_obj_type_t bytes_it_type = {
{ &mp_const_type },
"bytes_iterator",
.iternext = bytes_it_iternext,
};
mp_obj_t mp_obj_new_str_iterator(mp_obj_t str) {
mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t); mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t);
o->base.type = &str_it_type; o->base.type = &str_it_type;
o->str = str; o->str = str;
o->cur = cur; o->cur = 0;
return o;
}
mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str) {
mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t);
o->base.type = &bytes_it_type;
o->str = str;
o->cur = 0;
return o; return o;
} }

View File

@ -408,6 +408,13 @@ mp_obj_t rt_load_const_str(qstr qstr) {
return MP_OBJ_NEW_QSTR(qstr); return MP_OBJ_NEW_QSTR(qstr);
} }
// Runtime helper for loading a bytes constant: the constant's raw data is
// looked up via the given qstr and copied into a new bytes object.
mp_obj_t rt_load_const_bytes(qstr qstr) {
    DEBUG_OP_printf("load b'%s'\n", qstr_str(qstr));

    // fetch the data pointer and length recorded for this qstr
    uint n_bytes;
    const byte *payload = qstr_data(qstr, &n_bytes);

    return mp_obj_new_bytes(payload, n_bytes);
}
mp_obj_t rt_load_name(qstr qstr) { mp_obj_t rt_load_name(qstr qstr) {
// logic: search locals, globals, builtins // logic: search locals, globals, builtins
DEBUG_OP_printf("load name %s\n", qstr_str(qstr)); DEBUG_OP_printf("load name %s\n", qstr_str(qstr));

View File

@ -2,6 +2,7 @@ int rt_is_true(mp_obj_t arg);
mp_obj_t rt_load_const_dec(qstr qstr); mp_obj_t rt_load_const_dec(qstr qstr);
mp_obj_t rt_load_const_str(qstr qstr); mp_obj_t rt_load_const_str(qstr qstr);
mp_obj_t rt_load_const_bytes(qstr qstr);
mp_obj_t rt_load_name(qstr qstr); mp_obj_t rt_load_name(qstr qstr);
mp_obj_t rt_load_global(qstr qstr); mp_obj_t rt_load_global(qstr qstr);
mp_obj_t rt_load_build_class(void); mp_obj_t rt_load_build_class(void);

View File

@ -138,7 +138,7 @@ bool mp_execute_byte_code_2(const byte *code_info, const byte **ip_in_out, mp_ob
case MP_BC_LOAD_CONST_BYTES: case MP_BC_LOAD_CONST_BYTES:
DECODE_QSTR; DECODE_QSTR;
PUSH(rt_load_const_str(qst)); // TODO PUSH(rt_load_const_bytes(qst));
break; break;
case MP_BC_LOAD_CONST_STRING: case MP_BC_LOAD_CONST_STRING:

11
tests/basics/bytes.py Normal file
View File

@ -0,0 +1,11 @@
# Basic checks for the bytes type: printing, indexing and iteration.
data = b"123"

# print(), str() and repr() of a bytes object
print(data)
print(str(data))
print(repr(data))

# indexing a bytes object gives integers, including with a negative index
first, third = data[0], data[2]
print(first, third)
print(data[-1])

# iterating over a bytes object yields integers, so they can be summed
total = 0
for value in data:
    total += value
print(total)