Qstrs are now split into a linked list of qstr pools. This has two benefits: the first pool can live in ROM (a huge benefit, since the core qstrs no longer use RAM), and each subsequent pool is allocated with m_new instead of growing the table with m_renew (thus avoiding a single huge table for all the qstrs). A hash table would still be better, but this scheme takes us part of the way (eventually the pools can be converted to hash tables). Also fixed a bug with import. Also improved the way the module code is referenced (it is no longer the magic number 1).
206 lines
6.3 KiB
C
206 lines
6.3 KiB
C
#include <stdlib.h>
|
|
#include <stdint.h>
|
|
#include <stdarg.h>
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
|
|
#include "nlr.h"
|
|
#include "misc.h"
|
|
#include "mpconfig.h"
|
|
#include "mpqstr.h"
|
|
#include "obj.h"
|
|
#include "runtime0.h"
|
|
#include "runtime.h"
|
|
|
|
typedef struct _mp_obj_str_t {
|
|
mp_obj_base_t base;
|
|
qstr qstr;
|
|
} mp_obj_str_t;
|
|
|
|
// Print hook for str objects.
//
// Emits the string's text wrapped in single quotes through the supplied
// printf-style callback `print`, passing `env` through unchanged.
// TODO need to escape chars etc
void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in) {
    mp_obj_str_t *str_obj = self_in;
    const char *text = qstr_str(str_obj->qstr);
    print(env, "'%s'", text);
}
|
|
|
|
// Binary operator hook for str objects (lhs_in is the str).
//
// Supported operations:
//  - RT_BINARY_OP_SUBSCR with a small int: returns the byte at that index as
//    an int in the range 0..255. Negative indexes count from the end of the
//    string; an index outside the string raises IndexError.
//  - RT_BINARY_OP_SUBSCR with a slice (only when MICROPY_ENABLE_SLICE is set):
//    returns the selected substring as a new str. Only step == 1 is handled.
//  - RT_BINARY_OP_ADD / RT_BINARY_OP_INPLACE_ADD with a str on the right:
//    returns the concatenation as a new str.
//
// Any other subscript type raises TypeError. Returns MP_OBJ_NULL when the
// operation is not supported.
mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
    mp_obj_str_t *lhs = lhs_in;
    const char *lhs_str = qstr_str(lhs->qstr);
    switch (op) {
        case RT_BINARY_OP_SUBSCR:
            // TODO: need predicate to check for int-like type (bools are such for example)
            // ["no", "yes"][1 == 2] is common idiom
            if (MP_OBJ_IS_SMALL_INT(rhs_in)) {
                // TODO: This implements byte string access for single index so far
                machine_int_t len = strlen(lhs_str);
                machine_int_t index = mp_obj_get_int(rhs_in);
                if (index < 0) {
                    // negative indexes are relative to the end of the string
                    index += len;
                }
                if (index < 0 || index >= len) {
                    // never read outside the string's storage
                    nlr_jump(mp_obj_new_exception_msg(MP_QSTR_IndexError, "string index out of range"));
                }
                // go through unsigned char so bytes >= 0x80 are not returned as
                // negative numbers on platforms where plain char is signed
                return mp_obj_new_int((unsigned char)lhs_str[index]);
#if MICROPY_ENABLE_SLICE
            } else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) {
                machine_int_t start, stop, step;
                mp_obj_slice_get(rhs_in, &start, &stop, &step);
                assert(step == 1);
                int len = strlen(lhs_str);
                // clamp start into [0, len]
                if (start < 0) {
                    start = len + start;
                    if (start < 0) {
                        start = 0;
                    }
                } else if (start > len) {
                    start = len;
                }
                // a stop of zero or less is taken relative to the end of the string
                if (stop <= 0) {
                    stop = len + stop;
                } else if (stop > len) {
                    stop = len;
                }
                // CPython returns an empty string when the range is empty or
                // inverted; clamping here also guarantees that the length
                // passed below is never negative
                if (stop < start) {
                    stop = start;
                }
                return mp_obj_new_str(qstr_from_strn_copy(lhs_str + start, stop - start));
#endif
            } else {
                // Message doesn't match CPython, but we don't have so much bytes as they
                // to spend them on verbose wording
                nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "index must be int"));
            }

        case RT_BINARY_OP_ADD:
        case RT_BINARY_OP_INPLACE_ADD:
            if (MP_OBJ_IS_TYPE(rhs_in, &str_type)) {
                // add 2 strings
                const char *rhs_str = qstr_str(((mp_obj_str_t*)rhs_in)->qstr);
                size_t lhs_len = strlen(lhs_str);
                size_t rhs_len = strlen(rhs_str);
                int alloc_len = lhs_len + rhs_len + 1; // +1 for the terminating NUL
                char *val = m_new(char, alloc_len);
                memcpy(val, lhs_str, lhs_len);
                memcpy(val + lhs_len, rhs_str, rhs_len);
                val[lhs_len + rhs_len] = '\0';
                // the buffer is handed over to qstr_from_str_take together with its allocated size
                return mp_obj_new_str(qstr_from_str_take(val, alloc_len));
            }
            break;
    }

    return MP_OBJ_NULL; // op not supported
}
|
|
|
|
// str.join(seq): concatenates the str items of a tuple or list, inserting the
// separator (self_in) between consecutive items, and returns the result as a
// new str.
//
// Raises TypeError if `arg` is neither a tuple nor a list, or if any item is
// not a str.
mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
    assert(MP_OBJ_IS_TYPE(self_in, &str_type));
    mp_obj_str_t *sep_obj = self_in;

    // the separator is the string the method was called on
    const char *sep = qstr_str(sep_obj->qstr);
    size_t sep_len = strlen(sep);

    // fetch the item array from the supported sequence types
    uint n_items;
    mp_obj_t *items;
    if (MP_OBJ_IS_TYPE(arg, &tuple_type)) {
        mp_obj_tuple_get(arg, &n_items, &items);
    } else if (MP_OBJ_IS_TYPE(arg, &list_type)) {
        mp_obj_list_get(arg, &n_items, &items);
    } else {
        goto bad_arg;
    }

    // first pass: validate item types and add up the length of the result
    int total_len = 0;
    for (uint idx = 0; idx < n_items; idx++) {
        if (!MP_OBJ_IS_TYPE(items[idx], &str_type)) {
            goto bad_arg;
        }
        if (idx != 0) {
            total_len += sep_len;
        }
        total_len += strlen(qstr_str(mp_obj_str_get(items[idx])));
    }

    // second pass: copy separators and items into a freshly allocated buffer
    char *buf = m_new(char, total_len + 1);
    char *out = buf;
    for (uint idx = 0; idx < n_items; idx++) {
        if (idx != 0) {
            memcpy(out, sep, sep_len);
            out += sep_len;
        }
        const char *piece = qstr_str(mp_obj_str_get(items[idx]));
        size_t piece_len = strlen(piece);
        memcpy(out, piece, piece_len);
        out += piece_len;
    }
    *out = '\0';

    // hand the buffer (and its allocated size) over to the qstr machinery
    return mp_obj_new_str(qstr_from_str_take(buf, total_len + 1));

bad_arg:
    nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "?str.join expecting a list of str's"));
}
|
|
|
|
// printf-style adapter: collects the variadic arguments and forwards them,
// together with `env` and `fmt`, to vstr_vprintf. str_format passes this
// function as the print callback with a vstr as `env`.
void vstr_printf_wrapper(void *env, const char *fmt, ...) {
    va_list ap;
    va_start(ap, fmt);
    vstr_vprintf(env, fmt, ap);
    va_end(ap);
}
|
|
|
|
mp_obj_t str_format(int n_args, const mp_obj_t *args) {
|
|
assert(MP_OBJ_IS_TYPE(args[0], &str_type));
|
|
mp_obj_str_t *self = args[0];
|
|
|
|
const char *str = qstr_str(self->qstr);
|
|
int arg_i = 1;
|
|
vstr_t *vstr = vstr_new();
|
|
for (; *str; str++) {
|
|
if (*str == '{') {
|
|
str++;
|
|
if (*str == '{') {
|
|
vstr_add_char(vstr, '{');
|
|
} else if (*str == '}') {
|
|
if (arg_i >= n_args) {
|
|
nlr_jump(mp_obj_new_exception_msg(MP_QSTR_IndexError, "tuple index out of range"));
|
|
}
|
|
mp_obj_print_helper(vstr_printf_wrapper, vstr, args[arg_i]);
|
|
arg_i++;
|
|
}
|
|
} else {
|
|
vstr_add_char(vstr, *str);
|
|
}
|
|
}
|
|
|
|
return mp_obj_new_str(qstr_from_str_take(vstr->buf, vstr->alloc));
|
|
}
|
|
|
|
static MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join);
|
|
static MP_DEFINE_CONST_FUN_OBJ_VAR(str_format_obj, 1, str_format);
|
|
|
|
const mp_obj_type_t str_type = {
|
|
{ &mp_const_type },
|
|
"str",
|
|
str_print, // print
|
|
NULL, // call_n
|
|
NULL, // unary_op
|
|
str_binary_op, // binary_op
|
|
NULL, // getiter
|
|
NULL, // iternext
|
|
{ // method list
|
|
{ "join", &str_join_obj },
|
|
{ "format", &str_format_obj },
|
|
{ NULL, NULL }, // end-of-list sentinel
|
|
},
|
|
};
|
|
|
|
// Allocates a new str object that refers to the given qstr and returns it.
mp_obj_t mp_obj_new_str(qstr qstr) {
    mp_obj_str_t *str_obj = m_new_obj(mp_obj_str_t);
    str_obj->qstr = qstr;
    str_obj->base.type = &str_type;
    return str_obj;
}
|
|
|
|
// Returns the qstr held by a str object. The argument must be a str; this is
// checked only by an assert.
qstr mp_obj_str_get(mp_obj_t self_in) {
    assert(MP_OBJ_IS_TYPE(self_in, &str_type));
    return ((mp_obj_str_t *)self_in)->qstr;
}
|