py: Change vstr so that it doesn't null terminate buffer by default.

This cleans up vstr so that it's a pure "variable buffer", and the user
can decide whether they need to add a terminating null byte.  In most
places where vstr is used, the vstr did not need to be null terminated
and so this patch saves code size, a tiny bit of RAM, and makes vstr
usage more efficient.  When null termination is needed it must be
done explicitly using vstr_null_terminate.
This commit is contained in:
Damien George 2015-01-28 23:43:01 +00:00
parent 57aebe1714
commit 0d3cb6726d
14 changed files with 67 additions and 70 deletions

View File

@ -39,9 +39,7 @@ STATIC mp_obj_t mod_ujson_dumps(mp_obj_t obj) {
vstr_t vstr;
vstr_init(&vstr, 8);
mp_obj_print_helper((void (*)(void *env, const char *fmt, ...))vstr_printf, &vstr, obj, PRINT_JSON);
mp_obj_t ret = mp_obj_new_str(vstr.buf, vstr.len, false);
vstr_clear(&vstr);
return ret;
return mp_obj_new_str_from_vstr(&mp_type_str, &vstr);
}
STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_dumps_obj, mod_ujson_dumps);

View File

@ -111,6 +111,7 @@ int readline_process_char(int c) {
if (rl.line->len > rl.orig_line_len && (MP_STATE_PORT(readline_hist)[0] == NULL || strcmp(MP_STATE_PORT(readline_hist)[0], rl.line->buf + rl.orig_line_len) != 0)) {
// a line which is not empty and different from the last one
// so update the history
vstr_null_terminate(rl.line);
char *most_recent_hist = str_dup_maybe(rl.line->buf + rl.orig_line_len);
if (most_recent_hist != NULL) {
for (int i = READLINE_HIST_SIZE - 1; i > 0; i--) {

View File

@ -61,12 +61,14 @@ bool mp_obj_is_package(mp_obj_t module) {
}
STATIC mp_import_stat_t stat_dir_or_file(vstr_t *path) {
vstr_null_terminate(path);
//printf("stat %s\n", vstr_str(path));
mp_import_stat_t stat = mp_import_stat(vstr_str(path));
if (stat == MP_IMPORT_STAT_DIR) {
return stat;
}
vstr_add_str(path, ".py");
vstr_null_terminate(path);
stat = mp_import_stat(vstr_str(path));
if (stat == MP_IMPORT_STAT_FILE) {
return stat;
@ -134,6 +136,7 @@ STATIC void do_load_from_lexer(mp_obj_t module_obj, mp_lexer_t *lex, const char
STATIC void do_load(mp_obj_t module_obj, vstr_t *file) {
// create the lexer
vstr_null_terminate(file);
mp_lexer_t *lex = mp_lexer_new_from_file(vstr_str(file));
do_load_from_lexer(module_obj, lex, vstr_str(file));
}
@ -263,7 +266,7 @@ mp_obj_t mp_builtin___import__(mp_uint_t n_args, const mp_obj_t *args) {
// create a qstr for the module name up to this depth
qstr mod_name = qstr_from_strn(mod_str, i);
DEBUG_printf("Processing module: %s\n", qstr_str(mod_name));
DEBUG_printf("Previous path: %s\n", vstr_str(&path));
DEBUG_printf("Previous path: %.*s\n", vstr_len(&path), vstr_str(&path));
// find the file corresponding to the module name
mp_import_stat_t stat;
@ -276,7 +279,7 @@ mp_obj_t mp_builtin___import__(mp_uint_t n_args, const mp_obj_t *args) {
vstr_add_strn(&path, mod_str + last, i - last);
stat = stat_dir_or_file(&path);
}
DEBUG_printf("Current path: %s\n", vstr_str(&path));
DEBUG_printf("Current path: %.*s\n", vstr_len(&path), vstr_str(&path));
if (stat == MP_IMPORT_STAT_NO_EXIST) {
#if MICROPY_MODULE_WEAK_LINKS
@ -320,12 +323,13 @@ mp_obj_t mp_builtin___import__(mp_uint_t n_args, const mp_obj_t *args) {
}
if (stat == MP_IMPORT_STAT_DIR) {
DEBUG_printf("%s is dir\n", vstr_str(&path));
DEBUG_printf("%.*s is dir\n", vstr_len(&path), vstr_str(&path));
// https://docs.python.org/3/reference/import.html
// "Specifically, any module that contains a __path__ attribute is considered a package."
mp_store_attr(module_obj, MP_QSTR___path__, mp_obj_new_str(vstr_str(&path), vstr_len(&path), false));
vstr_add_char(&path, PATH_SEP_CHAR);
vstr_add_str(&path, "__init__.py");
vstr_null_terminate(&path);
if (mp_import_stat(vstr_str(&path)) != MP_IMPORT_STAT_FILE) {
vstr_cut_tail_bytes(&path, sizeof("/__init__.py") - 1); // cut off /__init__.py
mp_warning("%s is imported as namespace package", vstr_str(&path));

View File

@ -555,7 +555,7 @@ STATIC void cpython_c_tuple(compiler_t *comp, mp_parse_node_t pn, mp_parse_node_
} else {
vstr_printf(vstr, ")");
}
EMIT_ARG(load_const_verbatim_str, vstr_str(vstr));
EMIT_ARG(load_const_verbatim_strn, vstr_str(vstr), vstr_len(vstr));
vstr_free(vstr);
} else {
if (!MP_PARSE_NODE_IS_NULL(pn)) {
@ -1538,7 +1538,7 @@ STATIC void compile_import_from(compiler_t *comp, mp_parse_node_struct_t *pns) {
// build the "fromlist" tuple
#if MICROPY_EMIT_CPYTHON
EMIT_ARG(load_const_verbatim_str, "('*',)");
EMIT_ARG(load_const_verbatim_strn, "('*',)", 6);
#else
EMIT_ARG(load_const_str, MP_QSTR__star_, false);
EMIT_ARG(build_tuple, 1);
@ -1576,7 +1576,7 @@ STATIC void compile_import_from(compiler_t *comp, mp_parse_node_struct_t *pns) {
vstr_printf(vstr, ",");
}
vstr_printf(vstr, ")");
EMIT_ARG(load_const_verbatim_str, vstr_str(vstr));
EMIT_ARG(load_const_verbatim_strn, vstr_str(vstr), vstr_len(vstr));
vstr_free(vstr);
}
#else

View File

@ -157,7 +157,7 @@ typedef struct _emit_method_table_t {
#if MICROPY_EMIT_CPYTHON
// these methods are only needed for emitcpy
void (*load_const_verbatim_str)(emit_t *emit, const char *str);
void (*load_const_verbatim_strn)(emit_t *emit, const char *str, mp_uint_t len);
void (*load_closure)(emit_t *emit, qstr qst, mp_uint_t local_num);
void (*setup_loop)(emit_t *emit, mp_uint_t label);
#endif

View File

@ -800,10 +800,10 @@ STATIC void emit_cpy_end_except_handler(emit_t *emit) {
emit_cpy_adjust_stack_size(emit, -5); // stack adjust
}
STATIC void emit_cpy_load_const_verbatim_str(emit_t *emit, const char *str) {
STATIC void emit_cpy_load_const_verbatim_strn(emit_t *emit, const char *str, mp_uint_t len) {
emit_pre(emit, 1, 3);
if (emit->pass == MP_PASS_EMIT) {
printf("LOAD_CONST %s\n", str);
printf("LOAD_CONST %.*s\n", (int)len, str);
}
}
@ -912,7 +912,7 @@ const emit_method_table_t emit_cpython_method_table = {
emit_cpy_end_except_handler,
// emitcpy specific functions
emit_cpy_load_const_verbatim_str,
emit_cpy_load_const_verbatim_strn,
emit_cpy_load_closure,
emit_cpy_setup_loop,
};

View File

@ -145,6 +145,7 @@ size_t vstr_len(vstr_t *vstr);
void vstr_hint_size(vstr_t *vstr, size_t size);
char *vstr_extend(vstr_t *vstr, size_t size);
char *vstr_add_len(vstr_t *vstr, size_t len);
void vstr_null_terminate(vstr_t *vstr);
void vstr_add_byte(vstr_t *vstr, byte v);
void vstr_add_char(vstr_t *vstr, unichar chr);
void vstr_add_str(vstr_t *vstr, const char *str);

View File

@ -217,7 +217,7 @@ STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, mp_uint_t n_args, mp_uint_t n_k
vstr_init(&vstr, 16);
} else {
mp_int_t len = MP_OBJ_SMALL_INT_VALUE(len_in);
vstr_init(&vstr, len + 1);
vstr_init(&vstr, len);
}
mp_obj_t iterable = mp_getiter(args[0]);
@ -856,7 +856,7 @@ mp_obj_t mp_obj_str_format(mp_uint_t n_args, const mp_obj_t *args, mp_map_t *kwa
while (str < top && *str != '}' && *str != '!' && *str != ':') {
vstr_add_char(field_name, *str++);
}
vstr_add_char(field_name, '\0');
vstr_null_terminate(field_name);
}
// conversion ::= "r" | "s"
@ -887,7 +887,7 @@ mp_obj_t mp_obj_str_format(mp_uint_t n_args, const mp_obj_t *args, mp_map_t *kwa
while (str < top && *str != '}') {
vstr_add_char(format_spec, *str++);
}
vstr_add_char(format_spec, '\0');
vstr_null_terminate(format_spec);
}
}
if (str >= top) {
@ -1890,6 +1890,7 @@ mp_obj_t mp_obj_new_str_from_vstr(const mp_obj_type_t *type, vstr_t *vstr) {
o->len = vstr->len;
o->hash = qstr_compute_hash((byte*)vstr->buf, vstr->len);
o->data = (byte*)m_renew(char, vstr->buf, vstr->alloc, vstr->len + 1);
((byte*)o->data)[o->len] = '\0'; // add null byte
vstr->buf = NULL;
vstr->alloc = 0;
return o;

View File

@ -177,7 +177,6 @@ STATIC mp_obj_t stream_read(mp_uint_t n_args, const mp_obj_t *args) {
nlr_raise(mp_obj_new_exception_arg1(&mp_type_OSError, MP_OBJ_NEW_SMALL_INT(error)));
} else {
vstr.len = out_sz;
vstr.buf[vstr.len] = '\0';
return mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(o->type->stream_p), &vstr);
}
}
@ -289,7 +288,6 @@ STATIC mp_obj_t stream_readall(mp_obj_t self_in) {
}
vstr.len = total_size;
vstr.buf[total_size] = '\0';
return mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(o->type->stream_p), &vstr);
}
@ -306,15 +304,15 @@ STATIC mp_obj_t stream_unbuffered_readline(mp_uint_t n_args, const mp_obj_t *arg
max_size = MP_OBJ_SMALL_INT_VALUE(args[1]);
}
vstr_t *vstr;
vstr_t vstr;
if (max_size != -1) {
vstr = vstr_new_size(max_size);
vstr_init(&vstr, max_size);
} else {
vstr = vstr_new();
vstr_init(&vstr, 16);
}
while (max_size == -1 || max_size-- != 0) {
char *p = vstr_add_len(vstr, 1);
char *p = vstr_add_len(&vstr, 1);
if (p == NULL) {
nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_MemoryError, "out of memory"));
}
@ -323,14 +321,14 @@ STATIC mp_obj_t stream_unbuffered_readline(mp_uint_t n_args, const mp_obj_t *arg
mp_uint_t out_sz = o->type->stream_p->read(o, p, 1, &error);
if (out_sz == MP_STREAM_ERROR) {
if (is_nonblocking_error(error)) {
if (vstr->len == 1) {
if (vstr.len == 1) {
// We just incremented it, but otherwise we read nothing
// and immediately got EAGAIN. This case is not well
// specified in
// https://docs.python.org/3/library/io.html#io.IOBase.readline
// unlike similar case for read(). But we follow the latter's
// behavior - return None.
vstr_free(vstr);
vstr_clear(&vstr);
return mp_const_none;
} else {
goto done;
@ -343,16 +341,15 @@ done:
// Back out previously added byte
// Consider, what's better - read a char and get OutOfMemory (so read
// char is lost), or allocate first as we do.
vstr_cut_tail_bytes(vstr, 1);
vstr_cut_tail_bytes(&vstr, 1);
break;
}
if (*p == '\n') {
break;
}
}
mp_obj_t ret = mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(o->type->stream_p), vstr);
vstr_free(vstr);
return ret;
return mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(o->type->stream_p), &vstr);
}
// TODO take an optional extra argument (what does it do exactly?)

View File

@ -35,12 +35,10 @@
// returned value is always at least 1 greater than argument
#define ROUND_ALLOC(a) (((a) & ((~0) - 7)) + 8)
// Init the vstr so it allocs exactly given number of bytes.
// Length is set to zero, and null byte written in first position.
// Init the vstr so it allocs exactly given number of bytes. Set length to zero.
void vstr_init(vstr_t *vstr, size_t alloc) {
if (alloc < 2) {
// need at least 1 byte for the null byte at the end
alloc = 2;
if (alloc < 1) {
alloc = 1;
}
vstr->alloc = alloc;
vstr->len = 0;
@ -49,24 +47,20 @@ void vstr_init(vstr_t *vstr, size_t alloc) {
vstr->had_error = true;
return;
}
vstr->buf[0] = 0;
vstr->had_error = false;
vstr->fixed_buf = false;
}
// Init the vstr so it allocs exactly enough ram to hold given length (plus the
// null terminating byte), set the length, and write the null byte at the end.
// Init the vstr so it allocs exactly enough ram to hold given length, and set the length.
void vstr_init_len(vstr_t *vstr, size_t len) {
vstr_init(vstr, len + 1);
vstr_add_len(vstr, len);
vstr_init(vstr, len);
vstr->len = len;
}
void vstr_init_fixed_buf(vstr_t *vstr, size_t alloc, char *buf) {
assert(alloc > 0); // need at least room for the null byte
vstr->alloc = alloc;
vstr->len = 0;
vstr->buf = buf;
vstr->buf[0] = 0;
vstr->had_error = false;
vstr->fixed_buf = true;
}
@ -79,16 +73,16 @@ void vstr_clear(vstr_t *vstr) {
}
vstr_t *vstr_new(void) {
vstr_t *vstr = m_new(vstr_t, 1);
vstr_t *vstr = m_new_obj(vstr_t);
if (vstr == NULL) {
return NULL;
}
vstr_init(vstr, 32);
vstr_init(vstr, 16);
return vstr;
}
vstr_t *vstr_new_size(size_t alloc) {
vstr_t *vstr = m_new(vstr_t, 1);
vstr_t *vstr = m_new_obj(vstr_t);
if (vstr == NULL) {
return NULL;
}
@ -107,7 +101,6 @@ void vstr_free(vstr_t *vstr) {
void vstr_reset(vstr_t *vstr) {
vstr->len = 0;
vstr->buf[0] = 0;
vstr->had_error = false;
}
@ -129,7 +122,7 @@ size_t vstr_len(vstr_t *vstr) {
return vstr->len;
}
// Extend vstr strictly by requested size, return pointer to newly added chunk
// Extend vstr strictly by requested size, return pointer to newly added chunk.
char *vstr_extend(vstr_t *vstr, size_t size) {
if (vstr->fixed_buf) {
return NULL;
@ -146,11 +139,11 @@ char *vstr_extend(vstr_t *vstr, size_t size) {
}
STATIC bool vstr_ensure_extra(vstr_t *vstr, size_t size) {
if (vstr->len + size + 1 > vstr->alloc) {
if (vstr->len + size > vstr->alloc) {
if (vstr->fixed_buf) {
return false;
}
size_t new_alloc = ROUND_ALLOC((vstr->len + size + 1) * 2);
size_t new_alloc = ROUND_ALLOC((vstr->len + size) * 2);
char *new_buf = m_renew(char, vstr->buf, vstr->alloc, new_alloc);
if (new_buf == NULL) {
vstr->had_error = true;
@ -175,10 +168,17 @@ char *vstr_add_len(vstr_t *vstr, size_t len) {
}
char *buf = vstr->buf + vstr->len;
vstr->len += len;
vstr->buf[vstr->len] = 0;
return buf;
}
// Write a '\0' byte directly after the current contents, leaving len unchanged.
// Nothing is written if the vstr already has had_error set, or if
// vstr_ensure_extra reports that room for one more byte is not available.
void vstr_null_terminate(vstr_t *vstr) {
    if (!vstr->had_error && vstr_ensure_extra(vstr, 1)) {
        vstr->buf[vstr->len] = '\0';
    }
}
void vstr_add_byte(vstr_t *vstr, byte b) {
byte *buf = (byte*)vstr_add_len(vstr, 1);
if (buf == NULL) {
@ -224,11 +224,7 @@ void vstr_add_char(vstr_t *vstr, unichar c) {
buf[3] = (c & 0x3F) | 0x80;
}
#else
byte *buf = (byte*)vstr_add_len(vstr, 1);
if (buf == NULL) {
return;
}
buf[0] = c;
vstr_add_byte(vstr, c);
#endif
}
@ -239,7 +235,7 @@ void vstr_add_str(vstr_t *vstr, const char *str) {
void vstr_add_strn(vstr_t *vstr, const char *str, size_t len) {
if (vstr->had_error || !vstr_ensure_extra(vstr, len)) {
// if buf is fixed, we got here because there isn't enough room left
// so just try to copy as much as we can, with room for null byte
// so just try to copy as much as we can, with room for a possible null byte
if (vstr->fixed_buf && vstr->len + 1 < vstr->alloc) {
len = vstr->alloc - vstr->len - 1;
goto copy;
@ -249,7 +245,6 @@ void vstr_add_strn(vstr_t *vstr, const char *str, size_t len) {
copy:
memmove(vstr->buf + vstr->len, str, len);
vstr->len += len;
vstr->buf[vstr->len] = 0;
}
STATIC char *vstr_ins_blank_bytes(vstr_t *vstr, size_t byte_pos, size_t byte_len) {
@ -265,8 +260,8 @@ STATIC char *vstr_ins_blank_bytes(vstr_t *vstr, size_t byte_pos, size_t byte_len
if (!vstr_ensure_extra(vstr, byte_len)) {
return NULL;
}
// copy up the string to make room for the new bytes; +1 for the null byte
memmove(vstr->buf + byte_pos + byte_len, vstr->buf + byte_pos, l - byte_pos + 1);
// copy up the string to make room for the new bytes
memmove(vstr->buf + byte_pos + byte_len, vstr->buf + byte_pos, l - byte_pos);
// increase the length
vstr->len += byte_len;
}
@ -301,7 +296,6 @@ void vstr_cut_tail_bytes(vstr_t *vstr, size_t len) {
} else {
vstr->len -= len;
}
vstr->buf[vstr->len] = 0;
}
void vstr_cut_out_bytes(vstr_t *vstr, size_t byte_pos, size_t bytes_to_cut) {
@ -309,10 +303,8 @@ void vstr_cut_out_bytes(vstr_t *vstr, size_t byte_pos, size_t bytes_to_cut) {
return;
} else if (byte_pos + bytes_to_cut >= vstr->len) {
vstr->len = byte_pos;
vstr->buf[vstr->len] = 0;
} else {
// move includes +1 for null byte at the end
memmove(vstr->buf + byte_pos, vstr->buf + byte_pos + bytes_to_cut, vstr->len - byte_pos - bytes_to_cut + 1);
memmove(vstr->buf + byte_pos, vstr->buf + byte_pos + bytes_to_cut, vstr->len - byte_pos - bytes_to_cut);
vstr->len -= bytes_to_cut;
}
}

View File

@ -39,9 +39,7 @@ STATIC mp_obj_t mp_builtin_input(uint n_args, const mp_obj_t *args) {
if (line.len == 0 && ret == CHAR_CTRL_D) {
nlr_raise(mp_obj_new_exception(&mp_type_EOFError));
}
mp_obj_t o = mp_obj_new_str(line.buf, line.len, false);
vstr_clear(&line);
return o;
return mp_obj_new_str_from_vstr(&mp_type_str, &line);
}
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_input_obj, 0, 1, mp_builtin_input);

View File

@ -217,7 +217,6 @@ STATIC mp_obj_t socket_recv(mp_obj_t self_in, mp_obj_t len_in) {
return mp_const_empty_bytes;
}
vstr.len = ret;
vstr.buf[vstr.len] = '\0';
return mp_obj_new_str_from_vstr(&mp_type_bytes, &vstr);
}
STATIC MP_DEFINE_CONST_FUN_OBJ_2(socket_recv_obj, socket_recv);
@ -269,7 +268,6 @@ STATIC mp_obj_t socket_recvfrom(mp_obj_t self_in, mp_obj_t len_in) {
tuple[0] = mp_const_empty_bytes;
} else {
vstr.len = ret;
vstr.buf[vstr.len] = '\0';
tuple[0] = mp_obj_new_str_from_vstr(&mp_type_bytes, &vstr);
}
tuple[1] = mod_network_format_inet_addr(ip, port);

View File

@ -248,11 +248,12 @@ int pyexec_friendly_repl_process_char(int c) {
return 0;
}
vstr_null_terminate(&repl.line);
if (!mp_repl_continue_with_input(vstr_str(&repl.line))) {
goto exec;
}
vstr_add_char(&repl.line, '\n');
vstr_add_byte(&repl.line, '\n');
repl.cont_line = true;
stdout_tx_str("... ");
readline_note_newline();
@ -274,8 +275,9 @@ int pyexec_friendly_repl_process_char(int c) {
return 0;
}
vstr_null_terminate(&repl.line);
if (mp_repl_continue_with_input(vstr_str(&repl.line))) {
vstr_add_char(&repl.line, '\n');
vstr_add_byte(&repl.line, '\n');
stdout_tx_str("... ");
readline_note_newline();
return 0;
@ -362,8 +364,12 @@ friendly_repl_reset:
continue;
}
while (mp_repl_continue_with_input(vstr_str(&line))) {
vstr_add_char(&line, '\n');
for (;;) {
vstr_null_terminate(&line);
if (!mp_repl_continue_with_input(vstr_str(&line))) {
break;
}
vstr_add_byte(&line, '\n');
ret = readline(&line, "... ");
if (ret == CHAR_CTRL_C) {
// cancel everything

View File

@ -319,6 +319,7 @@ soft_reset:
} else {
vstr_add_str(vstr, mp_obj_str_get_str(pyb_config_main));
}
vstr_null_terminate(vstr);
if (!pyexec_file(vstr_str(vstr))) {
flash_error(3);
}