From 45fb143ba62b70fc25264401f86ed79494d73ff9 Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Sat, 26 Apr 2014 05:46:06 +0300 Subject: [PATCH 1/4] streams: Make .write() support arbitrary objects with buffer interface. This in particular fixes writing str vs bytes. --- py/stream.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/py/stream.c b/py/stream.c index ba80250216..72e6ab509d 100644 --- a/py/stream.c +++ b/py/stream.c @@ -42,10 +42,11 @@ STATIC mp_obj_t stream_write(mp_obj_t self_in, mp_obj_t arg) { nlr_raise(mp_obj_new_exception_msg(&mp_type_OSError, "Operation not supported")); } - uint sz; - const char *buf = mp_obj_str_get_data(arg, &sz); + mp_buffer_info_t bufinfo; + mp_get_buffer_raise(arg, &bufinfo, MP_BUFFER_READ); + int error; - machine_int_t out_sz = o->type->stream_p->write(self_in, buf, sz, &error); + machine_int_t out_sz = o->type->stream_p->write(self_in, bufinfo.buf, bufinfo.len, &error); if (out_sz == -1) { nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_OSError, "[Errno %d]", error)); } else { From 12a9cfed7693a3e2495e619ec90c922c6ecb0908 Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Sat, 26 Apr 2014 05:53:02 +0300 Subject: [PATCH 2/4] modsocket: Make .send() support arbitrary objects with buffer interface. This is CPython-compliant (except that CPython doesn't support buffer protocol for str). --- unix/modsocket.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unix/modsocket.c b/unix/modsocket.c index b7d167c8ad..4c76502da6 100644 --- a/unix/modsocket.c +++ b/unix/modsocket.c @@ -148,9 +148,9 @@ STATIC mp_obj_t socket_send(uint n_args, const mp_obj_t *args) { flags = MP_OBJ_SMALL_INT_VALUE(args[2]); } - uint sz; - const char *buf = mp_obj_str_get_data(args[1], &sz); - int out_sz = send(self->fd, buf, sz, flags); + mp_buffer_info_t bufinfo; + mp_get_buffer_raise(args[1], &bufinfo, MP_BUFFER_READ); + int out_sz = send(self->fd, bufinfo.buf, bufinfo.len, flags); RAISE_ERRNO(out_sz, errno); return MP_OBJ_NEW_SMALL_INT((machine_int_t)out_sz); From 881078403e9edf63f3145f2a484a58d5a74f74f1 Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Sat, 26 Apr 2014 06:20:08 +0300 Subject: [PATCH 3/4] objstr: Implement .lstrip() & .rstrip(). Share code with .strip(). TODO: optimize .rstrip(). --- py/objstr.c | 30 ++++++++++++++++++++++++++++-- py/qstrdefs.h | 2 ++ tests/basics/string_strip.py | 6 ++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/py/objstr.c b/py/objstr.c index b0d5cba6f1..b5504da1b5 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -493,7 +493,9 @@ STATIC mp_obj_t str_startswith(mp_obj_t self_in, mp_obj_t arg) { return MP_BOOL(memcmp(str, prefix, prefix_len) == 0); } -STATIC mp_obj_t str_strip(uint n_args, const mp_obj_t *args) { +enum { LSTRIP, RSTRIP, STRIP }; + +STATIC mp_obj_t str_uni_strip(int type, uint n_args, const mp_obj_t *args) { assert(1 <= n_args && n_args <= 2); assert(MP_OBJ_IS_STR(args[0])); @@ -516,13 +518,18 @@ STATIC mp_obj_t str_strip(uint n_args, const mp_obj_t *args) { machine_uint_t first_good_char_pos = 0; bool first_good_char_pos_set = false; machine_uint_t last_good_char_pos = 0; + // TODO: For RSPLIT, scan from end for (machine_uint_t i = 0; i < orig_str_len; i++) { if (find_subbytes(chars_to_del, chars_to_del_len, &orig_str[i], 1, 1) == NULL) { - last_good_char_pos = i; if (!first_good_char_pos_set) { first_good_char_pos = i; + if (type == LSTRIP) { + last_good_char_pos = orig_str_len - 1; + break; + } first_good_char_pos_set = true; } + last_good_char_pos = i; } } @@ -532,11 +539,26 @@ STATIC mp_obj_t str_strip(uint n_args, const mp_obj_t *args) { } assert(last_good_char_pos >= first_good_char_pos); + if (type == RSTRIP) { + first_good_char_pos = 0; + } //+1 to accomodate the last character machine_uint_t stripped_len = last_good_char_pos - first_good_char_pos + 1; return mp_obj_new_str(orig_str + first_good_char_pos, stripped_len, false); } +STATIC mp_obj_t str_strip(uint n_args, const mp_obj_t *args) { + return str_uni_strip(STRIP, n_args, args); +} + +STATIC mp_obj_t str_lstrip(uint n_args, const mp_obj_t *args) { + return str_uni_strip(LSTRIP, n_args, args); +} + +STATIC mp_obj_t str_rstrip(uint n_args, const mp_obj_t *args) { + return str_uni_strip(RSTRIP, n_args, args); +} + // Takes an int arg, but only parses unsigned numbers, and only changes // *num if at least one digit was parsed. static int str_to_int(const char *str, int *num) { @@ -1354,6 +1376,8 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join); STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_split_obj, 1, 3, str_split); STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_startswith_obj, str_startswith); STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_strip_obj, 1, 2, str_strip); +STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_lstrip_obj, 1, 2, str_lstrip); +STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rstrip_obj, 1, 2, str_rstrip); STATIC MP_DEFINE_CONST_FUN_OBJ_VAR(str_format_obj, 1, mp_obj_str_format); STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_replace_obj, 3, 4, str_replace); STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_count_obj, 2, 4, str_count); @@ -1373,6 +1397,8 @@ STATIC const mp_map_elem_t str_locals_dict_table[] = { { MP_OBJ_NEW_QSTR(MP_QSTR_split), (mp_obj_t)&str_split_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_startswith), (mp_obj_t)&str_startswith_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_strip), (mp_obj_t)&str_strip_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_lstrip), (mp_obj_t)&str_lstrip_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_rstrip), (mp_obj_t)&str_rstrip_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_format), (mp_obj_t)&str_format_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_replace), (mp_obj_t)&str_replace_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_count), (mp_obj_t)&str_count_obj }, diff --git a/py/qstrdefs.h b/py/qstrdefs.h index 784bf59d23..bcbbcf98e9 100644 --- a/py/qstrdefs.h +++ b/py/qstrdefs.h @@ -180,6 +180,8 @@ Q(pop) Q(sort) Q(join) Q(strip) +Q(lstrip) +Q(rstrip) Q(format) Q(key) Q(reverse) diff --git a/tests/basics/string_strip.py b/tests/basics/string_strip.py index 518dfd66ec..8e03eff93a 100644 --- a/tests/basics/string_strip.py +++ b/tests/basics/string_strip.py @@ -4,3 +4,9 @@ print(" T E S T".strip()) print("abcabc".strip("ce")) print("aaa".strip("b")) print("abc efg ".strip("g a")) + +print(' spacious '.lstrip()) +print('www.example.com'.lstrip('cmowz.')) + +print(' spacious '.rstrip()) +print('mississippi'.rstrip('ipz')) From e14d096cb7a30b956f6c5b610c6854a6b17d0592 Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Sat, 26 Apr 2014 06:48:31 +0300 Subject: [PATCH 4/4] objstr: Optimize .rstrip() by scanning string from end. --- py/objstr.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/py/objstr.c b/py/objstr.c index b5504da1b5..6819a4ad68 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -518,19 +518,29 @@ STATIC mp_obj_t str_uni_strip(int type, uint n_args, const mp_obj_t *args) { machine_uint_t first_good_char_pos = 0; bool first_good_char_pos_set = false; machine_uint_t last_good_char_pos = 0; - // TODO: For RSPLIT, scan from end - for (machine_uint_t i = 0; i < orig_str_len; i++) { + machine_uint_t i = 0; + machine_int_t delta = 1; + if (type == RSTRIP) { + i = orig_str_len - 1; + delta = -1; + } + for (machine_uint_t len = orig_str_len; len > 0; len--) { if (find_subbytes(chars_to_del, chars_to_del_len, &orig_str[i], 1, 1) == NULL) { if (!first_good_char_pos_set) { first_good_char_pos = i; if (type == LSTRIP) { last_good_char_pos = orig_str_len - 1; break; + } else if (type == RSTRIP) { + first_good_char_pos = 0; + last_good_char_pos = i; + break; } first_good_char_pos_set = true; } last_good_char_pos = i; } + i += delta; } if (first_good_char_pos == 0 && last_good_char_pos == 0) { @@ -539,9 +549,6 @@ STATIC mp_obj_t str_uni_strip(int type, uint n_args, const mp_obj_t *args) { } assert(last_good_char_pos >= first_good_char_pos); - if (type == RSTRIP) { - first_good_char_pos = 0; - } //+1 to accomodate the last character machine_uint_t stripped_len = last_good_char_pos - first_good_char_pos + 1; return mp_obj_new_str(orig_str + first_good_char_pos, stripped_len, false);