py/objstr: Make dedicated splitlines function, supporting different newlines.

It now supports \n, \r and \r\n as newline separators.

Adds 56 bytes to stmhal and 80 bytes to unix x86-64.

Fixes issue #1689.
This commit is contained in:
Damien George 2016-05-13 12:21:32 +01:00
parent 1e388079f9
commit cc80c4dd59
2 changed files with 63 additions and 29 deletions

View File

@ -464,9 +464,7 @@ STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
return mp_obj_new_str_from_vstr(self_type, &vstr);
}
enum {SPLIT = 0, KEEP = 1, SPLITLINES = 2};
STATIC inline mp_obj_t str_split_internal(mp_uint_t n_args, const mp_obj_t *args, int type) {
mp_obj_t mp_obj_str_split(size_t n_args, const mp_obj_t *args) {
const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
mp_int_t splits = -1;
mp_obj_t sep = mp_const_none;
@ -527,13 +525,7 @@ STATIC inline mp_obj_t str_split_internal(mp_uint_t n_args, const mp_obj_t *args
}
s++;
}
mp_uint_t sub_len = s - start;
if (MP_LIKELY(!(sub_len == 0 && s == top && (type && SPLITLINES)))) {
if (start + sub_len != top && (type & KEEP)) {
sub_len++;
}
mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, sub_len));
}
mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, s - start));
if (s >= top) {
break;
}
@ -547,25 +539,49 @@ STATIC inline mp_obj_t str_split_internal(mp_uint_t n_args, const mp_obj_t *args
return res;
}
// Non-static split entry point: forwards n_args/args unchanged to
// str_split_internal() with the SPLIT mode constant and returns its result.
mp_obj_t mp_obj_str_split(size_t n_args, const mp_obj_t *args) {
return str_split_internal(n_args, args, SPLIT);
}
#if MICROPY_PY_BUILTINS_STR_SPLITLINES
// splitlines([keepends]) for str and bytes objects.
//
// pos_args[0] is the object being split; the single optional argument
// "keepends" (positional or keyword, default false) selects whether each
// returned piece retains its line terminator.
//
// "\n", "\r" and "\r\n" are all recognised as line terminators.  The result is
// a new list whose elements have the same type as pos_args[0].  Data that ends
// with a terminator does not yield a trailing empty element, and empty input
// yields an empty list.
STATIC mp_obj_t str_splitlines(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
    enum { ARG_keepends };
    static const mp_arg_t allowed_args[] = {
        { MP_QSTR_keepends, MP_ARG_BOOL, {.u_bool = false} },
    };

    // parse args (pos_args[0] is the receiver, so it is skipped here)
    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
    mp_arg_parse_all(n_args - 1, pos_args + 1, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);

    const mp_obj_type_t *self_type = mp_obj_get_type(pos_args[0]);
    mp_obj_t res = mp_obj_new_list(0, NULL);

    GET_STR_DATA_LEN(pos_args[0], s, len);
    const byte *top = s + len;

    while (s < top) {
        const byte *start = s;
        // number of bytes in the terminator that ended this line; stays 0 when
        // the line is ended by the end of the data instead
        size_t match = 0;

        while (s < top) {
            if (*s == '\n') {
                match = 1;
                break;
            } else if (*s == '\r') {
                // Only peek at the following byte when it lies inside the data,
                // so a trailing "\r" never causes a read at or beyond top.
                if (s + 1 < top && s[1] == '\n') {
                    match = 2;
                } else {
                    match = 1;
                }
                break;
            }
            s++;
        }

        size_t sub_len = s - start;
        if (args[ARG_keepends].u_bool) {
            sub_len += match;
        }
        mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, sub_len));

        // step over the terminator (no-op when the end of data was reached)
        s += match;
    }

    return res;
}
#endif

View File

@ -1,13 +1,31 @@
# Feature probe: skip the whole test when this build has no str.splitlines.
try:
    str.splitlines
except AttributeError:
    # Only a missing attribute means "unsupported"; any other exception
    # (including KeyboardInterrupt) should propagate instead of printing SKIP.
    import sys
    print("SKIP")
    sys.exit()
# test string.splitlines() method

# default call: line terminators are not included in the results
for text in (
    # \n as newline
    "foo\nbar",
    "foo\nbar\n",
    "foo and\nbar\n",
    "foo\nbar\n\n",
    "foo\n\nbar\n\n",
    "\nfoo\nbar\n",
    # \r as newline
    "foo\rbar\r",
    "\rfoo and\r\rbar\r",
    # \r\n as newline
    "foo\r\nbar\r\n",
    "\r\nfoo and\r\n\r\nbar\r\n",
):
    print(text.splitlines())

# keepends given positionally
for text in ("foo\nbar", "foo\nbar\n"):
    print(text.splitlines(True))

# keepends given by keyword
for text in ("foo\nbar", "foo\nbar\n"):
    print(text.splitlines(keepends=True))

print("foo\nbar\n\n".splitlines(True))

# keepends with \r and \r\n terminators
for text in (
    "foo\rbar",
    "foo\rbar\r\r",
    "foo\r\nbar",
    "foo\r\nbar\r\n\r\n",
):
    print(text.splitlines(keepends=True))

# bytes objects
for data in (b"foo\nbar", b"foo\nbar\n"):
    print(data.splitlines())
print(b"foo\r\nbar\r\n\r\n".splitlines(True))