objstr.c: Partial implementation of .rsplit().
sep=None is TODO.
This commit is contained in:
parent
51fab28e94
commit
2a27365854
66
py/objstr.c
66
py/objstr.c
@ -38,6 +38,7 @@
|
||||
#include "runtime.h"
|
||||
#include "pfenv.h"
|
||||
#include "objstr.h"
|
||||
#include "objlist.h"
|
||||
|
||||
STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, uint n_args, const mp_obj_t *args);
|
||||
const mp_obj_t mp_const_empty_bytes;
|
||||
@ -483,6 +484,69 @@ STATIC mp_obj_t str_split(uint n_args, const mp_obj_t *args) {
|
||||
return res;
|
||||
}
|
||||
|
||||
STATIC mp_obj_t str_rsplit(uint n_args, const mp_obj_t *args) {
|
||||
if (n_args < 3) {
|
||||
// If we don't have split limit, it doesn't matter from which side
|
||||
// we split.
|
||||
return str_split(n_args, args);
|
||||
}
|
||||
const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
|
||||
mp_obj_t sep = args[1];
|
||||
GET_STR_DATA_LEN(args[0], s, len);
|
||||
|
||||
machine_int_t splits = mp_obj_get_int(args[2]);
|
||||
machine_int_t org_splits = splits;
|
||||
// Preallocate list to the max expected # of elements, as we
|
||||
// will fill it from the end.
|
||||
mp_obj_list_t *res = mp_obj_new_list(splits + 1, NULL);
|
||||
int idx = splits;
|
||||
|
||||
if (sep == mp_const_none) {
|
||||
// TODO
|
||||
assert(0);
|
||||
} else {
|
||||
uint sep_len;
|
||||
const char *sep_str = mp_obj_str_get_data(sep, &sep_len);
|
||||
|
||||
if (sep_len == 0) {
|
||||
nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator"));
|
||||
}
|
||||
|
||||
const byte *beg = s;
|
||||
const byte *last = s + len;
|
||||
for (;;) {
|
||||
s = last - sep_len;
|
||||
for (;;) {
|
||||
if (splits == 0 || s < beg) {
|
||||
break;
|
||||
} else if (memcmp(s, sep_str, sep_len) == 0) {
|
||||
break;
|
||||
}
|
||||
s--;
|
||||
}
|
||||
if (s < beg || splits == 0) {
|
||||
res->items[idx] = str_new(self_type, beg, last - beg);
|
||||
break;
|
||||
}
|
||||
res->items[idx--] = str_new(self_type, s + sep_len, last - s - sep_len);
|
||||
last = s;
|
||||
if (splits > 0) {
|
||||
splits--;
|
||||
}
|
||||
}
|
||||
if (idx != 0) {
|
||||
// We split less parts than split limit, now go cleanup surplus
|
||||
int used = org_splits + 1 - idx;
|
||||
memcpy(res->items, &res->items[idx], used * sizeof(mp_obj_t));
|
||||
mp_seq_clear(res->items, used, res->alloc, sizeof(*res->items));
|
||||
res->len = used;
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
STATIC mp_obj_t str_finder(uint n_args, const mp_obj_t *args, machine_int_t direction, bool is_index) {
|
||||
assert(2 <= n_args && n_args <= 4);
|
||||
assert(MP_OBJ_IS_STR(args[0]));
|
||||
@ -1460,6 +1524,7 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_index_obj, 2, 4, str_index);
|
||||
STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rindex_obj, 2, 4, str_rindex);
|
||||
STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join);
|
||||
STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_split_obj, 1, 3, str_split);
|
||||
STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rsplit_obj, 1, 3, str_rsplit);
|
||||
STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_startswith_obj, str_startswith);
|
||||
STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_strip_obj, 1, 2, str_strip);
|
||||
STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_lstrip_obj, 1, 2, str_lstrip);
|
||||
@ -1483,6 +1548,7 @@ STATIC const mp_map_elem_t str_locals_dict_table[] = {
|
||||
{ MP_OBJ_NEW_QSTR(MP_QSTR_rindex), (mp_obj_t)&str_rindex_obj },
|
||||
{ MP_OBJ_NEW_QSTR(MP_QSTR_join), (mp_obj_t)&str_join_obj },
|
||||
{ MP_OBJ_NEW_QSTR(MP_QSTR_split), (mp_obj_t)&str_split_obj },
|
||||
{ MP_OBJ_NEW_QSTR(MP_QSTR_rsplit), (mp_obj_t)&str_rsplit_obj },
|
||||
{ MP_OBJ_NEW_QSTR(MP_QSTR_startswith), (mp_obj_t)&str_startswith_obj },
|
||||
{ MP_OBJ_NEW_QSTR(MP_QSTR_strip), (mp_obj_t)&str_strip_obj },
|
||||
{ MP_OBJ_NEW_QSTR(MP_QSTR_lstrip), (mp_obj_t)&str_lstrip_obj },
|
||||
|
@ -236,6 +236,7 @@ Q(find)
|
||||
Q(rfind)
|
||||
Q(rindex)
|
||||
Q(split)
|
||||
Q(rsplit)
|
||||
Q(startswith)
|
||||
Q(replace)
|
||||
Q(partition)
|
||||
|
42
tests/basics/string_rsplit.py
Normal file
42
tests/basics/string_rsplit.py
Normal file
@ -0,0 +1,42 @@
|
||||
# default separator (whitespace)
|
||||
print("a b".rsplit())
|
||||
#print(" a b ".rsplit(None))
|
||||
#print(" a b ".rsplit(None, 1))
|
||||
#print(" a b ".rsplit(None, 2))
|
||||
#print(" a b c ".rsplit(None, 1))
|
||||
#print(" a b c ".rsplit(None, 0))
|
||||
#print(" a b c ".rsplit(None, -1))
|
||||
|
||||
# empty separator should fail
|
||||
try:
|
||||
"abc".rsplit('')
|
||||
except ValueError:
|
||||
print("ValueError")
|
||||
|
||||
# non-empty separator
|
||||
print("abc".rsplit("a"))
|
||||
print("abc".rsplit("b"))
|
||||
print("abc".rsplit("c"))
|
||||
print("abc".rsplit("z"))
|
||||
print("abc".rsplit("ab"))
|
||||
print("abc".rsplit("bc"))
|
||||
print("abc".rsplit("abc"))
|
||||
print("abc".rsplit("abcd"))
|
||||
print("abcabc".rsplit("bc"))
|
||||
print("abcabc".rsplit("bc", 0))
|
||||
print("abcabc".rsplit("bc", 1))
|
||||
print("abcabc".rsplit("bc", 2))
|
||||
|
||||
print("10/11/12".rsplit("/", 1))
|
||||
print("10/11/12".rsplit("/", 2))
|
||||
print("10/11/12".rsplit("/", 3))
|
||||
print("10/11/12".rsplit("/", 4))
|
||||
print("10/11/12".rsplit("/", 5))
|
||||
|
||||
print("/*10/*11/*12/*".rsplit("/*", 1))
|
||||
print("/*10/*11/*12/*".rsplit("/*", 2))
|
||||
print("/*10/*11/*12/*".rsplit("/*", 3))
|
||||
print("/*10/*11/*12/*".rsplit("/*", 4))
|
||||
print("/*10/*11/*12/*".rsplit("/*", 5))
|
||||
|
||||
print(b"abcabc".rsplit(b"bc", 2))
|
Loading…
x
Reference in New Issue
Block a user