py/objstr: Don't treat bytes as unicode in str.count.
`b'\xaa \xaa'.count(b'\xaa')` now (correctly) returns 2 instead of 1. Fixes issue #9404. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
This commit is contained in:
parent
dd9dcb594c
commit
9d6f474ea4
|
@ -1768,6 +1768,8 @@ STATIC mp_obj_t str_count(size_t n_args, const mp_obj_t *args) {
|
||||||
return MP_OBJ_NEW_SMALL_INT(utf8_charlen(start, end - start) + 1);
|
return MP_OBJ_NEW_SMALL_INT(utf8_charlen(start, end - start) + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool is_str = self_type == &mp_type_str;
|
||||||
|
|
||||||
// count the occurrences
|
// count the occurrences
|
||||||
mp_int_t num_occurrences = 0;
|
mp_int_t num_occurrences = 0;
|
||||||
for (const byte *haystack_ptr = start; haystack_ptr + needle_len <= end;) {
|
for (const byte *haystack_ptr = start; haystack_ptr + needle_len <= end;) {
|
||||||
|
@ -1775,7 +1777,7 @@ STATIC mp_obj_t str_count(size_t n_args, const mp_obj_t *args) {
|
||||||
num_occurrences++;
|
num_occurrences++;
|
||||||
haystack_ptr += needle_len;
|
haystack_ptr += needle_len;
|
||||||
} else {
|
} else {
|
||||||
haystack_ptr = utf8_next_char(haystack_ptr);
|
haystack_ptr = is_str ? utf8_next_char(haystack_ptr) : haystack_ptr + 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -48,6 +48,13 @@ print(b"aaaa".count(b'a', 1, 5))
|
||||||
print(b"aaaa".count(b'a', -1, 5))
|
print(b"aaaa".count(b'a', -1, 5))
|
||||||
print(b"abbabba".count(b"abba"))
|
print(b"abbabba".count(b"abba"))
|
||||||
|
|
||||||
|
print(b'\xaa \xaa'.count(b'\xaa'))
|
||||||
|
print(b'\xaa \xaa \xaa \xaa'.count(b'\xaa'))
|
||||||
|
print(b'\xaa \xaa \xaa \xaa'.count(b'\xaa', 1))  # start=1 is an argument to count(), not print()
|
||||||
|
print(b'\xaa \xaa \xaa \xaa'.count(b'\xaa', 2))  # start=2 is an argument to count(), not print()
|
||||||
|
print(b'\xaa \xaa \xaa \xaa'.count(b'\xaa', 1, 3))  # start=1, end=3 are arguments to count(), not print()
|
||||||
|
print(b'\xaa \xaa \xaa \xaa'.count(b'\xaa', 2, 3))  # start=2, end=3 are arguments to count(), not print()
|
||||||
|
|
||||||
def t():
|
def t():
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue