From 69135219111be239d6088457604dcfc185e6ceee Mon Sep 17 00:00:00 2001
From: Paul Sokolovsky
Date: Sat, 10 May 2014 19:47:41 +0300
Subject: [PATCH] objstr: Implement .lower() and .upper().

---
Reviewer note (text here is not part of the commit message):
str_caseconv() walks the string data one element at a time and maps each
through unichar_toupper()/unichar_tolower(). Those helpers only change
values that char_is_lower()/char_is_upper() accept, and both checks
require c < 128, so every other value is copied through unchanged.

 py/misc.h                       |  2 ++
 py/objstr.c                     | 30 ++++++++++++++++++++++++++++++
 py/qstrdefs.h                   |  2 ++
 py/unicode.c                    | 16 +++++++++++++++-
 tests/basics/string_upperlow.py |  4 ++++
 5 files changed, 53 insertions(+), 1 deletion(-)
 create mode 100644 tests/basics/string_upperlow.py

diff --git a/py/misc.h b/py/misc.h
index 7f3e83d8c6..013b5f123e 100644
--- a/py/misc.h
+++ b/py/misc.h
@@ -96,6 +96,8 @@ bool unichar_isalpha(unichar c);
 bool unichar_isprint(unichar c);
 bool unichar_isdigit(unichar c);
 bool unichar_isxdigit(unichar c);
+unichar unichar_tolower(unichar c);
+unichar unichar_toupper(unichar c);
 
 /** variable string *********************************************/
 
diff --git a/py/objstr.c b/py/objstr.c
index e42b21fc6c..26c6edde9e 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -1365,6 +1365,32 @@ STATIC mp_obj_t str_rpartition(mp_obj_t self_in, mp_obj_t arg) {
     return str_partitioner(self_in, arg, -1);
 }
 
+enum { CASE_UPPER, CASE_LOWER };
+
+// Supposedly not too critical operations, so optimize for code size
+STATIC mp_obj_t str_caseconv(int op, mp_obj_t self_in) {
+    GET_STR_DATA_LEN(self_in, self_data, self_len);
+    byte *data;
+    mp_obj_t s = mp_obj_str_builder_start(mp_obj_get_type(self_in), self_len, &data);
+    for (int i = 0; i < self_len; i++) {
+        if (op == CASE_UPPER) {
+            *data++ = unichar_toupper(*self_data++);
+        } else {
+            *data++ = unichar_tolower(*self_data++);
+        }
+    }
+    *data = 0;
+    return mp_obj_str_builder_end(s);
+}
+
+STATIC mp_obj_t str_lower(mp_obj_t self_in) {
+    return str_caseconv(CASE_LOWER, self_in);
+}
+
+STATIC mp_obj_t str_upper(mp_obj_t self_in) {
+    return str_caseconv(CASE_UPPER, self_in);
+}
+
 #if MICROPY_CPYTHON_COMPAT
 // These methods are superfluous in the presense of str() and bytes()
 // constructors.
@@ -1428,6 +1454,8 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_replace_obj, 3, 4, str_replace);
 STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_count_obj, 2, 4, str_count);
 STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_partition_obj, str_partition);
 STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_rpartition_obj, str_rpartition);
+STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_lower_obj, str_lower);
+STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_upper_obj, str_upper);
 
 STATIC const mp_map_elem_t str_locals_dict_table[] = {
 #if MICROPY_CPYTHON_COMPAT
@@ -1449,6 +1477,8 @@ STATIC const mp_map_elem_t str_locals_dict_table[] = {
     { MP_OBJ_NEW_QSTR(MP_QSTR_count), (mp_obj_t)&str_count_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_partition), (mp_obj_t)&str_partition_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_rpartition), (mp_obj_t)&str_rpartition_obj },
+    { MP_OBJ_NEW_QSTR(MP_QSTR_lower), (mp_obj_t)&str_lower_obj },
+    { MP_OBJ_NEW_QSTR(MP_QSTR_upper), (mp_obj_t)&str_upper_obj },
 };
 
 STATIC MP_DEFINE_CONST_DICT(str_locals_dict, str_locals_dict_table);
diff --git a/py/qstrdefs.h b/py/qstrdefs.h
index 7de2491fa8..13476b3be8 100644
--- a/py/qstrdefs.h
+++ b/py/qstrdefs.h
@@ -239,6 +239,8 @@ Q(startswith)
 Q(replace)
 Q(partition)
 Q(rpartition)
+Q(lower)
+Q(upper)
 Q(iterable)
 Q(start)
 
diff --git a/py/unicode.c b/py/unicode.c
index fff6030fc0..1cd82f3be8 100644
--- a/py/unicode.c
+++ b/py/unicode.c
@@ -97,6 +97,7 @@ bool unichar_isxdigit(unichar c) {
 bool char_is_alpha_or_digit(unichar c) {
     return c < 128 && (attr[c] & (FL_ALPHA | FL_DIGIT)) != 0;
 }
+*/
 
 bool char_is_upper(unichar c) {
     return c < 128 && (attr[c] & FL_UPPER) != 0;
@@ -105,4 +106,17 @@ bool char_is_upper(unichar c) {
 bool char_is_lower(unichar c) {
     return c < 128 && (attr[c] & FL_LOWER) != 0;
 }
-*/
+
+unichar unichar_tolower(unichar c) {
+    if (char_is_upper(c)) {
+        return c + 0x20;
+    }
+    return c;
+}
+
+unichar unichar_toupper(unichar c) {
+    if (char_is_lower(c)) {
+        return c - 0x20;
+    }
+    return c;
+}
diff --git a/tests/basics/string_upperlow.py b/tests/basics/string_upperlow.py
new file mode 100644
index 0000000000..950ea24d11
--- /dev/null
+++ b/tests/basics/string_upperlow.py
@@ -0,0 +1,4 @@
+print("".lower())
+print(" t\tn\nr\rv\vf\f".upper())
+print(" T E S T".lower())
+print("*@a1b2cabc_[]/\\".upper())