diff --git a/py/lexer.c b/py/lexer.c index 13c3a3e9b5..8a8875ed51 100644 --- a/py/lexer.c +++ b/py/lexer.c @@ -492,11 +492,19 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) { } } if (c != MP_LEXER_EOF) { + #if MICROPY_PY_BUILTINS_STR_UNICODE if (c < 0x110000 && !is_bytes) { vstr_add_char(&lex->vstr, c); } else if (c < 0x100 && is_bytes) { vstr_add_byte(&lex->vstr, c); - } else { + } + #else + // without unicode everything is just added as an 8-bit byte + if (c < 0x100) { + vstr_add_byte(&lex->vstr, c); + } + #endif + else { assert(!"TODO: Throw an error, invalid escape code probably"); } } diff --git a/py/misc.h b/py/misc.h index 42d30055f9..b9ef68badd 100644 --- a/py/misc.h +++ b/py/misc.h @@ -92,7 +92,15 @@ size_t m_get_peak_bytes_allocated(void); /** unichar / UTF-8 *********************************************/ -typedef int unichar; // TODO +#if MICROPY_PY_BUILTINS_STR_UNICODE +#include <stdint.h> // only include if we need it +// with unicode enabled we need a type which can fit chars up to 0x10ffff +typedef uint32_t unichar; +#else +// without unicode enabled we only need to fit chars up to 0xff +// (on 16-bit archs uint is 16-bits and more efficient than uint32_t) +typedef uint unichar; +#endif unichar utf8_get_char(const byte *s); const byte *utf8_next_char(const byte *s); diff --git a/py/modbuiltins.c b/py/modbuiltins.c index 68a22934b1..d537d49dea 100644 --- a/py/modbuiltins.c +++ b/py/modbuiltins.c @@ -182,11 +182,11 @@ STATIC mp_obj_t mp_builtin_chr(mp_obj_t o_in) { return mp_obj_new_str(str, len, true); #else mp_int_t ord = mp_obj_get_int(o_in); - if (0 <= ord && ord <= 0x10ffff) { + if (0 <= ord && ord <= 0xff) { char str[1] = {ord}; return mp_obj_new_str(str, 1, true); } else { - nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "chr() arg not in range(0x110000)")); + nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "chr() arg not in range(256)")); } #endif }