From 06f22cb59c493371c8a394680916e76f9fedf604 Mon Sep 17 00:00:00 2001 From: Jeff Epler Date: Fri, 12 Aug 2022 08:25:27 -0500 Subject: [PATCH 1/2] When reading data from a file into a str, check if it's valid UTF-8. Otherwise, weird stuff can happen down the line when it is print()ed, especially as it can break the webrepl of CircuitPython. --- py/stream.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/py/stream.c b/py/stream.c index 27609043fc..e7e77341bd 100644 --- a/py/stream.c +++ b/py/stream.c @@ -31,6 +31,7 @@ #include "py/objstr.h" #include "py/stream.h" #include "py/runtime.h" +#include "py/unicode.h" #include "supervisor/shared/translate/translate.h" // This file defines generic Python stream read/write methods which @@ -43,6 +44,13 @@ STATIC mp_obj_t stream_readall(mp_obj_t self_in); #define STREAM_CONTENT_TYPE(stream) (((stream)->is_text) ? &mp_type_str : &mp_type_bytes) +static mp_obj_t mp_obj_new_str_from_vstr_check(const mp_obj_type_t *type, vstr_t *vstr) { + if (type == &mp_type_str && !utf8_check((void *)vstr->buf, vstr->len)) { + mp_raise_msg(&mp_type_UnicodeError, NULL); + } + return mp_obj_new_str_from_vstr(type, vstr); +} + // Returns error condition in *errcode, if non-zero, return value is number of bytes written // before error condition occurred. If *errcode == 0, returns total bytes written (which will // be equal to input size). 
@@ -201,8 +209,7 @@ STATIC mp_obj_t stream_read_generic(size_t n_args, const mp_obj_t *args, byte fl } } } - - return mp_obj_new_str_from_vstr(&mp_type_str, &vstr); + return mp_obj_new_str_from_vstr_check(&mp_type_str, &vstr); } #endif @@ -223,7 +230,7 @@ STATIC mp_obj_t stream_read_generic(size_t n_args, const mp_obj_t *args, byte fl mp_raise_OSError(error); } else { vstr.len = out_sz; - return mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(stream_p), &vstr); + return mp_obj_new_str_from_vstr_check(STREAM_CONTENT_TYPE(stream_p), &vstr); } } @@ -364,7 +371,7 @@ STATIC mp_obj_t stream_readall(mp_obj_t self_in) { } vstr.len = total_size; - return mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(stream_p), &vstr); + return mp_obj_new_str_from_vstr_check(STREAM_CONTENT_TYPE(stream_p), &vstr); } // Unbuffered, inefficient implementation of readline() for raw I/O files. @@ -417,7 +424,7 @@ STATIC mp_obj_t stream_unbuffered_readline(size_t n_args, const mp_obj_t *args) } } - return mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(stream_p), &vstr); + return mp_obj_new_str_from_vstr_check(STREAM_CONTENT_TYPE(stream_p), &vstr); } MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_stream_unbuffered_readline_obj, 1, 2, stream_unbuffered_readline); From 606c75ab62491c6620c6f75c9f5b88157b9736d6 Mon Sep 17 00:00:00 2001 From: Jeff Epler Date: Fri, 12 Aug 2022 08:34:33 -0500 Subject: [PATCH 2/2] test must read data in binary mode --- tests/extmod/qrio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/extmod/qrio.py b/tests/extmod/qrio.py index 5e9ed4c12a..53c83706f8 100644 --- a/tests/extmod/qrio.py +++ b/tests/extmod/qrio.py @@ -5,7 +5,7 @@ except: raise SystemExit loc = __file__.rsplit("/", 1)[0] -with open(f"{loc}/data/qr.pgm") as f: +with open(f"{loc}/data/qr.pgm", "rb") as f: content = f.read()[-320 * 240 :] decoder = qrio.QRDecoder(320, 240)