From e82940697c777160fc382e4927b60b3ac9d3f447 Mon Sep 17 00:00:00 2001
From: Jeff Epler
Date: Tue, 8 Sep 2020 20:54:47 -0500
Subject: [PATCH] Fix decompression of unicode values above 2047

Two problems: The lead byte for 3-byte sequences was wrong, and one
mid-byte was not even filled in due to a missing "++"!

Apparently this was broken ever since the first "Compress as unicode,
not bytes" commit, but I believed I'd "tested" it by running on the
Pinyin translation.

This rendered at least the Korean and Japanese translations completely
illegible, affecting 5.0 and all later releases.
---
 supervisor/shared/translate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/supervisor/shared/translate.c b/supervisor/shared/translate.c
index 187d5ff8a5..6218ff461b 100644
--- a/supervisor/shared/translate.c
+++ b/supervisor/shared/translate.c
@@ -51,8 +51,8 @@ STATIC int put_utf8(char *buf, int u) {
         *buf = 0b10000000 | (u & 0b00111111);
         return 2;
     } else { // u <= 0xffff)
-        *buf++ = 0b11000000 | (u >> 12);
-        *buf = 0b10000000 | ((u >> 6) & 0b00111111);
+        *buf++ = 0b11100000 | (u >> 12);
+        *buf++ = 0b10000000 | ((u >> 6) & 0b00111111);
         *buf = 0b10000000 | (u & 0b00111111);
         return 3;
     }