Turn on unicode for FATFS

This also tweaks the repr for Unicode strings so that it escapes only a
few code points instead of every non-ASCII one. This makes emoji show up
in os.listdir() output, for example.

Also, enable exFAT support on full builds.

Fixes #5146
This commit is contained in:
Scott Shawcroft 2021-08-17 17:41:59 -07:00
parent 5b0009cbc4
commit 11f1c42bb5
No known key found for this signature in database
GPG Key ID: 0DFD512649C052DA
5 changed files with 30 additions and 6 deletions

View File

@ -1175,6 +1175,7 @@ static DWORD get_fat ( /* 0xFFFFFFFF:Disk error, 1:Internal error, 2..0x7FF
break; break;
} }
} }
MP_FALLTHROUGH
/* go to default */ /* go to default */
#endif #endif
default: default:
@ -5518,6 +5519,7 @@ FRESULT f_mkfs (
} }
st = 1; /* Do not compress short run */ st = 1; /* Do not compress short run */
/* go to next case */ /* go to next case */
MP_FALLTHROUGH
case 1: case 1:
ch = si++; /* Fill the short run */ ch = si++; /* Fill the short run */
if (--j == 0) st = 0; if (--j == 0) st = 0;

View File

@ -163,8 +163,11 @@
/ memory for the working buffer, memory management functions, ff_memalloc() and / memory for the working buffer, memory management functions, ff_memalloc() and
/ ff_memfree() in ffsystem.c, need to be added to the project. */ / ff_memfree() in ffsystem.c, need to be added to the project. */
#ifdef MICROPY_FATFS_LFN_UNICODE
#define FF_LFN_UNICODE (MICROPY_FATFS_LFN_UNICODE)
#else
#define FF_LFN_UNICODE 0 #define FF_LFN_UNICODE 0
#endif
/* This option switches the character encoding on the API when LFN is enabled. /* This option switches the character encoding on the API when LFN is enabled.
/ /
/ 0: ANSI/OEM in current CP (TCHAR = char) / 0: ANSI/OEM in current CP (TCHAR = char)

View File

@ -132,12 +132,15 @@
// //
// 1 = SFN/ANSI 437=LFN/U.S.(OEM) // 1 = SFN/ANSI 437=LFN/U.S.(OEM)
#define MICROPY_FATFS_ENABLE_LFN (1) #define MICROPY_FATFS_ENABLE_LFN (1)
// Code page is ignored because unicode is enabled.
// Don't use parens on the value below because it gets combined with a prefix in // Don't use parens on the value below because it gets combined with a prefix in
// the preprocessor. // the preprocessor.
#define MICROPY_FATFS_LFN_CODE_PAGE 437 #define MICROPY_FATFS_LFN_CODE_PAGE 437
#define MICROPY_FATFS_USE_LABEL (1) #define MICROPY_FATFS_USE_LABEL (1)
#define MICROPY_FATFS_RPATH (2) #define MICROPY_FATFS_RPATH (2)
#define MICROPY_FATFS_MULTI_PARTITION (1) #define MICROPY_FATFS_MULTI_PARTITION (1)
#define MICROPY_FATFS_EXFAT (CIRCUITPY_FULL_BUILD)
#define MICROPY_FATFS_LFN_UNICODE 2 // UTF-8
// Only enable this if you really need it. It allocates a byte cache of this size. // Only enable this if you really need it. It allocates a byte cache of this size.
// #define MICROPY_FATFS_MAX_SS (4096) // #define MICROPY_FATFS_MAX_SS (4096)

View File

@ -41,6 +41,13 @@ STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_bu
/******************************************************************************/ /******************************************************************************/
/* str */ /* str */
// These settings approximate CPython's printability. They are not
// exhaustive and may print "unprintable" characters. All ASCII control codes
// are escaped, along with variable-width spaces and the line and paragraph
// separators. Unlike CPython, we do not escape private use codes or reserved
// characters. We assume that the Unicode input is well formed.
// CPython policy is documented here: https://github.com/python/cpython/blob/bb3e0c240bc60fe08d332ff5955d54197f79751c/Objects/unicodectype.c#L147-L159
STATIC void uni_print_quoted(const mp_print_t *print, const byte *str_data, uint str_len) { STATIC void uni_print_quoted(const mp_print_t *print, const byte *str_data, uint str_len) {
// this escapes characters, but it will be very slow to print (calling print many times) // this escapes characters, but it will be very slow to print (calling print many times)
bool has_single_quote = false; bool has_single_quote = false;
@ -61,25 +68,26 @@ STATIC void uni_print_quoted(const mp_print_t *print, const byte *str_data, uint
while (s < top) { while (s < top) {
unichar ch; unichar ch;
ch = utf8_get_char(s); ch = utf8_get_char(s);
const byte *start = s;
s = utf8_next_char(s); s = utf8_next_char(s);
if (ch == quote_char) { if (ch == quote_char) {
mp_printf(print, "\\%c", quote_char); mp_printf(print, "\\%c", quote_char);
} else if (ch == '\\') { } else if (ch == '\\') {
mp_print_str(print, "\\\\"); mp_print_str(print, "\\\\");
} else if (32 <= ch && ch <= 126) {
mp_printf(print, "%c", ch);
} else if (ch == '\n') { } else if (ch == '\n') {
mp_print_str(print, "\\n"); mp_print_str(print, "\\n");
} else if (ch == '\r') { } else if (ch == '\r') {
mp_print_str(print, "\\r"); mp_print_str(print, "\\r");
} else if (ch == '\t') { } else if (ch == '\t') {
mp_print_str(print, "\\t"); mp_print_str(print, "\\t");
} else if (ch < 0x100) { } else if (ch <= 0x1f || (0x7f <= ch && ch <= 0xa0) || ch == 0xad) {
mp_printf(print, "\\x%02x", ch); mp_printf(print, "\\x%02x", ch);
} else if (ch < 0x10000) { } else if ((0x2000 <= ch && ch <= 0x200f) || ch == 0x2028 || ch == 0x2029) {
mp_printf(print, "\\u%04x", ch); mp_printf(print, "\\u%04x", ch);
} else { } else {
mp_printf(print, "\\U%08x", ch); // Print the full character out.
int width = s - start;
mp_print_strn(print, (const char *)start, width, 0, ' ', width);
} }
} }
mp_printf(print, "%c", quote_char); mp_printf(print, "%c", quote_char);

View File

@ -0,0 +1,8 @@
# Prints a string, and its repr(), that mixes ASCII text, non-ASCII characters
# of increasing code point size, and characters repr() is expected to escape
# (newline, carriage return, tab, both quote styles, and a backslash).
# ¥ (U+00A5): code point fits in 1 byte (encoded as 2 bytes in UTF-8)
# Œ (U+0152): code point fits in 2 bytes (also 2 bytes in UTF-8)
# 😅 (U+1F605): 4 bytes wide in UTF-8
a = "hello¥Œ😅.txt\n\r\t'\"\\"
print(a)
print(repr(a))