py/persistentcode: Remove unicode feature flag from .mpy file.

Prior to this commit, even with unicode disabled, .py and .mpy files could
contain unicode characters, e.g. by entering them directly in a string as
UTF-8 encoded text.

The only thing the compiler disallowed (with unicode disabled) was using
\uxxxx and \Uxxxxxxxx notation to specify a character with value >= 0x100
within a string; that would give a SyntaxError.

With this change mpy-cross will now accept \u and \U notation to insert a
character with value >= 0x100 into a string (because the -mno-unicode
option is now gone, there's no way to forbid this).  The runtime will
happily work with strings with such characters, just like it already works
with strings with characters that were utf-8 encoded directly.

This change simplifies things because there are no longer any feature
flags in .mpy files, and any bytecode .mpy will now run on any target.

Signed-off-by: Damien George <damien@micropython.org>
This commit is contained in:
Damien George 2022-05-16 19:20:52 +10:00
parent b295b6f1f3
commit c49d5207e9
8 changed files with 35 additions and 56 deletions

View File

@ -108,7 +108,6 @@ STATIC int usage(char **argv) {
"\n"
"Target specific options:\n"
"-msmall-int-bits=number : set the maximum bits used to encode a small-int\n"
"-mno-unicode : don't support unicode in compiled strings\n"
"-march=<arch> : set architecture for native emitter; x86, x64, armv6, armv7m, armv7em, armv7emsp, armv7emdp, xtensa, xtensawin\n"
"\n"
"Implementation specific options:\n", argv[0]
@ -203,7 +202,6 @@ MP_NOINLINE int main_(int argc, char **argv) {
// set default compiler configuration
mp_dynamic_compiler.small_int_bits = 31;
mp_dynamic_compiler.py_builtins_str_unicode = 1;
#if defined(__i386__)
mp_dynamic_compiler.native_arch = MP_NATIVE_ARCH_X86;
mp_dynamic_compiler.nlr_buf_num_regs = MICROPY_NLR_NUM_REGS_X86;
@ -261,10 +259,6 @@ MP_NOINLINE int main_(int argc, char **argv) {
return usage(argv);
}
// TODO check that small_int_bits is within range of host's capabilities
} else if (strcmp(argv[a], "-mno-unicode") == 0) {
mp_dynamic_compiler.py_builtins_str_unicode = 0;
} else if (strcmp(argv[a], "-municode") == 0) {
mp_dynamic_compiler.py_builtins_str_unicode = 1;
} else if (strncmp(argv[a], "-march=", sizeof("-march=") - 1) == 0) {
const char *arch = argv[a] + sizeof("-march=") - 1;
if (strcmp(arch, "x86") == 0) {

View File

@ -473,25 +473,23 @@ STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring)
}
}
if (c != MP_LEXER_EOF) {
if (MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC) {
if (c < 0x110000 && lex->tok_kind == MP_TOKEN_STRING) {
vstr_add_char(&lex->vstr, c);
} else if (c < 0x100 && lex->tok_kind == MP_TOKEN_BYTES) {
vstr_add_byte(&lex->vstr, c);
} else {
// unicode character out of range
// this raises a generic SyntaxError; could provide more info
lex->tok_kind = MP_TOKEN_INVALID;
}
} else {
// without unicode everything is just added as an 8-bit byte
if (c < 0x100) {
vstr_add_byte(&lex->vstr, c);
} else {
// 8-bit character out of range
// this raises a generic SyntaxError; could provide more info
lex->tok_kind = MP_TOKEN_INVALID;
}
#if MICROPY_PY_BUILTINS_STR_UNICODE
if (c < 0x110000 && lex->tok_kind == MP_TOKEN_STRING) {
// Valid unicode character in a str object.
vstr_add_char(&lex->vstr, c);
} else if (c < 0x100 && lex->tok_kind == MP_TOKEN_BYTES) {
// Valid byte in a bytes object.
vstr_add_byte(&lex->vstr, c);
}
#else
if (c < 0x100) {
// Without unicode everything is just added as an 8-bit byte.
vstr_add_byte(&lex->vstr, c);
}
#endif
else {
// Character out of range; this raises a generic SyntaxError.
lex->tok_kind = MP_TOKEN_INVALID;
}
}
} else {

View File

@ -429,13 +429,6 @@
#define MICROPY_DYNAMIC_COMPILER (0)
#endif
// Configure dynamic compiler macros
#if MICROPY_DYNAMIC_COMPILER
#define MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC (mp_dynamic_compiler.py_builtins_str_unicode)
#else
#define MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC MICROPY_PY_BUILTINS_STR_UNICODE
#endif
// Whether to enable constant folding; eg 1+2 rewritten as 3
#ifndef MICROPY_COMP_CONST_FOLDING
#define MICROPY_COMP_CONST_FOLDING (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_CORE_FEATURES)

View File

@ -55,7 +55,6 @@ enum {
#if MICROPY_DYNAMIC_COMPILER
typedef struct mp_dynamic_compiler_t {
uint8_t small_int_bits; // must be <= host small_int_bits
bool py_builtins_str_unicode;
uint8_t native_arch;
uint8_t nlr_buf_num_regs;
} mp_dynamic_compiler_t;

View File

@ -42,15 +42,11 @@
#define MPY_FEATURE_DECODE_ARCH(feat) ((feat) >> 2)
// The feature flag bits encode the compile-time config options that affect
// the generate bytecode. Note: position 0 is now unused
// (formerly MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE).
#define MPY_FEATURE_FLAGS ( \
((MICROPY_PY_BUILTINS_STR_UNICODE) << 1) \
)
// the generate bytecode. Note: no longer used.
// (formerly MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE and MICROPY_PY_BUILTINS_STR_UNICODE).
#define MPY_FEATURE_FLAGS (0)
// This is a version of the flags that can be configured at runtime.
#define MPY_FEATURE_FLAGS_DYNAMIC ( \
((MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC) << 1) \
)
#define MPY_FEATURE_FLAGS_DYNAMIC (0)
// Define the host architecture
#if MICROPY_EMIT_X86

View File

@ -49,14 +49,14 @@ class UserFS:
# by the required value of sys.implementation._mpy.
features0_file_contents = {
# -march=x64
0xA06: b'M\x06\n\x1f\x01\x004build/features0.native.mpy\x00\x8aB\xe9/\x00\x00\x00SH\x8b\x1d\x83\x00\x00\x00\xbe\x02\x00\x00\x00\xffS\x18\xbf\x01\x00\x00\x00H\x85\xc0u\x0cH\x8bC \xbe\x02\x00\x00\x00[\xff\xe0H\x0f\xaf\xf8H\xff\xc8\xeb\xe6ATUSH\x8b\x1dQ\x00\x00\x00H\x8bG\x08L\x8bc(H\x8bx\x08A\xff\xd4H\x8d5+\x00\x00\x00H\x89\xc5H\x8b\x059\x00\x00\x00\x0f\xb78\xffShH\x89\xefA\xff\xd4H\x8b\x03[]A\\\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x85\x00\x12factorial\x00\x10\r$\x01&\x9f \x01"\xff',
0x806: b'M\x06\b\x1f\x01\x004build/features0.native.mpy\x00\x8aB\xe9/\x00\x00\x00SH\x8b\x1d\x83\x00\x00\x00\xbe\x02\x00\x00\x00\xffS\x18\xbf\x01\x00\x00\x00H\x85\xc0u\x0cH\x8bC \xbe\x02\x00\x00\x00[\xff\xe0H\x0f\xaf\xf8H\xff\xc8\xeb\xe6ATUSH\x8b\x1dQ\x00\x00\x00H\x8bG\x08L\x8bc(H\x8bx\x08A\xff\xd4H\x8d5+\x00\x00\x00H\x89\xc5H\x8b\x059\x00\x00\x00\x0f\xb78\xffShH\x89\xefA\xff\xd4H\x8b\x03[]A\\\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x85\x00\x12factorial\x00\x10\r$\x01&\x9f \x01"\xff',
# -march=armv7m
0x1606: b"M\x06\x16\x1f\x01\x004build/features0.native.mpy\x00\x88B\x1a\xe0\x00\x00\x13\xb5\nK\nJ{D\x9cX\x02!\xe3h\x98G\x03F\x01 3\xb9\x02!#i\x01\x93\x02\xb0\xbd\xe8\x10@\x18GXC\x01;\xf4\xe7\x00\xbfn\x00\x00\x00\x00\x00\x00\x00\xf8\xb5\nN\nK~D\xf4XChgiXh\xb8G\x05F\x07K\x08I\xf2XyD\x10\x88ck\x98G(F\xb8G h\xf8\xbd\x00\xbf:\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x1e\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x84\x00\x00\x00\x00\x00\x00\x00\x01\x84\x10\x12factorial\x00\x10\r>\x01@\x9f:\x01<\xff",
0x1406: b"M\x06\x14\x1f\x01\x004build/features0.native.mpy\x00\x88B\x1a\xe0\x00\x00\x13\xb5\nK\nJ{D\x9cX\x02!\xe3h\x98G\x03F\x01 3\xb9\x02!#i\x01\x93\x02\xb0\xbd\xe8\x10@\x18GXC\x01;\xf4\xe7\x00\xbfn\x00\x00\x00\x00\x00\x00\x00\xf8\xb5\nN\nK~D\xf4XChgiXh\xb8G\x05F\x07K\x08I\xf2XyD\x10\x88ck\x98G(F\xb8G h\xf8\xbd\x00\xbf:\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x1e\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x84\x00\x00\x00\x00\x00\x00\x00\x01\x84\x10\x12factorial\x00\x10\r>\x01@\x9f:\x01<\xff",
}
# Populate other armv7m-derived archs based on armv7m.
for arch in (0x1A06, 0x1E06, 0x2206):
features0_file_contents[arch] = features0_file_contents[0x1606]
for arch in (0x1806, 0x1C06, 0x2006):
features0_file_contents[arch] = features0_file_contents[0x1406]
if sys.implementation._mpy not in features0_file_contents:
print("SKIP")

View File

@ -52,11 +52,11 @@ class UserFS:
# fmt: off
user_files = {
# bad architecture
'/mod0.mpy': b'M\x06\xfe\x00\x10',
'/mod0.mpy': b'M\x06\xfc\x00\x10',
# test loading of viper and asm
'/mod1.mpy': (
b'M\x06\x0a\x1f' # header
b'M\x06\x08\x1f' # header
b'\x02' # n_qstr
b'\x00' # n_obj
@ -85,7 +85,7 @@ user_files = {
# test loading viper with additional scope flags and relocation
'/mod2.mpy': (
b'M\x06\x0a\x1f' # header
b'M\x06\x08\x1f' # header
b'\x02' # n_qstr
b'\x00' # n_obj

View File

@ -48,7 +48,6 @@ MP_CODE_NATIVE_VIPER = 4
MP_SCOPE_FLAG_VIPERRELOC = 0x10
MP_SCOPE_FLAG_VIPERRODATA = 0x20
MP_SCOPE_FLAG_VIPERBSS = 0x40
MICROPY_PY_BUILTINS_STR_UNICODE = 2
MP_SMALL_INT_BITS = 31
# ELF constants
@ -116,7 +115,7 @@ class ArchData:
ARCH_DATA = {
"x86": ArchData(
"EM_386",
MP_NATIVE_ARCH_X86 << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
MP_NATIVE_ARCH_X86 << 2,
2,
4,
(R_386_PC32, R_386_GOT32, R_386_GOT32X),
@ -124,7 +123,7 @@ ARCH_DATA = {
),
"x64": ArchData(
"EM_X86_64",
MP_NATIVE_ARCH_X64 << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
MP_NATIVE_ARCH_X64 << 2,
2,
8,
(R_X86_64_GOTPCREL, R_X86_64_REX_GOTPCRELX),
@ -132,7 +131,7 @@ ARCH_DATA = {
),
"armv7m": ArchData(
"EM_ARM",
MP_NATIVE_ARCH_ARMV7M << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
MP_NATIVE_ARCH_ARMV7M << 2,
2,
4,
(R_ARM_GOT_BREL,),
@ -140,7 +139,7 @@ ARCH_DATA = {
),
"armv7emsp": ArchData(
"EM_ARM",
MP_NATIVE_ARCH_ARMV7EMSP << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
MP_NATIVE_ARCH_ARMV7EMSP << 2,
2,
4,
(R_ARM_GOT_BREL,),
@ -148,7 +147,7 @@ ARCH_DATA = {
),
"armv7emdp": ArchData(
"EM_ARM",
MP_NATIVE_ARCH_ARMV7EMDP << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
MP_NATIVE_ARCH_ARMV7EMDP << 2,
2,
4,
(R_ARM_GOT_BREL,),
@ -156,7 +155,7 @@ ARCH_DATA = {
),
"xtensa": ArchData(
"EM_XTENSA",
MP_NATIVE_ARCH_XTENSA << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
MP_NATIVE_ARCH_XTENSA << 2,
2,
4,
(R_XTENSA_32, R_XTENSA_PLT),
@ -164,7 +163,7 @@ ARCH_DATA = {
),
"xtensawin": ArchData(
"EM_XTENSA",
MP_NATIVE_ARCH_XTENSAWIN << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
MP_NATIVE_ARCH_XTENSAWIN << 2,
4,
4,
(R_XTENSA_32, R_XTENSA_PLT),