py/persistentcode: Remove unicode feature flag from .mpy file.
Prior to this commit, even with unicode disabled, .py and .mpy files could contain unicode characters, e.g. by entering them directly in a string as UTF-8 encoded bytes. The only thing the compiler disallowed (with unicode disabled) was using \uxxxx and \Uxxxxxxxx notation to specify a character with value >= 0x100 within a string; that would give a SyntaxError. With this change, mpy-cross will now accept \u and \U notation to insert a character with value >= 0x100 into a string (because the -mno-unicode option is now gone, there's no way to forbid this). The runtime will happily work with strings containing such characters, just as it already works with strings containing characters that were UTF-8 encoded directly. This change simplifies things because there are no longer any feature flags in .mpy files, and any bytecode .mpy will now run on any target. Signed-off-by: Damien George <damien@micropython.org>
This commit is contained in:
parent
b295b6f1f3
commit
c49d5207e9
@ -108,7 +108,6 @@ STATIC int usage(char **argv) {
|
||||
"\n"
|
||||
"Target specific options:\n"
|
||||
"-msmall-int-bits=number : set the maximum bits used to encode a small-int\n"
|
||||
"-mno-unicode : don't support unicode in compiled strings\n"
|
||||
"-march=<arch> : set architecture for native emitter; x86, x64, armv6, armv7m, armv7em, armv7emsp, armv7emdp, xtensa, xtensawin\n"
|
||||
"\n"
|
||||
"Implementation specific options:\n", argv[0]
|
||||
@ -203,7 +202,6 @@ MP_NOINLINE int main_(int argc, char **argv) {
|
||||
|
||||
// set default compiler configuration
|
||||
mp_dynamic_compiler.small_int_bits = 31;
|
||||
mp_dynamic_compiler.py_builtins_str_unicode = 1;
|
||||
#if defined(__i386__)
|
||||
mp_dynamic_compiler.native_arch = MP_NATIVE_ARCH_X86;
|
||||
mp_dynamic_compiler.nlr_buf_num_regs = MICROPY_NLR_NUM_REGS_X86;
|
||||
@ -261,10 +259,6 @@ MP_NOINLINE int main_(int argc, char **argv) {
|
||||
return usage(argv);
|
||||
}
|
||||
// TODO check that small_int_bits is within range of host's capabilities
|
||||
} else if (strcmp(argv[a], "-mno-unicode") == 0) {
|
||||
mp_dynamic_compiler.py_builtins_str_unicode = 0;
|
||||
} else if (strcmp(argv[a], "-municode") == 0) {
|
||||
mp_dynamic_compiler.py_builtins_str_unicode = 1;
|
||||
} else if (strncmp(argv[a], "-march=", sizeof("-march=") - 1) == 0) {
|
||||
const char *arch = argv[a] + sizeof("-march=") - 1;
|
||||
if (strcmp(arch, "x86") == 0) {
|
||||
|
36
py/lexer.c
36
py/lexer.c
@ -473,25 +473,23 @@ STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring)
|
||||
}
|
||||
}
|
||||
if (c != MP_LEXER_EOF) {
|
||||
if (MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC) {
|
||||
if (c < 0x110000 && lex->tok_kind == MP_TOKEN_STRING) {
|
||||
vstr_add_char(&lex->vstr, c);
|
||||
} else if (c < 0x100 && lex->tok_kind == MP_TOKEN_BYTES) {
|
||||
vstr_add_byte(&lex->vstr, c);
|
||||
} else {
|
||||
// unicode character out of range
|
||||
// this raises a generic SyntaxError; could provide more info
|
||||
lex->tok_kind = MP_TOKEN_INVALID;
|
||||
}
|
||||
} else {
|
||||
// without unicode everything is just added as an 8-bit byte
|
||||
if (c < 0x100) {
|
||||
vstr_add_byte(&lex->vstr, c);
|
||||
} else {
|
||||
// 8-bit character out of range
|
||||
// this raises a generic SyntaxError; could provide more info
|
||||
lex->tok_kind = MP_TOKEN_INVALID;
|
||||
}
|
||||
#if MICROPY_PY_BUILTINS_STR_UNICODE
|
||||
if (c < 0x110000 && lex->tok_kind == MP_TOKEN_STRING) {
|
||||
// Valid unicode character in a str object.
|
||||
vstr_add_char(&lex->vstr, c);
|
||||
} else if (c < 0x100 && lex->tok_kind == MP_TOKEN_BYTES) {
|
||||
// Valid byte in a bytes object.
|
||||
vstr_add_byte(&lex->vstr, c);
|
||||
}
|
||||
#else
|
||||
if (c < 0x100) {
|
||||
// Without unicode everything is just added as an 8-bit byte.
|
||||
vstr_add_byte(&lex->vstr, c);
|
||||
}
|
||||
#endif
|
||||
else {
|
||||
// Character out of range; this raises a generic SyntaxError.
|
||||
lex->tok_kind = MP_TOKEN_INVALID;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -429,13 +429,6 @@
|
||||
#define MICROPY_DYNAMIC_COMPILER (0)
|
||||
#endif
|
||||
|
||||
// Configure dynamic compiler macros
|
||||
#if MICROPY_DYNAMIC_COMPILER
|
||||
#define MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC (mp_dynamic_compiler.py_builtins_str_unicode)
|
||||
#else
|
||||
#define MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC MICROPY_PY_BUILTINS_STR_UNICODE
|
||||
#endif
|
||||
|
||||
// Whether to enable constant folding; eg 1+2 rewritten as 3
|
||||
#ifndef MICROPY_COMP_CONST_FOLDING
|
||||
#define MICROPY_COMP_CONST_FOLDING (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_CORE_FEATURES)
|
||||
|
@ -55,7 +55,6 @@ enum {
|
||||
#if MICROPY_DYNAMIC_COMPILER
|
||||
typedef struct mp_dynamic_compiler_t {
|
||||
uint8_t small_int_bits; // must be <= host small_int_bits
|
||||
bool py_builtins_str_unicode;
|
||||
uint8_t native_arch;
|
||||
uint8_t nlr_buf_num_regs;
|
||||
} mp_dynamic_compiler_t;
|
||||
|
@ -42,15 +42,11 @@
|
||||
#define MPY_FEATURE_DECODE_ARCH(feat) ((feat) >> 2)
|
||||
|
||||
// The feature flag bits encode the compile-time config options that affect
|
||||
// the generated bytecode. Note: position 0 is now unused
|
||||
// (formerly MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE).
|
||||
#define MPY_FEATURE_FLAGS ( \
|
||||
((MICROPY_PY_BUILTINS_STR_UNICODE) << 1) \
|
||||
)
|
||||
// the generated bytecode. Note: no longer used.
|
||||
// (formerly MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE and MICROPY_PY_BUILTINS_STR_UNICODE).
|
||||
#define MPY_FEATURE_FLAGS (0)
|
||||
// This is a version of the flags that can be configured at runtime.
|
||||
#define MPY_FEATURE_FLAGS_DYNAMIC ( \
|
||||
((MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC) << 1) \
|
||||
)
|
||||
#define MPY_FEATURE_FLAGS_DYNAMIC (0)
|
||||
|
||||
// Define the host architecture
|
||||
#if MICROPY_EMIT_X86
|
||||
|
@ -49,14 +49,14 @@ class UserFS:
|
||||
# by the required value of sys.implementation._mpy.
|
||||
features0_file_contents = {
|
||||
# -march=x64
|
||||
0xA06: b'M\x06\n\x1f\x01\x004build/features0.native.mpy\x00\x8aB\xe9/\x00\x00\x00SH\x8b\x1d\x83\x00\x00\x00\xbe\x02\x00\x00\x00\xffS\x18\xbf\x01\x00\x00\x00H\x85\xc0u\x0cH\x8bC \xbe\x02\x00\x00\x00[\xff\xe0H\x0f\xaf\xf8H\xff\xc8\xeb\xe6ATUSH\x8b\x1dQ\x00\x00\x00H\x8bG\x08L\x8bc(H\x8bx\x08A\xff\xd4H\x8d5+\x00\x00\x00H\x89\xc5H\x8b\x059\x00\x00\x00\x0f\xb78\xffShH\x89\xefA\xff\xd4H\x8b\x03[]A\\\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x85\x00\x12factorial\x00\x10\r$\x01&\x9f \x01"\xff',
|
||||
0x806: b'M\x06\b\x1f\x01\x004build/features0.native.mpy\x00\x8aB\xe9/\x00\x00\x00SH\x8b\x1d\x83\x00\x00\x00\xbe\x02\x00\x00\x00\xffS\x18\xbf\x01\x00\x00\x00H\x85\xc0u\x0cH\x8bC \xbe\x02\x00\x00\x00[\xff\xe0H\x0f\xaf\xf8H\xff\xc8\xeb\xe6ATUSH\x8b\x1dQ\x00\x00\x00H\x8bG\x08L\x8bc(H\x8bx\x08A\xff\xd4H\x8d5+\x00\x00\x00H\x89\xc5H\x8b\x059\x00\x00\x00\x0f\xb78\xffShH\x89\xefA\xff\xd4H\x8b\x03[]A\\\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x85\x00\x12factorial\x00\x10\r$\x01&\x9f \x01"\xff',
|
||||
# -march=armv7m
|
||||
0x1606: b"M\x06\x16\x1f\x01\x004build/features0.native.mpy\x00\x88B\x1a\xe0\x00\x00\x13\xb5\nK\nJ{D\x9cX\x02!\xe3h\x98G\x03F\x01 3\xb9\x02!#i\x01\x93\x02\xb0\xbd\xe8\x10@\x18GXC\x01;\xf4\xe7\x00\xbfn\x00\x00\x00\x00\x00\x00\x00\xf8\xb5\nN\nK~D\xf4XChgiXh\xb8G\x05F\x07K\x08I\xf2XyD\x10\x88ck\x98G(F\xb8G h\xf8\xbd\x00\xbf:\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x1e\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x84\x00\x00\x00\x00\x00\x00\x00\x01\x84\x10\x12factorial\x00\x10\r>\x01@\x9f:\x01<\xff",
|
||||
0x1406: b"M\x06\x14\x1f\x01\x004build/features0.native.mpy\x00\x88B\x1a\xe0\x00\x00\x13\xb5\nK\nJ{D\x9cX\x02!\xe3h\x98G\x03F\x01 3\xb9\x02!#i\x01\x93\x02\xb0\xbd\xe8\x10@\x18GXC\x01;\xf4\xe7\x00\xbfn\x00\x00\x00\x00\x00\x00\x00\xf8\xb5\nN\nK~D\xf4XChgiXh\xb8G\x05F\x07K\x08I\xf2XyD\x10\x88ck\x98G(F\xb8G h\xf8\xbd\x00\xbf:\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x1e\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x84\x00\x00\x00\x00\x00\x00\x00\x01\x84\x10\x12factorial\x00\x10\r>\x01@\x9f:\x01<\xff",
|
||||
}
|
||||
|
||||
# Populate other armv7m-derived archs based on armv7m.
|
||||
for arch in (0x1A06, 0x1E06, 0x2206):
|
||||
features0_file_contents[arch] = features0_file_contents[0x1606]
|
||||
for arch in (0x1806, 0x1C06, 0x2006):
|
||||
features0_file_contents[arch] = features0_file_contents[0x1406]
|
||||
|
||||
if sys.implementation._mpy not in features0_file_contents:
|
||||
print("SKIP")
|
||||
|
@ -52,11 +52,11 @@ class UserFS:
|
||||
# fmt: off
|
||||
user_files = {
|
||||
# bad architecture
|
||||
'/mod0.mpy': b'M\x06\xfe\x00\x10',
|
||||
'/mod0.mpy': b'M\x06\xfc\x00\x10',
|
||||
|
||||
# test loading of viper and asm
|
||||
'/mod1.mpy': (
|
||||
b'M\x06\x0a\x1f' # header
|
||||
b'M\x06\x08\x1f' # header
|
||||
|
||||
b'\x02' # n_qstr
|
||||
b'\x00' # n_obj
|
||||
@ -85,7 +85,7 @@ user_files = {
|
||||
|
||||
# test loading viper with additional scope flags and relocation
|
||||
'/mod2.mpy': (
|
||||
b'M\x06\x0a\x1f' # header
|
||||
b'M\x06\x08\x1f' # header
|
||||
|
||||
b'\x02' # n_qstr
|
||||
b'\x00' # n_obj
|
||||
|
@ -48,7 +48,6 @@ MP_CODE_NATIVE_VIPER = 4
|
||||
MP_SCOPE_FLAG_VIPERRELOC = 0x10
|
||||
MP_SCOPE_FLAG_VIPERRODATA = 0x20
|
||||
MP_SCOPE_FLAG_VIPERBSS = 0x40
|
||||
MICROPY_PY_BUILTINS_STR_UNICODE = 2
|
||||
MP_SMALL_INT_BITS = 31
|
||||
|
||||
# ELF constants
|
||||
@ -116,7 +115,7 @@ class ArchData:
|
||||
ARCH_DATA = {
|
||||
"x86": ArchData(
|
||||
"EM_386",
|
||||
MP_NATIVE_ARCH_X86 << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
|
||||
MP_NATIVE_ARCH_X86 << 2,
|
||||
2,
|
||||
4,
|
||||
(R_386_PC32, R_386_GOT32, R_386_GOT32X),
|
||||
@ -124,7 +123,7 @@ ARCH_DATA = {
|
||||
),
|
||||
"x64": ArchData(
|
||||
"EM_X86_64",
|
||||
MP_NATIVE_ARCH_X64 << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
|
||||
MP_NATIVE_ARCH_X64 << 2,
|
||||
2,
|
||||
8,
|
||||
(R_X86_64_GOTPCREL, R_X86_64_REX_GOTPCRELX),
|
||||
@ -132,7 +131,7 @@ ARCH_DATA = {
|
||||
),
|
||||
"armv7m": ArchData(
|
||||
"EM_ARM",
|
||||
MP_NATIVE_ARCH_ARMV7M << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
|
||||
MP_NATIVE_ARCH_ARMV7M << 2,
|
||||
2,
|
||||
4,
|
||||
(R_ARM_GOT_BREL,),
|
||||
@ -140,7 +139,7 @@ ARCH_DATA = {
|
||||
),
|
||||
"armv7emsp": ArchData(
|
||||
"EM_ARM",
|
||||
MP_NATIVE_ARCH_ARMV7EMSP << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
|
||||
MP_NATIVE_ARCH_ARMV7EMSP << 2,
|
||||
2,
|
||||
4,
|
||||
(R_ARM_GOT_BREL,),
|
||||
@ -148,7 +147,7 @@ ARCH_DATA = {
|
||||
),
|
||||
"armv7emdp": ArchData(
|
||||
"EM_ARM",
|
||||
MP_NATIVE_ARCH_ARMV7EMDP << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
|
||||
MP_NATIVE_ARCH_ARMV7EMDP << 2,
|
||||
2,
|
||||
4,
|
||||
(R_ARM_GOT_BREL,),
|
||||
@ -156,7 +155,7 @@ ARCH_DATA = {
|
||||
),
|
||||
"xtensa": ArchData(
|
||||
"EM_XTENSA",
|
||||
MP_NATIVE_ARCH_XTENSA << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
|
||||
MP_NATIVE_ARCH_XTENSA << 2,
|
||||
2,
|
||||
4,
|
||||
(R_XTENSA_32, R_XTENSA_PLT),
|
||||
@ -164,7 +163,7 @@ ARCH_DATA = {
|
||||
),
|
||||
"xtensawin": ArchData(
|
||||
"EM_XTENSA",
|
||||
MP_NATIVE_ARCH_XTENSAWIN << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
|
||||
MP_NATIVE_ARCH_XTENSAWIN << 2,
|
||||
4,
|
||||
4,
|
||||
(R_XTENSA_32, R_XTENSA_PLT),
|
||||
|
Loading…
Reference in New Issue
Block a user