From e0bf4611c3a8b23b3c52e6a7804aac341ac3a87d Mon Sep 17 00:00:00 2001 From: Jim Mussared Date: Sat, 11 Dec 2021 22:40:21 +1100 Subject: [PATCH] py: Only search frozen modules when '.frozen' is found in sys.path. This changes makemanifest.py & mpy-tool.py to merge string and mpy names into the same list (now mp_frozen_names). The various paths for loading a frozen module (mp_find_frozen_module) and checking existence of a frozen module (mp_frozen_stat) use a common function that searches this list. In addition, the frozen lookup will now only take place if the path starts with ".frozen", which needs to be added to sys.path. This fixes issues #1804, #2322, #3509, #6419. Signed-off-by: Jim Mussared --- py/builtinhelp.c | 17 ++--- py/builtinimport.c | 48 +++++++----- py/frozenmod.c | 165 ++++++++++++++++++---------------------- py/frozenmod.h | 4 +- py/qstrdefs.h | 4 + shared/runtime/pyexec.c | 5 +- tools/makemanifest.py | 45 ++++++----- tools/mpy-tool.py | 6 +- 8 files changed, 143 insertions(+), 151 deletions(-) diff --git a/py/builtinhelp.c b/py/builtinhelp.c index 13735635e3..84d69caf35 100644 --- a/py/builtinhelp.c +++ b/py/builtinhelp.c @@ -67,10 +67,10 @@ STATIC void mp_help_add_from_map(mp_obj_t list, const mp_map_t *map) { #if MICROPY_MODULE_FROZEN STATIC void mp_help_add_from_names(mp_obj_t list, const char *name) { while (*name) { - size_t l = strlen(name); + size_t len = strlen(name); // name should end in '.py' and we strip it off - mp_obj_list_append(list, mp_obj_new_str(name, l - 3)); - name += l + 1; + mp_obj_list_append(list, mp_obj_new_str(name, len - 3)); + name += len + 1; } } #endif @@ -80,14 +80,9 @@ STATIC void mp_help_print_modules(void) { mp_help_add_from_map(list, &mp_builtin_module_map); - #if MICROPY_MODULE_FROZEN_STR - extern const char mp_frozen_str_names[]; - mp_help_add_from_names(list, mp_frozen_str_names); - #endif - - #if MICROPY_MODULE_FROZEN_MPY - extern const char mp_frozen_mpy_names[]; - mp_help_add_from_names(list, 
mp_frozen_mpy_names); + #if MICROPY_MODULE_FROZEN + extern const char mp_frozen_names[]; + mp_help_add_from_names(list, mp_frozen_names); #endif // sort the list so it's printed in alphabetical order diff --git a/py/builtinimport.c b/py/builtinimport.c index 755ce779a7..3e336633d9 100644 --- a/py/builtinimport.c +++ b/py/builtinimport.c @@ -50,6 +50,9 @@ // Must be a string of one byte. #define PATH_SEP_CHAR "/" +// Virtual sys.path entry that maps to the frozen modules. +#define MP_FROZEN_PATH_PREFIX ".frozen/" + bool mp_obj_is_package(mp_obj_t module) { mp_obj_t dest[2]; mp_load_method_maybe(module, MP_QSTR___path__, dest); @@ -62,9 +65,10 @@ bool mp_obj_is_package(mp_obj_t module) { // will return whether the path is a file, directory, or doesn't exist. STATIC mp_import_stat_t stat_path_or_frozen(const char *path) { #if MICROPY_MODULE_FROZEN - mp_import_stat_t st = mp_frozen_stat(path); - if (st != MP_IMPORT_STAT_NO_EXIST) { - return st; + // Only try and load as a frozen module if it starts with .frozen/. + const int frozen_path_prefix_len = strlen(MP_FROZEN_PATH_PREFIX); + if (strncmp(path, MP_FROZEN_PATH_PREFIX, frozen_path_prefix_len) == 0) { + return mp_find_frozen_module(path + frozen_path_prefix_len, NULL, NULL); } #endif return mp_import_stat(path); @@ -193,32 +197,36 @@ STATIC void do_execute_raw_code(mp_obj_t module_obj, mp_raw_code_t *raw_code, co STATIC void do_load(mp_obj_t module_obj, vstr_t *file) { #if MICROPY_MODULE_FROZEN || MICROPY_ENABLE_COMPILER || (MICROPY_PERSISTENT_CODE_LOAD && MICROPY_HAS_FILE_READER) - char *file_str = vstr_null_terminated_str(file); + const char *file_str = vstr_null_terminated_str(file); #endif // If we support frozen modules (either as str or mpy) then try to find the // requested filename in the list of frozen module filenames. 
#if MICROPY_MODULE_FROZEN void *modref; - int frozen_type = mp_find_frozen_module(file_str, file->len, &modref); + int frozen_type; + const int frozen_path_prefix_len = strlen(MP_FROZEN_PATH_PREFIX); + if (strncmp(file_str, MP_FROZEN_PATH_PREFIX, frozen_path_prefix_len) == 0) { + mp_find_frozen_module(file_str + frozen_path_prefix_len, &frozen_type, &modref); - // If we support frozen str modules and the compiler is enabled, and we - // found the filename in the list of frozen files, then load and execute it. - #if MICROPY_MODULE_FROZEN_STR - if (frozen_type == MP_FROZEN_STR) { - do_load_from_lexer(module_obj, modref); - return; - } - #endif + // If we support frozen str modules and the compiler is enabled, and we + // found the filename in the list of frozen files, then load and execute it. + #if MICROPY_MODULE_FROZEN_STR + if (frozen_type == MP_FROZEN_STR) { + do_load_from_lexer(module_obj, modref); + return; + } + #endif - // If we support frozen mpy modules and we found a corresponding file (and - // its data) in the list of frozen files, execute it. - #if MICROPY_MODULE_FROZEN_MPY - if (frozen_type == MP_FROZEN_MPY) { - do_execute_raw_code(module_obj, modref, file_str); - return; + // If we support frozen mpy modules and we found a corresponding file (and + // its data) in the list of frozen files, execute it. + #if MICROPY_MODULE_FROZEN_MPY + if (frozen_type == MP_FROZEN_MPY) { + do_execute_raw_code(module_obj, modref, file_str + frozen_path_prefix_len); + return; + } + #endif } - #endif #endif // MICROPY_MODULE_FROZEN diff --git a/py/frozenmod.c b/py/frozenmod.c index a250c02151..6cb68d1ec0 100644 --- a/py/frozenmod.c +++ b/py/frozenmod.c @@ -5,6 +5,7 @@ * * Copyright (c) 2015 Paul Sokolovsky * Copyright (c) 2016 Damien P. 
George + * Copyright (c) 2021 Jim Mussared * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -31,6 +32,13 @@ #include "py/lexer.h" #include "py/frozenmod.h" +#if MICROPY_MODULE_FROZEN + +// Null-separated frozen file names. All string-type entries are listed first, +// followed by mpy-type entries. Use mp_frozen_str_sizes to determine how +// many string entries. +extern const char mp_frozen_names[]; + #if MICROPY_MODULE_FROZEN_STR #ifndef MICROPY_MODULE_FROZEN_LEXER @@ -39,118 +47,89 @@ mp_lexer_t *MICROPY_MODULE_FROZEN_LEXER(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len); #endif -extern const char mp_frozen_str_names[]; +// Size in bytes of each string entry, followed by a zero (terminator). extern const uint32_t mp_frozen_str_sizes[]; +// Null-separated string content. extern const char mp_frozen_str_content[]; - -// On input, *len contains size of name, on output - size of content -const char *mp_find_frozen_str(const char *str, size_t *len) { - const char *name = mp_frozen_str_names; - - size_t offset = 0; - for (int i = 0; *name != 0; i++) { - size_t l = strlen(name); - if (l == *len && !memcmp(str, name, l)) { - *len = mp_frozen_str_sizes[i]; - return mp_frozen_str_content + offset; - } - name += l + 1; - offset += mp_frozen_str_sizes[i] + 1; - } - return NULL; -} - -STATIC mp_lexer_t *mp_lexer_frozen_str(const char *str, size_t len) { - size_t name_len = len; - const char *content = mp_find_frozen_str(str, &len); - - if (content == NULL) { - return NULL; - } - - qstr source = qstr_from_strn(str, name_len); - mp_lexer_t *lex = MICROPY_MODULE_FROZEN_LEXER(source, content, len, 0); - return lex; -} - -#endif +#endif // MICROPY_MODULE_FROZEN_STR #if MICROPY_MODULE_FROZEN_MPY #include "py/emitglue.h" -extern const char mp_frozen_mpy_names[]; extern const mp_raw_code_t *const mp_frozen_mpy_content[]; -STATIC const mp_raw_code_t 
*mp_find_frozen_mpy(const char *str, size_t len) { - const char *name = mp_frozen_mpy_names; - for (size_t i = 0; *name != 0; i++) { - size_t l = strlen(name); - if (l == len && !memcmp(str, name, l)) { - return mp_frozen_mpy_content[i]; - } - name += l + 1; - } - return NULL; -} +#endif // MICROPY_MODULE_FROZEN_MPY -#endif - -#if MICROPY_MODULE_FROZEN - -STATIC mp_import_stat_t mp_frozen_stat_helper(const char *name, const char *str) { +// Search for "str" as a frozen entry, returning the stat result +// (no-exist/file/dir), as well as the type (none/str/mpy) and data. +// frozen_type can be NULL if its value isn't needed (and then data is assumed to be NULL). +mp_import_stat_t mp_find_frozen_module(const char *str, int *frozen_type, void **data) { size_t len = strlen(str); + const char *name = mp_frozen_names; + + if (frozen_type != NULL) { + *frozen_type = MP_FROZEN_NONE; + } + + // Count the number of str lengths we have to find how many str entries. + size_t num_str = 0; + #if MICROPY_MODULE_FROZEN_STR && MICROPY_MODULE_FROZEN_MPY + for (const uint32_t *s = mp_frozen_str_sizes; *s != 0; ++s) { + ++num_str; + } + #endif + + for (size_t i = 0; *name != 0; i++) { + size_t entry_len = strlen(name); + if (entry_len >= len && memcmp(str, name, len) == 0) { + // Query is a prefix of the current entry. + if (entry_len == len) { + // Exact match --> file. + + if (frozen_type != NULL) { + #if MICROPY_MODULE_FROZEN_STR + if (i < num_str) { + *frozen_type = MP_FROZEN_STR; + // Use the size table to figure out where this index starts. + size_t offset = 0; + for (size_t j = 0; j < i; ++j) { + offset += mp_frozen_str_sizes[j] + 1; + } + size_t content_len = mp_frozen_str_sizes[i]; + const char *content = &mp_frozen_str_content[offset]; + + // Note: str & len have been updated by find_frozen_entry to strip + // the ".frozen/" prefix (to avoid this being a distinct qstr to + // the original path QSTR in frozen_content.c). 
+ qstr source = qstr_from_strn(str, len); + mp_lexer_t *lex = MICROPY_MODULE_FROZEN_LEXER(source, content, content_len, 0); + *data = lex; + } + #endif + + #if MICROPY_MODULE_FROZEN_MPY + if (i >= num_str) { + *frozen_type = MP_FROZEN_MPY; + // Load the corresponding index as a raw_code, taking + // into account any string entries to offset by. + *data = (void *)mp_frozen_mpy_content[i - num_str]; + } + #endif + } - for (int i = 0; *name != 0; i++) { - size_t l = strlen(name); - if (l >= len && !memcmp(str, name, len)) { - if (name[len] == 0) { return MP_IMPORT_STAT_FILE; } else if (name[len] == '/') { + // Matches up to directory separator, this is a valid + // directory path. return MP_IMPORT_STAT_DIR; } } - name += l + 1; + // Skip null separator. + name += entry_len + 1; } - return MP_IMPORT_STAT_NO_EXIST; -} - -mp_import_stat_t mp_frozen_stat(const char *str) { - mp_import_stat_t stat; - - #if MICROPY_MODULE_FROZEN_STR - stat = mp_frozen_stat_helper(mp_frozen_str_names, str); - if (stat != MP_IMPORT_STAT_NO_EXIST) { - return stat; - } - #endif - - #if MICROPY_MODULE_FROZEN_MPY - stat = mp_frozen_stat_helper(mp_frozen_mpy_names, str); - if (stat != MP_IMPORT_STAT_NO_EXIST) { - return stat; - } - #endif return MP_IMPORT_STAT_NO_EXIST; } -int mp_find_frozen_module(const char *str, size_t len, void **data) { - #if MICROPY_MODULE_FROZEN_STR - mp_lexer_t *lex = mp_lexer_frozen_str(str, len); - if (lex != NULL) { - *data = lex; - return MP_FROZEN_STR; - } - #endif - #if MICROPY_MODULE_FROZEN_MPY - const mp_raw_code_t *rc = mp_find_frozen_mpy(str, len); - if (rc != NULL) { - *data = (void *)rc; - return MP_FROZEN_MPY; - } - #endif - return MP_FROZEN_NONE; -} - -#endif +#endif // MICROPY_MODULE_FROZEN diff --git a/py/frozenmod.h b/py/frozenmod.h index 8a477d028e..be735e85bd 100644 --- a/py/frozenmod.h +++ b/py/frozenmod.h @@ -35,8 +35,6 @@ enum { MP_FROZEN_MPY, }; -int mp_find_frozen_module(const char *str, size_t len, void **data); -const char 
*mp_find_frozen_str(const char *str, size_t *len); -mp_import_stat_t mp_frozen_stat(const char *str); +mp_import_stat_t mp_find_frozen_module(const char *str, int *frozen_type, void **data); #endif // MICROPY_INCLUDED_PY_FROZENMOD_H diff --git a/py/qstrdefs.h b/py/qstrdefs.h index 5b4e0dc48e..405813941b 100644 --- a/py/qstrdefs.h +++ b/py/qstrdefs.h @@ -60,6 +60,10 @@ Q() Q() Q(utf-8) +#if MICROPY_MODULE_FROZEN +Q(.frozen) +#endif + #if MICROPY_ENABLE_PYSTACK Q(pystack exhausted) #endif diff --git a/shared/runtime/pyexec.c b/shared/runtime/pyexec.c index 006ec096f7..2dceb647c3 100644 --- a/shared/runtime/pyexec.c +++ b/shared/runtime/pyexec.c @@ -674,7 +674,7 @@ int pyexec_file(const char *filename) { int pyexec_file_if_exists(const char *filename) { #if MICROPY_MODULE_FROZEN - if (mp_frozen_stat(filename) == MP_IMPORT_STAT_FILE) { + if (mp_find_frozen_module(filename, NULL, NULL) == MP_IMPORT_STAT_FILE) { return pyexec_frozen_module(filename); } #endif @@ -687,7 +687,8 @@ int pyexec_file_if_exists(const char *filename) { #if MICROPY_MODULE_FROZEN int pyexec_frozen_module(const char *name) { void *frozen_data; - int frozen_type = mp_find_frozen_module(name, strlen(name), &frozen_data); + int frozen_type; + mp_find_frozen_module(name, &frozen_type, &frozen_data); switch (frozen_type) { #if MICROPY_MODULE_FROZEN_STR diff --git a/tools/makemanifest.py b/tools/makemanifest.py index e37ae74cfd..8cdc3eb774 100644 --- a/tools/makemanifest.py +++ b/tools/makemanifest.py @@ -233,12 +233,17 @@ def freeze_internal(kind, path, script, opt): manifest_list.append((kind, path, script, opt)) +# Formerly make-frozen.py. 
+# This generates: +# - MP_FROZEN_STR_NAMES macro +# - mp_frozen_str_sizes +# - mp_frozen_str_content def generate_frozen_str_content(paths): def module_name(f): return f modules = [] - output = [] + output = [b"#include <stdint.h>\n"] for path in paths: root = path.rstrip("/") @@ -250,21 +255,19 @@ def generate_frozen_str_content(paths): st = os.stat(fullpath) modules.append((path, fullpath[root_len + 1 :], st)) - output.append("#include <stdint.h>\n") - output.append("const char mp_frozen_str_names[] = {\n") + output.append(b"#define MP_FROZEN_STR_NAMES \\\n") for _path, f, st in modules: m = module_name(f) - output.append('"%s\\0"\n' % m) - output.append('"\\0"};\n') + output.append(b'"%s\\0" \\\n' % m.encode()) + output.append(b"\n") - output.append("const uint32_t mp_frozen_str_sizes[] = {\n") + output.append(b"const uint32_t mp_frozen_str_sizes[] = { ") for _path, f, st in modules: - output.append("%d," % st.st_size) + output.append(b"%d, " % st.st_size) + output.append(b"0 };\n") - output.append("0};\n") - - output.append("const char mp_frozen_str_content[] = {\n") + output.append(b"const char mp_frozen_str_content[] = {\n") for path, f, st in modules: data = open(path + "/" + f, "rb").read() @@ -276,8 +279,8 @@ def generate_frozen_str_content(paths): # to be able to read the resulting C code as ASCII when possible. 
data = bytearray(data) # so Python2 extracts each byte as an integer - esc_dict = {ord("\n"): "\\n", ord("\r"): "\\r", ord('"'): '\\"', ord("\\"): "\\\\"} - output.append('"') + esc_dict = {ord("\n"): b"\\n", ord("\r"): b"\\r", ord('"'): b'\\"', ord("\\"): b"\\\\"} + output.append(b'"') break_str = False for c in data: try: @@ -285,16 +288,16 @@ def generate_frozen_str_content(paths): except KeyError: if 32 <= c <= 126: if break_str: - output.append('" "') + output.append(b'" "') break_str = False - output.append(chr(c)) + output.append(chr(c).encode()) else: - output.append("\\x%02x" % c) + output.append(b"\\x%02x" % c) break_str = True - output.append('\\0"\n') + output.append(b'\\0"\n') - output.append('"\\0"};\n') - return "".join(output) + output.append(b'"\\0"\n};\n\n') + return b"".join(output) def main(): @@ -414,8 +417,8 @@ def main(): b"const qstr_pool_t mp_qstr_frozen_const_pool = {\n" b" (qstr_pool_t*)&mp_qstr_const_pool, MP_QSTRnumber_of, 0, 0\n" b"};\n" - b'const char mp_frozen_mpy_names[1] = {"\\0"};\n' - b"const mp_raw_code_t *const mp_frozen_mpy_content[1] = {NULL};\n" + b'const char mp_frozen_names[] = { MP_FROZEN_STR_NAMES "\\0"};\n' + b"const mp_raw_code_t *const mp_frozen_mpy_content[] = {NULL};\n" ) # Generate output @@ -423,7 +426,7 @@ def main(): mkdir(args.output) with open(args.output, "wb") as f: f.write(b"//\n// Content for MICROPY_MODULE_FROZEN_STR\n//\n") - f.write(output_str.encode()) + f.write(output_str) f.write(b"//\n// Content for MICROPY_MODULE_FROZEN_MPY\n//\n") f.write(output_mpy) diff --git a/tools/mpy-tool.py b/tools/mpy-tool.py index 6868ed5d4e..aa0272111c 100755 --- a/tools/mpy-tool.py +++ b/tools/mpy-tool.py @@ -886,7 +886,11 @@ def freeze_mpy(base_qstrs, raw_codes): rc.freeze(rc.source_file.str.replace("/", "_")[:-3] + "_") print() - print("const char mp_frozen_mpy_names[] = {") + print("const char mp_frozen_names[] = {") + print("#ifdef MP_FROZEN_STR_NAMES") + # makemanifest.py might also include some frozen string 
content. + print("MP_FROZEN_STR_NAMES") + print("#endif") for rc in raw_codes: module_name = rc.source_file.str print('"%s\\0"' % module_name)