Switch translate() to the header file

This allows the compile stage to optimize most of the translate()
function away and saves a ton of space (~40k on ESP). *However*, it
requires us to wait for the qstr output before we compile the rest
of our .o files. (Only qstr.o used to wait.)

This isn't as good as the current setup with LTO though. Trinket M0
loses <1k with this setup.

So, we should probably conditionalize this along with LTO.
This commit is contained in:
Scott Shawcroft 2022-05-26 16:44:48 -07:00
parent dc5565a5ce
commit c676253834
No known key found for this signature in database
GPG Key ID: 0DFD512649C052DA
4 changed files with 21 additions and 22 deletions

View File

@ -6,9 +6,6 @@ USB_MANUFACTURER = "Adafruit"
IDF_TARGET = esp32s3
# Make room for build
CIRCUITPY_ULAB = 0
INTERNAL_FLASH_FILESYSTEM = 1
LONGINT_IMPL = MPZ

View File

@ -58,7 +58,7 @@ $(Q)$(CXX) $(CXXFLAGS) -c -MD -o $@ $<
endef
vpath %.c . $(TOP) $(USER_C_MODULES) $(DEVICES_MODULES)
# Pattern rule: build each object under $(BUILD) from the matching .c source
# using the compile_c recipe.
# The prerequisites listed after `|` are order-only: the generated qstr headers
# must exist before any .c file is compiled, but a newer timestamp on them does
# not by itself force the object to be rebuilt.
$(BUILD)/%.o: %.c
$(BUILD)/%.o: %.c | $(HEADER_BUILD)/qstrdefs.generated.h $(HEADER_BUILD)/qstrdefs.enum.h
$(call compile_c)
vpath %.cpp . $(TOP) $(USER_C_MODULES)
@ -89,7 +89,7 @@ $(BUILD)/%.pp: %.c
# the right .o's to get recompiled if the generated.h file changes. Adding
# an order-only dependency to all of the .o's will cause the generated .h
# to get built before we try to compile any of them.
$(OBJ): | $(HEADER_BUILD)/qstrdefs.enum.h $(HEADER_BUILD)/mpversion.h
$(OBJ): | $(HEADER_BUILD)/mpversion.h
# The logic for qstr regeneration (applied by makeqstrdefs.py) is:
# - if anything in QSTR_GLOBAL_DEPENDENCIES is newer, then process all source files ($^)

View File

@ -134,19 +134,3 @@ char *decompress(const compressed_string_t *compressed, char *decompressed) {
decompressed[length - 1] = '\0';
return decompressed;
}
// Look up the compressed translation for a source string.
//
// `original` is compared with strcmp() against the id of every TRANSLATION
// entry pulled in from genhdr/qstrdefs.generated.h. On the first match a
// pointer to a function-local `static const compressed_string_t` is returned;
// if nothing matches the result is NULL. When NO_QSTR is defined the whole
// lookup chain is compiled out and the function always returns NULL.
inline
// gcc10 -flto has issues with this being always_inline for debug builds.
#if CIRCUITPY_DEBUG < 1
__attribute__((always_inline))
#endif
const compressed_string_t *translate(const char *original) {
#ifndef NO_QSTR
// QDEF is defined to expand to nothing, so any QDEF(...) entries in the
// generated header are ignored here.
#define QDEF(id, hash, len, str)
// Each TRANSLATION(...) expands to `if (match) { ...; return &v; } else`. The
// trailing `else` chains into the next entry, and the last one binds to the
// `return NULL;` below. `firstbyte` initializes .data and the remaining
// arguments initialize .tail.
#define TRANSLATION(id, firstbyte, ...) if (strcmp(original, id) == 0) { static const compressed_string_t v = { .data = firstbyte, .tail = { __VA_ARGS__ } }; return &v; } else
// Presumably a list of QDEF(...)/TRANSLATION(...) invocations produced by the
// build; the header itself is not visible here.
#include "genhdr/qstrdefs.generated.h"
#undef TRANSLATION
#undef QDEF
#endif
// Fallthrough target of the final dangling `else`: no entry matched.
return NULL;
}

View File

@ -27,7 +27,9 @@
#ifndef MICROPY_INCLUDED_SUPERVISOR_TRANSLATE_H
#define MICROPY_INCLUDED_SUPERVISOR_TRANSLATE_H
#include <stddef.h>
#include <stdint.h>
#include <string.h>
// The format of the compressed data is:
// - the size of the uncompressed string in UTF-8 bytes, encoded as a
@ -77,7 +79,7 @@ typedef struct compressed_string {
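// Return the compressed, translated version of a source string.
// Usually this is optimized into a load of a constant pointer: with
// translate() defined inline in this header the compiler can fold the
// lookup at compile time; otherwise LTO does so at link time.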
const compressed_string_t *translate(const char *c);
// const compressed_string_t *translate(const char *c);
void serial_write_compressed(const compressed_string_t *compressed);
char *decompress(const compressed_string_t *compressed, char *decompressed);
uint16_t decompress_length(const compressed_string_t *compressed);
@ -90,4 +92,20 @@ uint16_t decompress_length(const compressed_string_t *compressed);
#define MP_ERROR_TEXT(x) translate(x)
#endif
// Look up the compressed translation for a source string.
//
// `original` is compared with strcmp() against the id of every TRANSLATION
// entry pulled in from genhdr/qstrdefs.generated.h. On the first match a
// pointer to a function-local `static const compressed_string_t` is returned;
// if nothing matches the result is NULL. When NO_QSTR is defined the whole
// lookup chain is compiled out and the function always returns NULL.
//
// The definition is `static inline` in this header, so every translation unit
// that includes it gets its own copy and needs the generated qstr header to
// exist before it can be compiled.
static inline
// gcc10 -flto has issues with this being always_inline for debug builds.
#if CIRCUITPY_DEBUG < 1
__attribute__((always_inline))
#endif
const compressed_string_t *translate(const char *original) {
#ifndef NO_QSTR
// QDEF is defined to expand to nothing, so any QDEF(...) entries in the
// generated header are ignored here.
#define QDEF(id, hash, len, str)
// Each TRANSLATION(...) expands to `if (match) { ...; return &v; } else`. The
// trailing `else` chains into the next entry, and the last one binds to the
// `return NULL;` below. `firstbyte` initializes .data and the remaining
// arguments initialize .tail.
#define TRANSLATION(id, firstbyte, ...) if (strcmp(original, id) == 0) { static const compressed_string_t v = { .data = firstbyte, .tail = { __VA_ARGS__ } }; return &v; } else
// Presumably a list of QDEF(...)/TRANSLATION(...) invocations produced by the
// build; the header itself is not visible here.
#include "genhdr/qstrdefs.generated.h"
#undef TRANSLATION
#undef QDEF
#endif
// Fallthrough target of the final dangling `else`: no entry matched.
return NULL;
}
#endif // MICROPY_INCLUDED_SUPERVISOR_TRANSLATE_H