Add CIRCUITPY_MESSAGE_COMPRESSION_LEVEL

to trade off compile speed against flash size

Initially enable the faster mode on rp2040 and espressif, where there's
usually plenty of flash available. (The advanced techniques save hundreds
to thousands of bytes, which is important on a lot of old samd21 boards
but is a drop in the bucket on a 4MB flash chip.)
This commit is contained in:
Jeff Epler 2023-10-20 19:02:12 +01:00
parent 770f22e5a7
commit 7ab5252cdd
No known key found for this signature in database
GPG Key ID: D5BF15AB975AB4DE
8 changed files with 32 additions and 6 deletions

View File

@ -63,6 +63,8 @@ endif
OBJ = $(PY_CORE_O) OBJ = $(PY_CORE_O)
OBJ += $(addprefix $(BUILD)/, $(SRC_C:.c=.o)) OBJ += $(addprefix $(BUILD)/, $(SRC_C:.c=.o))
# CIRCUITPY
$(BUILD)/supervisor/shared/translate/translate.o: $(HEADER_BUILD)/qstrdefs.generated.h $(HEADER_BUILD)/compressed_translations.generated.h $(BUILD)/supervisor/shared/translate/translate.o: $(HEADER_BUILD)/qstrdefs.generated.h $(HEADER_BUILD)/compressed_translations.generated.h
CIRCUITPY_MESSAGE_COMPRESSION_LEVEL = 1
include $(TOP)/py/mkrules.mk include $(TOP)/py/mkrules.mk

View File

@ -148,3 +148,6 @@ endif
# only if something else is turned off, such as HID. # only if something else is turned off, such as HID.
USB_NUM_ENDPOINT_PAIRS = 7 USB_NUM_ENDPOINT_PAIRS = 7
USB_NUM_IN_ENDPOINTS = 5 USB_NUM_IN_ENDPOINTS = 5
# Usually lots of flash space available, so prefer a faster build over smaller messages
CIRCUITPY_MESSAGE_COMPRESSION_LEVEL ?= 1

View File

@ -52,3 +52,6 @@ USB_NUM_ENDPOINT_PAIRS = 8
INTERNAL_FLASH_FILESYSTEM = 1 INTERNAL_FLASH_FILESYSTEM = 1
CIRCUITPY_SETTABLE_PROCESSOR_FREQUENCY = 1 CIRCUITPY_SETTABLE_PROCESSOR_FREQUENCY = 1
# Usually lots of flash space available, so prefer a faster build over smaller messages
CIRCUITPY_MESSAGE_COMPRESSION_LEVEL ?= 1

View File

@ -50,5 +50,6 @@ MICROPY_VFS_LFS2 = 0
# CIRCUITPY # CIRCUITPY
CIRCUITPY_ULAB = 1 CIRCUITPY_ULAB = 1
CIRCUITPY_MESSAGE_COMPRESSION_LEVEL = 1
MICROPY_EMIT_NATIVE = 0 MICROPY_EMIT_NATIVE = 0
CFLAGS += -DCIRCUITPY=1 CFLAGS += -DCIRCUITPY=1

View File

@ -92,3 +92,4 @@ CFLAGS += \
SRC_C += coverage.c SRC_C += coverage.c
SRC_CXX += coveragecpp.cpp SRC_CXX += coveragecpp.cpp
CIRCUITPY_MESSAGE_COMPRESSION_LEVEL = 1

View File

@ -52,6 +52,10 @@ CFLAGS += -DCIRCUITPY=$(CIRCUITPY)
CIRCUITPY_FULL_BUILD ?= 1 CIRCUITPY_FULL_BUILD ?= 1
CFLAGS += -DCIRCUITPY_FULL_BUILD=$(CIRCUITPY_FULL_BUILD) CFLAGS += -DCIRCUITPY_FULL_BUILD=$(CIRCUITPY_FULL_BUILD)
# By default, aggressively reduce the size of in-flash messages, at the cost of
# increased build time
CIRCUITPY_MESSAGE_COMPRESSION_LEVEL ?= 9
# Reduce the size of in-flash properties. Requires support in the .ld linker # Reduce the size of in-flash properties. Requires support in the .ld linker
# file, so not enabled by default. # file, so not enabled by default.
CIRCUITPY_OPTIMIZE_PROPERTY_FLASH_SIZE ?= 0 CIRCUITPY_OPTIMIZE_PROPERTY_FLASH_SIZE ?= 0

View File

@ -174,7 +174,7 @@ class EncodingTable:
qstrs_inv: object qstrs_inv: object
def compute_huffman_coding(qstrs, translation_name, translations, f): def compute_huffman_coding(qstrs, translation_name, translations, f, compression_level):
# possible future improvement: some languages are better when consider len(k) > 2. try both? # possible future improvement: some languages are better when consider len(k) > 2. try both?
qstrs = dict((k, v) for k, v in qstrs.items() if len(k) > 3) qstrs = dict((k, v) for k, v in qstrs.items() if len(k) > 3)
qstr_strs = list(qstrs.keys()) qstr_strs = list(qstrs.keys())
@ -209,6 +209,8 @@ def compute_huffman_coding(qstrs, translation_name, translations, f):
if 0x80 <= ord_c < 0xFF: if 0x80 <= ord_c < 0xFF:
end_unused = min(ord_c, end_unused) end_unused = min(ord_c, end_unused)
max_words = end_unused - 0x80 max_words = end_unused - 0x80
if compression_level < 5:
max_words = 0
bits_per_codepoint = 16 if max_ord > 255 else 8 bits_per_codepoint = 16 if max_ord > 255 else 8
values_type = "uint16_t" if max_ord > 255 else "uint8_t" values_type = "uint16_t" if max_ord > 255 else "uint8_t"
@ -298,8 +300,12 @@ def compute_huffman_coding(qstrs, translation_name, translations, f):
word = scores[0][0] word = scores[0][0]
words.append(word) words.append(word)
splitters = words[:]
if compression_level > 3:
splitters.extend(qstr_strs)
words.sort(key=len) words.sort(key=len)
extractor = TextSplitter(words + qstr_strs) extractor = TextSplitter(splitters)
counter = collections.Counter() counter = collections.Counter()
used_qstr = 0 used_qstr = 0
for t in texts: for t in texts:
@ -356,8 +362,8 @@ def compute_huffman_coding(qstrs, translation_name, translations, f):
len(translation.encode("utf-8")) for (original, translation) in translations len(translation.encode("utf-8")) for (original, translation) in translations
) )
maxlen = len(words[-1]) maxlen = len(words[-1]) if words else 0
minlen = len(words[0]) minlen = len(words[0]) if words else 0
wlencount = [len([None for w in words if len(w) == l]) for l in range(minlen, maxlen + 1)] wlencount = [len([None for w in words if len(w) == l]) for l in range(minlen, maxlen + 1)]
translation_qstr_bits = used_qstr.bit_length() translation_qstr_bits = used_qstr.bit_length()
@ -596,6 +602,12 @@ if __name__ == "__main__":
parser.add_argument( parser.add_argument(
"--translation", default=None, type=str, help="translations for i18n() items" "--translation", default=None, type=str, help="translations for i18n() items"
) )
# Command-line knob for the size/speed trade-off of message compression.
# The thresholds in the help text mirror the checks in compute_huffman_coding:
# the word dictionary is disabled only when the level is below 5, and qstrs are
# used as splitters only when the level is above 3.
parser.add_argument(
    "--compression_level",
    type=int,
    default=9,
    help="degree of compression (>=5: construct dictionary; >3: use qstrs)",
)
parser.add_argument( parser.add_argument(
"--compression_filename", "--compression_filename",
type=argparse.FileType("w", encoding="UTF-8"), type=argparse.FileType("w", encoding="UTF-8"),
@ -619,6 +631,6 @@ if __name__ == "__main__":
i18ns = sorted(i18ns) i18ns = sorted(i18ns)
translations = translate(args.translation, i18ns) translations = translate(args.translation, i18ns)
encoding_table = compute_huffman_coding( encoding_table = compute_huffman_coding(
qstrs, args.translation, translations, args.compression_filename qstrs, args.translation, translations, args.compression_filename, args.compression_level
) )
output_translation_data(encoding_table, translations, args.translation_filename) output_translation_data(encoding_table, translations, args.translation_filename)

View File

@ -269,7 +269,7 @@ $(PY_BUILD)/translations-$(TRANSLATION).c: $(HEADER_BUILD)/compressed_translatio
$(HEADER_BUILD)/compressed_translations.generated.h: $(PY_SRC)/maketranslationdata.py $(HEADER_BUILD)/$(TRANSLATION).mo $(HEADER_BUILD)/qstrdefs.generated.h $(HEADER_BUILD)/compressed_translations.generated.h: $(PY_SRC)/maketranslationdata.py $(HEADER_BUILD)/$(TRANSLATION).mo $(HEADER_BUILD)/qstrdefs.generated.h
$(STEPECHO) "GEN $@" $(STEPECHO) "GEN $@"
$(Q)mkdir -p $(PY_BUILD) $(Q)mkdir -p $(PY_BUILD)
$(Q)$(PYTHON) $(PY_SRC)/maketranslationdata.py --compression_filename $(HEADER_BUILD)/compressed_translations.generated.h --translation $(HEADER_BUILD)/$(TRANSLATION).mo --translation_filename $(PY_BUILD)/translations-$(TRANSLATION).c --qstrdefs_filename $(HEADER_BUILD)/qstrdefs.generated.h $(HEADER_BUILD)/qstrdefs.preprocessed.h $(Q)$(PYTHON) $(PY_SRC)/maketranslationdata.py --compression_filename $(HEADER_BUILD)/compressed_translations.generated.h --translation $(HEADER_BUILD)/$(TRANSLATION).mo --translation_filename $(PY_BUILD)/translations-$(TRANSLATION).c --qstrdefs_filename $(HEADER_BUILD)/qstrdefs.generated.h --compression_level $(CIRCUITPY_MESSAGE_COMPRESSION_LEVEL) $(HEADER_BUILD)/qstrdefs.preprocessed.h
PY_CORE_O += $(PY_BUILD)/translations-$(TRANSLATION).o PY_CORE_O += $(PY_BUILD)/translations-$(TRANSLATION).o