string compression: save a few bits per string

The length was always stored as a 16-bit number, but most translations have
a maximum length that needs far fewer bits.  For example, US English
translation lengths always fit in just 8 bits, and probably all languages fit in 9 bits.

This also has the side effect of reducing the alignment of
compressed_string_t from 2 bytes to 1.

Testing performed: ran in German and English on PyRuler; the printed messages
looked right.

Firmware size, en_US
Before: 3044 bytes free in flash
After: 3408 bytes free in flash

Firmware size, de_DE (with #2967 merged to restore translations)
Before: 1236 bytes free in flash
After: 1600 bytes free in flash
This commit is contained in:
Jeff Epler 2020-05-28 07:40:56 -05:00
parent 0db8b888d3
commit fe3e8d1589
8 changed files with 74 additions and 27 deletions

2
main.c
View File

@ -185,7 +185,7 @@ bool maybe_run_list(const char ** filenames, pyexec_result_t* exec_result) {
}
mp_hal_stdout_tx_str(filename);
const compressed_string_t* compressed = translate(" output:\n");
char decompressed[compressed->length];
char decompressed[decompress_length(compressed)];
decompress(compressed, decompressed);
mp_hal_stdout_tx_str(decompressed);
pyexec_file(filename, exec_result);

View File

@ -135,7 +135,7 @@ STATIC void mp_help_print_modules(void) {
// let the user know there may be other modules available from the filesystem
const compressed_string_t* compressed = translate("Plus any modules on the filesystem\n");
char decompressed[compressed->length];
char decompressed[decompress_length(compressed)];
decompress(compressed, decompressed);
mp_print_str(MP_PYTHON_PRINTER, decompressed);
}
@ -181,7 +181,7 @@ STATIC mp_obj_t mp_builtin_help(size_t n_args, const mp_obj_t *args) {
// print a general help message. Translate only works on single strings on one line.
const compressed_string_t* compressed =
translate("Welcome to Adafruit CircuitPython %s!\n\nPlease visit learn.adafruit.com/category/circuitpython for project guides.\n\nTo list built-in modules please do `help(\"modules\")`.\n");
char decompressed[compressed->length];
char decompressed[decompress_length(compressed)];
decompress(compressed, decompressed);
mp_printf(MP_PYTHON_PRINTER, decompressed, MICROPY_GIT_TAG);
} else {

View File

@ -1,7 +1,7 @@
"""
Process raw qstr file and output qstr data with length, hash and data bytes.
This script works with Python 2.6, 2.7, 3.3 and 3.4.
This script works with Python 2.7, 3.3 and 3.4.
"""
from __future__ import print_function
@ -132,19 +132,37 @@ def compute_huffman_coding(translations, qstrs, compression_filename):
print("// estimated total memory size", len(lengths) + 2*len(values) + sum(len(cb[u]) for u in all_strings_concat))
print("//", values, lengths)
values_type = "uint16_t" if max(ord(u) for u in values) > 255 else "uint8_t"
max_translation_encoded_length = max(len(translation.encode("utf-8")) for original,translation in translations)
with open(compression_filename, "w") as f:
f.write("const uint8_t lengths[] = {{ {} }};\n".format(", ".join(map(str, lengths))))
f.write("const {} values[] = {{ {} }};\n".format(values_type, ", ".join(str(ord(u)) for u in values)))
f.write("#define compress_max_length_bits ({})\n".format(max_translation_encoded_length.bit_length()))
return values, lengths
def decompress(encoding_table, length, encoded):
def decompress(encoding_table, encoded, encoded_length_bits):
values, lengths = encoding_table
#print(l, encoded)
dec = []
this_byte = 0
this_bit = 7
b = encoded[this_byte]
for i in range(length):
bits = 0
for i in range(encoded_length_bits):
bits <<= 1
if 0x80 & b:
bits |= 1
b <<= 1
if this_bit == 0:
this_bit = 7
this_byte += 1
if this_byte < len(encoded):
b = encoded[this_byte]
else:
this_bit -= 1
length = bits
i = 0
while i < length:
bits = 0
bit_length = 0
max_code = lengths[0]
@ -170,10 +188,11 @@ def decompress(encoding_table, length, encoded):
searched_length += lengths[bit_length]
v = values[searched_length + bits - max_code]
i += len(v.encode('utf-8'))
dec.append(v)
return ''.join(dec)
def compress(encoding_table, decompressed):
def compress(encoding_table, decompressed, encoded_length_bits, len_translation_encoded):
if not isinstance(decompressed, str):
raise TypeError()
values, lengths = encoding_table
@ -182,6 +201,19 @@ def compress(encoding_table, decompressed):
#print(lengths)
current_bit = 7
current_byte = 0
code = len_translation_encoded
bits = encoded_length_bits+1
for i in range(bits - 1, 0, -1):
if len_translation_encoded & (1 << (i - 1)):
enc[current_byte] |= 1 << current_bit
if current_bit == 0:
current_bit = 7
#print("packed {0:0{width}b}".format(enc[current_byte], width=8))
current_byte += 1
else:
current_bit -= 1
for c in decompressed:
#print()
#print("char", c, values.index(c))
@ -342,14 +374,17 @@ def print_qstr_data(encoding_table, qcfgs, qstrs, i18ns):
total_text_size = 0
total_text_compressed_size = 0
max_translation_encoded_length = max(len(translation.encode("utf-8")) for original, translation in i18ns)
encoded_length_bits = max_translation_encoded_length.bit_length()
for original, translation in i18ns:
translation_encoded = translation.encode("utf-8")
compressed = compress(encoding_table, translation)
compressed = compress(encoding_table, translation, encoded_length_bits, len(translation_encoded))
total_text_compressed_size += len(compressed)
decompressed = decompress(encoding_table, len(translation_encoded), compressed)
decompressed = decompress(encoding_table, compressed, encoded_length_bits)
assert decompressed == translation
for c in C_ESCAPES:
decompressed = decompressed.replace(c, C_ESCAPES[c])
print("TRANSLATION(\"{}\", {}, {{ {} }}) // {}".format(original, len(translation_encoded)+1, ", ".join(["0x{:02x}".format(x) for x in compressed]), decompressed))
print("TRANSLATION(\"{}\", {}) // {}".format(original, ", ".join(["{:d}".format(x) for x in compressed]), decompressed))
total_text_size += len(translation.encode("utf-8"))
print()
@ -385,6 +420,7 @@ if __name__ == "__main__":
qcfgs, qstrs, i18ns = parse_input_headers(args.infiles)
if args.translation:
i18ns = sorted(i18ns)
translations = translate(args.translation, i18ns)
encoding_table = compute_huffman_coding(translations, qstrs, args.compression_filename)
print_qstr_data(encoding_table, qcfgs, qstrs, translations)

View File

@ -158,7 +158,7 @@ const char *mp_common_errno_to_str(mp_obj_t errno_val, char *buf, size_t len) {
case ENOSPC: desc = translate("No space left on device"); break;
case EROFS: desc = translate("Read-only filesystem"); break;
}
if (desc != NULL && desc->length <= len) {
if (desc != NULL && decompress_length(desc) <= len) {
decompress(desc, buf);
return buf;
}

View File

@ -94,17 +94,17 @@ void mp_obj_print_exception(const mp_print_t *print, mp_obj_t exc) {
assert(n % 3 == 0);
// Decompress the format strings
const compressed_string_t* traceback = translate("Traceback (most recent call last):\n");
char decompressed[traceback->length];
char decompressed[decompress_length(traceback)];
decompress(traceback, decompressed);
#if MICROPY_ENABLE_SOURCE_LINE
const compressed_string_t* frame = translate(" File \"%q\", line %d");
#else
const compressed_string_t* frame = translate(" File \"%q\"");
#endif
char decompressed_frame[frame->length];
char decompressed_frame[decompress_length(frame)];
decompress(frame, decompressed_frame);
const compressed_string_t* block_fmt = translate(", in %q\n");
char decompressed_block[block_fmt->length];
char decompressed_block[decompress_length(block_fmt)];
decompress(block_fmt, decompressed_block);
// Print the traceback

View File

@ -400,7 +400,7 @@ mp_obj_t mp_obj_new_exception_msg_vlist(const mp_obj_type_t *exc_type, const com
// Try to allocate memory for the message
mp_obj_str_t *o_str = m_new_obj_maybe(mp_obj_str_t);
size_t o_str_alloc = fmt->length + 1;
size_t o_str_alloc = decompress_length(fmt);
byte *o_str_buf = m_new_maybe(byte, o_str_alloc);
bool used_emg_buf = false;
@ -433,7 +433,7 @@ mp_obj_t mp_obj_new_exception_msg_vlist(const mp_obj_type_t *exc_type, const com
// We have some memory to format the string
struct _exc_printer_t exc_pr = {!used_emg_buf, o_str_alloc, 0, o_str_buf};
mp_print_t print = {&exc_pr, exc_add_strn};
char fmt_decompressed[fmt->length];
char fmt_decompressed[decompress_length(fmt)];
decompress(fmt, fmt_decompressed);
mp_vprintf(&print, fmt_decompressed, ap);
exc_pr.buf[exc_pr.len] = '\0';

View File

@ -37,7 +37,7 @@
#include "supervisor/serial.h"
void serial_write_compressed(const compressed_string_t* compressed) {
char decompressed[compressed->length];
char decompressed[decompress_length(compressed)];
decompress(compressed, decompressed);
serial_write(decompressed);
}
@ -58,12 +58,22 @@ STATIC int put_utf8(char *buf, int u) {
}
}
// Return the buffer size needed to decompress `compressed`: the UTF-8 byte
// length stored in the string's length prefix, plus one.  decompress() writes
// a '\0' at index length-1, so the extra one is the room for that terminator.
//
// The prefix occupies the first compress_max_length_bits bits of the string,
// most significant bit first, starting at the top bit of `data`.
// compress_max_length_bits is a #define emitted by the qstr generator script.
// NOTE(review): the shifts below assume compress_max_length_bits <= 16 --
// confirm the generator can never emit a larger value.
uint16_t decompress_length(const compressed_string_t* compressed) {
if (compress_max_length_bits <= 8) {
// The whole prefix lives in the first byte; shift out the low bits, which
// are where decompress() starts reading the encoded text.
return 1 + (compressed->data >> (8 - compress_max_length_bits));
} else {
// The prefix spills into the second byte: join data and tail[0] into one
// 16-bit value (data is the high byte) and keep its top
// compress_max_length_bits bits.
return 1 + ((compressed->data * 256 + compressed->tail[0]) >> (16 - compress_max_length_bits));
}
}
char* decompress(const compressed_string_t* compressed, char* decompressed) {
uint8_t this_byte = 0;
uint8_t this_bit = 7;
uint8_t b = compressed->data[this_byte];
uint8_t this_byte = compress_max_length_bits / 8;
uint8_t this_bit = 7 - compress_max_length_bits % 8;
uint8_t b = (&compressed->data)[this_byte];
uint16_t length = decompress_length(compressed);
// Stop one early because the last byte is always NULL.
for (uint16_t i = 0; i < compressed->length - 1;) {
for (uint16_t i = 0; i < length - 1;) {
uint32_t bits = 0;
uint8_t bit_length = 0;
uint32_t max_code = lengths[0];
@ -78,7 +88,7 @@ char* decompress(const compressed_string_t* compressed, char* decompressed) {
if (this_bit == 0) {
this_bit = 7;
this_byte += 1;
b = compressed->data[this_byte]; // This may read past the end but its never used.
b = (&compressed->data)[this_byte]; // This may read past the end but its never used.
} else {
this_bit -= 1;
}
@ -91,14 +101,14 @@ char* decompress(const compressed_string_t* compressed, char* decompressed) {
i += put_utf8(decompressed + i, values[searched_length + bits - max_code]);
}
decompressed[compressed->length-1] = '\0';
decompressed[length-1] = '\0';
return decompressed;
}
inline __attribute__((always_inline)) const compressed_string_t* translate(const char* original) {
#ifndef NO_QSTR
#define QDEF(id, str)
#define TRANSLATION(id, len, compressed...) if (strcmp(original, id) == 0) { static const compressed_string_t v = {.length = len, .data = compressed}; return &v; } else
#define TRANSLATION(id, firstbyte, ...) if (strcmp(original, id) == 0) { static const compressed_string_t v = { .data = firstbyte, .tail = { __VA_ARGS__ } }; return &v; } else
#include "genhdr/qstrdefs.generated.h"
#undef TRANSLATION
#undef QDEF

View File

@ -30,12 +30,13 @@
#include <stdint.h>
typedef struct {
uint16_t length;
const uint8_t data[];
uint8_t data;
const uint8_t tail[];
} compressed_string_t;
const compressed_string_t* translate(const char* c);
void serial_write_compressed(const compressed_string_t* compressed);
char* decompress(const compressed_string_t* compressed, char* decompressed);
uint16_t decompress_length(const compressed_string_t* compressed);
#endif // MICROPY_INCLUDED_SUPERVISOR_TRANSLATE_H