string compression: save a few bits per string
The length was always stored as a 16-bit number, but most translations have a maximum length far smaller than that. For example, US English translation lengths always fit in just 8 bits, and probably all languages fit in 9 bits. This change also has the side effect of reducing the alignment of compressed_string_t from 2 bytes to 1. Testing performed: ran in German and English on pyruler; printed messages looked right. Firmware size, en_US: Before: 3044 bytes free in flash; After: 3408 bytes free in flash. Firmware size, de_DE (with #2967 merged to restore translations): Before: 1236 bytes free in flash; After: 1600 bytes free in flash.
This commit is contained in:
parent
0db8b888d3
commit
fe3e8d1589
2
main.c
2
main.c
@ -185,7 +185,7 @@ bool maybe_run_list(const char ** filenames, pyexec_result_t* exec_result) {
|
|||||||
}
|
}
|
||||||
mp_hal_stdout_tx_str(filename);
|
mp_hal_stdout_tx_str(filename);
|
||||||
const compressed_string_t* compressed = translate(" output:\n");
|
const compressed_string_t* compressed = translate(" output:\n");
|
||||||
char decompressed[compressed->length];
|
char decompressed[decompress_length(compressed)];
|
||||||
decompress(compressed, decompressed);
|
decompress(compressed, decompressed);
|
||||||
mp_hal_stdout_tx_str(decompressed);
|
mp_hal_stdout_tx_str(decompressed);
|
||||||
pyexec_file(filename, exec_result);
|
pyexec_file(filename, exec_result);
|
||||||
|
@ -135,7 +135,7 @@ STATIC void mp_help_print_modules(void) {
|
|||||||
|
|
||||||
// let the user know there may be other modules available from the filesystem
|
// let the user know there may be other modules available from the filesystem
|
||||||
const compressed_string_t* compressed = translate("Plus any modules on the filesystem\n");
|
const compressed_string_t* compressed = translate("Plus any modules on the filesystem\n");
|
||||||
char decompressed[compressed->length];
|
char decompressed[decompress_length(compressed)];
|
||||||
decompress(compressed, decompressed);
|
decompress(compressed, decompressed);
|
||||||
mp_print_str(MP_PYTHON_PRINTER, decompressed);
|
mp_print_str(MP_PYTHON_PRINTER, decompressed);
|
||||||
}
|
}
|
||||||
@ -181,7 +181,7 @@ STATIC mp_obj_t mp_builtin_help(size_t n_args, const mp_obj_t *args) {
|
|||||||
// print a general help message. Translate only works on single strings on one line.
|
// print a general help message. Translate only works on single strings on one line.
|
||||||
const compressed_string_t* compressed =
|
const compressed_string_t* compressed =
|
||||||
translate("Welcome to Adafruit CircuitPython %s!\n\nPlease visit learn.adafruit.com/category/circuitpython for project guides.\n\nTo list built-in modules please do `help(\"modules\")`.\n");
|
translate("Welcome to Adafruit CircuitPython %s!\n\nPlease visit learn.adafruit.com/category/circuitpython for project guides.\n\nTo list built-in modules please do `help(\"modules\")`.\n");
|
||||||
char decompressed[compressed->length];
|
char decompressed[decompress_length(compressed)];
|
||||||
decompress(compressed, decompressed);
|
decompress(compressed, decompressed);
|
||||||
mp_printf(MP_PYTHON_PRINTER, decompressed, MICROPY_GIT_TAG);
|
mp_printf(MP_PYTHON_PRINTER, decompressed, MICROPY_GIT_TAG);
|
||||||
} else {
|
} else {
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
Process raw qstr file and output qstr data with length, hash and data bytes.
|
Process raw qstr file and output qstr data with length, hash and data bytes.
|
||||||
|
|
||||||
This script works with Python 2.6, 2.7, 3.3 and 3.4.
|
This script works with Python 2.7, 3.3 and 3.4.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
@ -132,19 +132,37 @@ def compute_huffman_coding(translations, qstrs, compression_filename):
|
|||||||
print("// estimated total memory size", len(lengths) + 2*len(values) + sum(len(cb[u]) for u in all_strings_concat))
|
print("// estimated total memory size", len(lengths) + 2*len(values) + sum(len(cb[u]) for u in all_strings_concat))
|
||||||
print("//", values, lengths)
|
print("//", values, lengths)
|
||||||
values_type = "uint16_t" if max(ord(u) for u in values) > 255 else "uint8_t"
|
values_type = "uint16_t" if max(ord(u) for u in values) > 255 else "uint8_t"
|
||||||
|
max_translation_encoded_length = max(len(translation.encode("utf-8")) for original,translation in translations)
|
||||||
with open(compression_filename, "w") as f:
|
with open(compression_filename, "w") as f:
|
||||||
f.write("const uint8_t lengths[] = {{ {} }};\n".format(", ".join(map(str, lengths))))
|
f.write("const uint8_t lengths[] = {{ {} }};\n".format(", ".join(map(str, lengths))))
|
||||||
f.write("const {} values[] = {{ {} }};\n".format(values_type, ", ".join(str(ord(u)) for u in values)))
|
f.write("const {} values[] = {{ {} }};\n".format(values_type, ", ".join(str(ord(u)) for u in values)))
|
||||||
|
f.write("#define compress_max_length_bits ({})\n".format(max_translation_encoded_length.bit_length()))
|
||||||
return values, lengths
|
return values, lengths
|
||||||
|
|
||||||
def decompress(encoding_table, length, encoded):
|
def decompress(encoding_table, encoded, encoded_length_bits):
|
||||||
values, lengths = encoding_table
|
values, lengths = encoding_table
|
||||||
#print(l, encoded)
|
|
||||||
dec = []
|
dec = []
|
||||||
this_byte = 0
|
this_byte = 0
|
||||||
this_bit = 7
|
this_bit = 7
|
||||||
b = encoded[this_byte]
|
b = encoded[this_byte]
|
||||||
for i in range(length):
|
bits = 0
|
||||||
|
for i in range(encoded_length_bits):
|
||||||
|
bits <<= 1
|
||||||
|
if 0x80 & b:
|
||||||
|
bits |= 1
|
||||||
|
|
||||||
|
b <<= 1
|
||||||
|
if this_bit == 0:
|
||||||
|
this_bit = 7
|
||||||
|
this_byte += 1
|
||||||
|
if this_byte < len(encoded):
|
||||||
|
b = encoded[this_byte]
|
||||||
|
else:
|
||||||
|
this_bit -= 1
|
||||||
|
length = bits
|
||||||
|
|
||||||
|
i = 0
|
||||||
|
while i < length:
|
||||||
bits = 0
|
bits = 0
|
||||||
bit_length = 0
|
bit_length = 0
|
||||||
max_code = lengths[0]
|
max_code = lengths[0]
|
||||||
@ -170,10 +188,11 @@ def decompress(encoding_table, length, encoded):
|
|||||||
searched_length += lengths[bit_length]
|
searched_length += lengths[bit_length]
|
||||||
|
|
||||||
v = values[searched_length + bits - max_code]
|
v = values[searched_length + bits - max_code]
|
||||||
|
i += len(v.encode('utf-8'))
|
||||||
dec.append(v)
|
dec.append(v)
|
||||||
return ''.join(dec)
|
return ''.join(dec)
|
||||||
|
|
||||||
def compress(encoding_table, decompressed):
|
def compress(encoding_table, decompressed, encoded_length_bits, len_translation_encoded):
|
||||||
if not isinstance(decompressed, str):
|
if not isinstance(decompressed, str):
|
||||||
raise TypeError()
|
raise TypeError()
|
||||||
values, lengths = encoding_table
|
values, lengths = encoding_table
|
||||||
@ -182,6 +201,19 @@ def compress(encoding_table, decompressed):
|
|||||||
#print(lengths)
|
#print(lengths)
|
||||||
current_bit = 7
|
current_bit = 7
|
||||||
current_byte = 0
|
current_byte = 0
|
||||||
|
|
||||||
|
code = len_translation_encoded
|
||||||
|
bits = encoded_length_bits+1
|
||||||
|
for i in range(bits - 1, 0, -1):
|
||||||
|
if len_translation_encoded & (1 << (i - 1)):
|
||||||
|
enc[current_byte] |= 1 << current_bit
|
||||||
|
if current_bit == 0:
|
||||||
|
current_bit = 7
|
||||||
|
#print("packed {0:0{width}b}".format(enc[current_byte], width=8))
|
||||||
|
current_byte += 1
|
||||||
|
else:
|
||||||
|
current_bit -= 1
|
||||||
|
|
||||||
for c in decompressed:
|
for c in decompressed:
|
||||||
#print()
|
#print()
|
||||||
#print("char", c, values.index(c))
|
#print("char", c, values.index(c))
|
||||||
@ -342,14 +374,17 @@ def print_qstr_data(encoding_table, qcfgs, qstrs, i18ns):
|
|||||||
|
|
||||||
total_text_size = 0
|
total_text_size = 0
|
||||||
total_text_compressed_size = 0
|
total_text_compressed_size = 0
|
||||||
|
max_translation_encoded_length = max(len(translation.encode("utf-8")) for original, translation in i18ns)
|
||||||
|
encoded_length_bits = max_translation_encoded_length.bit_length()
|
||||||
for original, translation in i18ns:
|
for original, translation in i18ns:
|
||||||
translation_encoded = translation.encode("utf-8")
|
translation_encoded = translation.encode("utf-8")
|
||||||
compressed = compress(encoding_table, translation)
|
compressed = compress(encoding_table, translation, encoded_length_bits, len(translation_encoded))
|
||||||
total_text_compressed_size += len(compressed)
|
total_text_compressed_size += len(compressed)
|
||||||
decompressed = decompress(encoding_table, len(translation_encoded), compressed)
|
decompressed = decompress(encoding_table, compressed, encoded_length_bits)
|
||||||
|
assert decompressed == translation
|
||||||
for c in C_ESCAPES:
|
for c in C_ESCAPES:
|
||||||
decompressed = decompressed.replace(c, C_ESCAPES[c])
|
decompressed = decompressed.replace(c, C_ESCAPES[c])
|
||||||
print("TRANSLATION(\"{}\", {}, {{ {} }}) // {}".format(original, len(translation_encoded)+1, ", ".join(["0x{:02x}".format(x) for x in compressed]), decompressed))
|
print("TRANSLATION(\"{}\", {}) // {}".format(original, ", ".join(["{:d}".format(x) for x in compressed]), decompressed))
|
||||||
total_text_size += len(translation.encode("utf-8"))
|
total_text_size += len(translation.encode("utf-8"))
|
||||||
|
|
||||||
print()
|
print()
|
||||||
@ -385,6 +420,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
qcfgs, qstrs, i18ns = parse_input_headers(args.infiles)
|
qcfgs, qstrs, i18ns = parse_input_headers(args.infiles)
|
||||||
if args.translation:
|
if args.translation:
|
||||||
|
i18ns = sorted(i18ns)
|
||||||
translations = translate(args.translation, i18ns)
|
translations = translate(args.translation, i18ns)
|
||||||
encoding_table = compute_huffman_coding(translations, qstrs, args.compression_filename)
|
encoding_table = compute_huffman_coding(translations, qstrs, args.compression_filename)
|
||||||
print_qstr_data(encoding_table, qcfgs, qstrs, translations)
|
print_qstr_data(encoding_table, qcfgs, qstrs, translations)
|
||||||
|
@ -158,7 +158,7 @@ const char *mp_common_errno_to_str(mp_obj_t errno_val, char *buf, size_t len) {
|
|||||||
case ENOSPC: desc = translate("No space left on device"); break;
|
case ENOSPC: desc = translate("No space left on device"); break;
|
||||||
case EROFS: desc = translate("Read-only filesystem"); break;
|
case EROFS: desc = translate("Read-only filesystem"); break;
|
||||||
}
|
}
|
||||||
if (desc != NULL && desc->length <= len) {
|
if (desc != NULL && decompress_length(desc) <= len) {
|
||||||
decompress(desc, buf);
|
decompress(desc, buf);
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
6
py/obj.c
6
py/obj.c
@ -94,17 +94,17 @@ void mp_obj_print_exception(const mp_print_t *print, mp_obj_t exc) {
|
|||||||
assert(n % 3 == 0);
|
assert(n % 3 == 0);
|
||||||
// Decompress the format strings
|
// Decompress the format strings
|
||||||
const compressed_string_t* traceback = translate("Traceback (most recent call last):\n");
|
const compressed_string_t* traceback = translate("Traceback (most recent call last):\n");
|
||||||
char decompressed[traceback->length];
|
char decompressed[decompress_length(traceback)];
|
||||||
decompress(traceback, decompressed);
|
decompress(traceback, decompressed);
|
||||||
#if MICROPY_ENABLE_SOURCE_LINE
|
#if MICROPY_ENABLE_SOURCE_LINE
|
||||||
const compressed_string_t* frame = translate(" File \"%q\", line %d");
|
const compressed_string_t* frame = translate(" File \"%q\", line %d");
|
||||||
#else
|
#else
|
||||||
const compressed_string_t* frame = translate(" File \"%q\"");
|
const compressed_string_t* frame = translate(" File \"%q\"");
|
||||||
#endif
|
#endif
|
||||||
char decompressed_frame[frame->length];
|
char decompressed_frame[decompress_length(frame)];
|
||||||
decompress(frame, decompressed_frame);
|
decompress(frame, decompressed_frame);
|
||||||
const compressed_string_t* block_fmt = translate(", in %q\n");
|
const compressed_string_t* block_fmt = translate(", in %q\n");
|
||||||
char decompressed_block[block_fmt->length];
|
char decompressed_block[decompress_length(block_fmt)];
|
||||||
decompress(block_fmt, decompressed_block);
|
decompress(block_fmt, decompressed_block);
|
||||||
|
|
||||||
// Print the traceback
|
// Print the traceback
|
||||||
|
@ -400,7 +400,7 @@ mp_obj_t mp_obj_new_exception_msg_vlist(const mp_obj_type_t *exc_type, const com
|
|||||||
|
|
||||||
// Try to allocate memory for the message
|
// Try to allocate memory for the message
|
||||||
mp_obj_str_t *o_str = m_new_obj_maybe(mp_obj_str_t);
|
mp_obj_str_t *o_str = m_new_obj_maybe(mp_obj_str_t);
|
||||||
size_t o_str_alloc = fmt->length + 1;
|
size_t o_str_alloc = decompress_length(fmt);
|
||||||
byte *o_str_buf = m_new_maybe(byte, o_str_alloc);
|
byte *o_str_buf = m_new_maybe(byte, o_str_alloc);
|
||||||
|
|
||||||
bool used_emg_buf = false;
|
bool used_emg_buf = false;
|
||||||
@ -433,7 +433,7 @@ mp_obj_t mp_obj_new_exception_msg_vlist(const mp_obj_type_t *exc_type, const com
|
|||||||
// We have some memory to format the string
|
// We have some memory to format the string
|
||||||
struct _exc_printer_t exc_pr = {!used_emg_buf, o_str_alloc, 0, o_str_buf};
|
struct _exc_printer_t exc_pr = {!used_emg_buf, o_str_alloc, 0, o_str_buf};
|
||||||
mp_print_t print = {&exc_pr, exc_add_strn};
|
mp_print_t print = {&exc_pr, exc_add_strn};
|
||||||
char fmt_decompressed[fmt->length];
|
char fmt_decompressed[decompress_length(fmt)];
|
||||||
decompress(fmt, fmt_decompressed);
|
decompress(fmt, fmt_decompressed);
|
||||||
mp_vprintf(&print, fmt_decompressed, ap);
|
mp_vprintf(&print, fmt_decompressed, ap);
|
||||||
exc_pr.buf[exc_pr.len] = '\0';
|
exc_pr.buf[exc_pr.len] = '\0';
|
||||||
|
@ -37,7 +37,7 @@
|
|||||||
#include "supervisor/serial.h"
|
#include "supervisor/serial.h"
|
||||||
|
|
||||||
void serial_write_compressed(const compressed_string_t* compressed) {
|
void serial_write_compressed(const compressed_string_t* compressed) {
|
||||||
char decompressed[compressed->length];
|
char decompressed[decompress_length(compressed)];
|
||||||
decompress(compressed, decompressed);
|
decompress(compressed, decompressed);
|
||||||
serial_write(decompressed);
|
serial_write(decompressed);
|
||||||
}
|
}
|
||||||
@ -58,12 +58,22 @@ STATIC int put_utf8(char *buf, int u) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint16_t decompress_length(const compressed_string_t* compressed) {
|
||||||
|
if (compress_max_length_bits <= 8) {
|
||||||
|
return 1 + (compressed->data >> (8 - compress_max_length_bits));
|
||||||
|
} else {
|
||||||
|
return 1 + ((compressed->data * 256 + compressed->tail[0]) >> (16 - compress_max_length_bits));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
char* decompress(const compressed_string_t* compressed, char* decompressed) {
|
char* decompress(const compressed_string_t* compressed, char* decompressed) {
|
||||||
uint8_t this_byte = 0;
|
uint8_t this_byte = compress_max_length_bits / 8;
|
||||||
uint8_t this_bit = 7;
|
uint8_t this_bit = 7 - compress_max_length_bits % 8;
|
||||||
uint8_t b = compressed->data[this_byte];
|
uint8_t b = (&compressed->data)[this_byte];
|
||||||
|
uint16_t length = decompress_length(compressed);
|
||||||
|
|
||||||
// Stop one early because the last byte is always NULL.
|
// Stop one early because the last byte is always NULL.
|
||||||
for (uint16_t i = 0; i < compressed->length - 1;) {
|
for (uint16_t i = 0; i < length - 1;) {
|
||||||
uint32_t bits = 0;
|
uint32_t bits = 0;
|
||||||
uint8_t bit_length = 0;
|
uint8_t bit_length = 0;
|
||||||
uint32_t max_code = lengths[0];
|
uint32_t max_code = lengths[0];
|
||||||
@ -78,7 +88,7 @@ char* decompress(const compressed_string_t* compressed, char* decompressed) {
|
|||||||
if (this_bit == 0) {
|
if (this_bit == 0) {
|
||||||
this_bit = 7;
|
this_bit = 7;
|
||||||
this_byte += 1;
|
this_byte += 1;
|
||||||
b = compressed->data[this_byte]; // This may read past the end but its never used.
|
b = (&compressed->data)[this_byte]; // This may read past the end but its never used.
|
||||||
} else {
|
} else {
|
||||||
this_bit -= 1;
|
this_bit -= 1;
|
||||||
}
|
}
|
||||||
@ -91,14 +101,14 @@ char* decompress(const compressed_string_t* compressed, char* decompressed) {
|
|||||||
i += put_utf8(decompressed + i, values[searched_length + bits - max_code]);
|
i += put_utf8(decompressed + i, values[searched_length + bits - max_code]);
|
||||||
}
|
}
|
||||||
|
|
||||||
decompressed[compressed->length-1] = '\0';
|
decompressed[length-1] = '\0';
|
||||||
return decompressed;
|
return decompressed;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline __attribute__((always_inline)) const compressed_string_t* translate(const char* original) {
|
inline __attribute__((always_inline)) const compressed_string_t* translate(const char* original) {
|
||||||
#ifndef NO_QSTR
|
#ifndef NO_QSTR
|
||||||
#define QDEF(id, str)
|
#define QDEF(id, str)
|
||||||
#define TRANSLATION(id, len, compressed...) if (strcmp(original, id) == 0) { static const compressed_string_t v = {.length = len, .data = compressed}; return &v; } else
|
#define TRANSLATION(id, firstbyte, ...) if (strcmp(original, id) == 0) { static const compressed_string_t v = { .data = firstbyte, .tail = { __VA_ARGS__ } }; return &v; } else
|
||||||
#include "genhdr/qstrdefs.generated.h"
|
#include "genhdr/qstrdefs.generated.h"
|
||||||
#undef TRANSLATION
|
#undef TRANSLATION
|
||||||
#undef QDEF
|
#undef QDEF
|
||||||
|
@ -30,12 +30,13 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint16_t length;
|
uint8_t data;
|
||||||
const uint8_t data[];
|
const uint8_t tail[];
|
||||||
} compressed_string_t;
|
} compressed_string_t;
|
||||||
|
|
||||||
const compressed_string_t* translate(const char* c);
|
const compressed_string_t* translate(const char* c);
|
||||||
void serial_write_compressed(const compressed_string_t* compressed);
|
void serial_write_compressed(const compressed_string_t* compressed);
|
||||||
char* decompress(const compressed_string_t* compressed, char* decompressed);
|
char* decompress(const compressed_string_t* compressed, char* decompressed);
|
||||||
|
uint16_t decompress_length(const compressed_string_t* compressed);
|
||||||
|
|
||||||
#endif // MICROPY_INCLUDED_SUPERVISOR_TRANSLATE_H
|
#endif // MICROPY_INCLUDED_SUPERVISOR_TRANSLATE_H
|
||||||
|
Loading…
Reference in New Issue
Block a user