circuitpython/supervisor/shared/translate.c

/*
 * This file is part of the MicroPython project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2018 Scott Shawcroft for Adafruit Industries
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "supervisor/shared/translate.h"

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#ifndef NO_QSTR
#include "genhdr/compression.generated.h"
#endif

#include "supervisor/serial.h"

// Expand a compressed message into a temporary stack buffer and send the
// resulting C string out over the serial connection.
//
// The buffer is sized from compressed->length, which is the byte count that
// decompress() fills in (text plus terminator).
void serial_write_compressed(const compressed_string_t* compressed) {
    char text[compressed->length];
    // decompress() hands back the same buffer it was given, so its result can
    // be forwarded directly.
    serial_write(decompress(compressed, text));
}

// Encode the code point `u` as UTF-8 into `buf`.
//
// Handles code points up to U+FFFF (1 to 3 output bytes). The caller must
// make sure `buf` has room for the bytes written; no NUL terminator is added.
//
// Returns the number of bytes stored in `buf` (1, 2 or 3).
STATIC int put_utf8(char *buf, int u) {
    if(u <= 0x7f) {
        // 0xxxxxxx
        *buf = u;
        return 1;
    } else if(u <= 0x07ff) {
        // 110xxxxx 10xxxxxx
        *buf++ = 0b11000000 | (u >> 6);
        *buf   = 0b10000000 | (u & 0b00111111);
        return 2;
    } else { // u <= 0xffff
        // 1110xxxx 10xxxxxx 10xxxxxx
        // The lead byte of a 3-byte sequence carries the 1110 prefix, and each
        // continuation byte must go to its own position in the buffer.
        *buf++ = 0b11100000 | (u >> 12);
        *buf++ = 0b10000000 | ((u >> 6) & 0b00111111);
        *buf   = 0b10000000 | (u & 0b00111111);
        return 3;
    }
}

// Decode a compressed string into a NUL-terminated UTF-8 C string.
//
// compressed:   the packed message. `length` is the number of bytes the
//               decoded string occupies, including the trailing NUL. `data`
//               is a bit stream consumed most-significant bit first.
// decompressed: output buffer; must hold at least compressed->length bytes.
//
// The `lengths` and `values` tables are not defined in this block (presumably
// they come from the generated compression header - confirm against the build).
//
// Returns `decompressed`.
char* decompress(const compressed_string_t* compressed, char* decompressed) {
    if (compressed->length == 0) {
        // There is no room for even the terminator; storing it at
        // decompressed[length - 1] would write before the buffer.
        return decompressed;
    }
    // Index into compressed->data. Wide enough that it cannot wrap around for
    // inputs longer than 255 bytes.
    size_t this_byte = 0;
    uint8_t this_bit = 7;
    // Current input byte, shifted left as bits are consumed from its top.
    uint8_t b = compressed->data[this_byte];
    // Stop one early because the last byte is always NUL.
    for (uint16_t i = 0; i < compressed->length - 1;) {
        uint32_t bits = 0;
        uint8_t bit_length = 0;
        uint32_t max_code = lengths[0];
        uint32_t searched_length = lengths[0];
        // Grow the candidate code one bit at a time until it falls below the
        // limit for the current code length.
        while (true) {
            bits <<= 1;
            if ((0x80 & b) != 0) {
                bits |= 1;
            }
            b <<= 1;
            bit_length += 1;
            if (this_bit == 0) {
                this_bit = 7;
                this_byte += 1;
                b = compressed->data[this_byte]; // This may read past the end but it is never used.
            } else {
                this_bit -= 1;
            }
            if (max_code > 0 && bits < max_code) {
                break;
            }
            max_code = (max_code << 1) + lengths[bit_length];
            searched_length += lengths[bit_length];
        }
        // Each decoded value is a code point; it may expand to several output bytes.
        i += put_utf8(decompressed + i, values[searched_length + bits - max_code]);
    }

    decompressed[compressed->length-1] = '\0';
    return decompressed;
}

// Look up the compressed form of the message `original`.
//
// The body is generated by including genhdr/qstrdefs.generated.h with
// TRANSLATION defined so that each entry becomes one link of an
// if / else-if chain comparing `original` to that entry's id with strcmp().
// A match returns a pointer to a function-local static const
// compressed_string_t built from the entry's length and data.
//
// Returns NULL when no entry matches, or when NO_QSTR is defined (the chain
// is then omitted entirely).
inline __attribute__((always_inline)) const compressed_string_t* translate(const char* original) {
    #ifndef NO_QSTR
    // QDEF entries in the generated header are not needed here; expand them to nothing.
    #define QDEF(id, str)
    // Each expansion ends in a dangling `else`, so consecutive entries chain
    // together and the last one binds to the `return NULL;` below.
    #define TRANSLATION(id, len, compressed...) if (strcmp(original, id) == 0) { static const compressed_string_t v = {.length = len, .data = compressed}; return &v; } else
    #include "genhdr/qstrdefs.generated.h"
    #undef TRANSLATION
    #undef QDEF
    #endif
    // Fallthrough target of the chain above: no translation found.
    return NULL;
}
atmel-samd: Support auto-reset based on USB write activity. It will soft-reboot micropython after a burst of writes to the file system. This means that after you save files on your computer they will be automatically rerun. This can be disabled in the build by unsetting AUTORESET_TIMER in mpconfigboard.h. Using the REPL will also prevent the soft resets until you reset with CTRL-D manually. 2016-10-25 17:27:59 -04:00			`/*`
Modernize module and class static dicts; update freetouch 2017-08-27 15:02:50 -04:00			`* This file is part of the MicroPython project, http://micropython.org/`
atmel-samd: Support auto-reset based on USB write activity. It will soft-reboot micropython after a burst of writes to the file system. This means that after you save files on your computer they will be automatically rerun. This can be disabled in the build by unsetting AUTORESET_TIMER in mpconfigboard.h. Using the REPL will also prevent the soft resets until you reset with CTRL-D manually. 2016-10-25 17:27:59 -04:00			`*`
			`* The MIT License (MIT)`
			`*`
Support internationalisation. 2018-07-31 19:53:54 -04:00			`* Copyright (c) 2018 Scott Shawcroft for Adafruit Industries`
atmel-samd: Support auto-reset based on USB write activity. It will soft-reboot micropython after a burst of writes to the file system. This means that after you save files on your computer they will be automatically rerun. This can be disabled in the build by unsetting AUTORESET_TIMER in mpconfigboard.h. Using the REPL will also prevent the soft resets until you reset with CTRL-D manually. 2016-10-25 17:27:59 -04:00			`*`
			`* Permission is hereby granted, free of charge, to any person obtaining a copy`
			`* of this software and associated documentation files (the "Software"), to deal`
			`* in the Software without restriction, including without limitation the rights`
			`* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell`
			`* copies of the Software, and to permit persons to whom the Software is`
			`* furnished to do so, subject to the following conditions:`
			`*`
			`* The above copyright notice and this permission notice shall be included in`
			`* all copies or substantial portions of the Software.`
			`*`
			`* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR`
			`* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,`
			`* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE`
			`* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER`
			`* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,`
			`* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN`
			`* THE SOFTWARE.`
			`*/`

Support internationalisation. 2018-07-31 19:53:54 -04:00			`#include "supervisor/shared/translate.h"`
atmel-samd: Support auto-reset based on USB write activity. It will soft-reboot micropython after a burst of writes to the file system. This means that after you save files on your computer they will be automatically rerun. This can be disabled in the build by unsetting AUTORESET_TIMER in mpconfigboard.h. Using the REPL will also prevent the soft resets until you reset with CTRL-D manually. 2016-10-25 17:27:59 -04:00
Compress all translated strings with Huffman coding. This saves code space in builds which use link-time optimization. The optimization drops the untranslated strings and replaces them with a compressed_string_t struct. It can then be decompressed to a c string. Builds without LTO work as well but include both untranslated strings and compressed strings. This work could be expanded to include QSTRs and loaded strings if a compress method is added to C. Its tracked in #531. 2018-08-15 21:32:37 -04:00			`#include <stdbool.h>`
			`#include <stdint.h>`
Support internationalisation. 2018-07-31 19:53:54 -04:00			`#include <string.h>`
atmel-samd: Rework tick timer to use TC5 and support neopixel status LED. The tick timer needed to be reworked because the ASF delay functions also use the SysTick timer. Now, it uses TC5 and calls out to the autoreset logic every tick. Fixes #43. Added neopixel status colors and corrected the latch time from ms to us. Fixes #42. 2016-10-28 23:16:39 -04:00
Compress all translated strings with Huffman coding. This saves code space in builds which use link-time optimization. The optimization drops the untranslated strings and replaces them with a compressed_string_t struct. It can then be decompressed to a c string. Builds without LTO work as well but include both untranslated strings and compressed strings. This work could be expanded to include QSTRs and loaded strings if a compress method is added to C. Its tracked in #531. 2018-08-15 21:32:37 -04:00			`#ifndef NO_QSTR`
			`#include "genhdr/compression.generated.h"`
			`#endif`

Rework safe mode and have heap overwrite trigger it. This creates a common safe mode mechanic that ports can share. As a result, the nRF52 now has safe mode support as well. The common safe mode adds a 700ms delay at startup where a reset during that window will cause a reset into safe mode. This window is designated by a yellow status pixel and flashing the single led three times. A couple NeoPixel fixes are included for the nRF52 as well. Fixes #1034. Fixes #990. Fixes #615. 2018-12-06 17:24:20 -05:00			`#include "supervisor/serial.h"`

			`void serial_write_compressed(const compressed_string_t* compressed) {`
			`char decompressed[compressed->length];`
			`decompress(compressed, decompressed);`
			`serial_write(decompressed);`
			`}`

translation: Compress as unicode, not bytes By treating each unicode code-point as a single entity for huffman compression, the overall compression rate can be somewhat improved without changing the algorithm. On the decompression side, when compressed values above 127 are encountered, they need to be converted from a 16-bit Unicode code point into a UTF-8 byte sequence. Doing this returns approximately 1.5kB of flash storage with the zh_Latn_pinyin translation. (292 -> 1768 bytes remaining in my build of trinket_m0) Other "more ASCII" translations benefit less, and in fact zh_Latn_pinyin is no longer the most constrained translation! (de_DE 1156 -> 1384 bytes free in flash, I didn't check others before pushing for CI) English is slightly pessimized, 2840 -> 2788 bytes, probably mostly because the "values" array was changed from uint8_t to uint16_t, which is strictly not required for an all-ASCII translation. This could probably be avoided in this case, but as English is not the most constrained translation it doesn't really matter. Testing performed: built for feather nRF52840 express and trinket m0 in English and zh_Latn_pinyin; ran and verified the localized messages such as Àn xià rènhé jiàn jìnrù REPL. Shǐyòng CTRL-D chóngxīn jiāzài. and Press any key to enter the REPL. Use CTRL-D to reload. were properly displayed. 2019-12-02 09:41:03 -05:00			`STATIC int put_utf8(char *buf, int u) {`
			`if(u <= 0x7f) {`
			`*buf = u;`
			`return 1;`
			`} else if(u <= 0x07ff) {`
			`*buf++ = 0b11000000 \| (u >> 6);`
			`*buf = 0b10000000 \| (u & 0b00111111);`
			`return 2;`
			`} else { // u <= 0xffff)`
			`*buf++ = 0b11000000 \| (u >> 12);`
			`*buf = 0b10000000 \| ((u >> 6) & 0b00111111);`
			`*buf = 0b10000000 \| (u & 0b00111111);`
			`return 3;`
			`}`
			`}`

Compress all translated strings with Huffman coding. This saves code space in builds which use link-time optimization. The optimization drops the untranslated strings and replaces them with a compressed_string_t struct. It can then be decompressed to a c string. Builds without LTO work as well but include both untranslated strings and compressed strings. This work could be expanded to include QSTRs and loaded strings if a compress method is added to C. Its tracked in #531. 2018-08-15 21:32:37 -04:00			`char* decompress(const compressed_string_t* compressed, char* decompressed) {`
			`uint8_t this_byte = 0;`
			`uint8_t this_bit = 7;`
			`uint8_t b = compressed->data[this_byte];`
			`// Stop one early because the last byte is always NULL.`
translation: Compress as unicode, not bytes By treating each unicode code-point as a single entity for huffman compression, the overall compression rate can be somewhat improved without changing the algorithm. On the decompression side, when compressed values above 127 are encountered, they need to be converted from a 16-bit Unicode code point into a UTF-8 byte sequence. Doing this returns approximately 1.5kB of flash storage with the zh_Latn_pinyin translation. (292 -> 1768 bytes remaining in my build of trinket_m0) Other "more ASCII" translations benefit less, and in fact zh_Latn_pinyin is no longer the most constrained translation! (de_DE 1156 -> 1384 bytes free in flash, I didn't check others before pushing for CI) English is slightly pessimized, 2840 -> 2788 bytes, probably mostly because the "values" array was changed from uint8_t to uint16_t, which is strictly not required for an all-ASCII translation. This could probably be avoided in this case, but as English is not the most constrained translation it doesn't really matter. Testing performed: built for feather nRF52840 express and trinket m0 in English and zh_Latn_pinyin; ran and verified the localized messages such as Àn xià rènhé jiàn jìnrù REPL. Shǐyòng CTRL-D chóngxīn jiāzài. and Press any key to enter the REPL. Use CTRL-D to reload. were properly displayed. 2019-12-02 09:41:03 -05:00			`for (uint16_t i = 0; i < compressed->length - 1;) {`
Compress all translated strings with Huffman coding. This saves code space in builds which use link-time optimization. The optimization drops the untranslated strings and replaces them with a compressed_string_t struct. It can then be decompressed to a c string. Builds without LTO work as well but include both untranslated strings and compressed strings. This work could be expanded to include QSTRs and loaded strings if a compress method is added to C. Its tracked in #531. 2018-08-15 21:32:37 -04:00			`uint32_t bits = 0;`
			`uint8_t bit_length = 0;`
			`uint32_t max_code = lengths[0];`
			`uint32_t searched_length = lengths[0];`
			`while (true) {`
			`bits <<= 1;`
			`if ((0x80 & b) != 0) {`
			`bits \|= 1;`
			`}`
			`b <<= 1;`
			`bit_length += 1;`
			`if (this_bit == 0) {`
			`this_bit = 7;`
			`this_byte += 1;`
			`b = compressed->data[this_byte]; // This may read past the end but its never used.`
			`} else {`
			`this_bit -= 1;`
			`}`
			`if (max_code > 0 && bits < max_code) {`
			`break;`
			`}`
			`max_code = (max_code << 1) + lengths[bit_length];`
			`searched_length += lengths[bit_length];`
			`}`
translation: Compress as unicode, not bytes By treating each unicode code-point as a single entity for huffman compression, the overall compression rate can be somewhat improved without changing the algorithm. On the decompression side, when compressed values above 127 are encountered, they need to be converted from a 16-bit Unicode code point into a UTF-8 byte sequence. Doing this returns approximately 1.5kB of flash storage with the zh_Latn_pinyin translation. (292 -> 1768 bytes remaining in my build of trinket_m0) Other "more ASCII" translations benefit less, and in fact zh_Latn_pinyin is no longer the most constrained translation! (de_DE 1156 -> 1384 bytes free in flash, I didn't check others before pushing for CI) English is slightly pessimized, 2840 -> 2788 bytes, probably mostly because the "values" array was changed from uint8_t to uint16_t, which is strictly not required for an all-ASCII translation. This could probably be avoided in this case, but as English is not the most constrained translation it doesn't really matter. Testing performed: built for feather nRF52840 express and trinket m0 in English and zh_Latn_pinyin; ran and verified the localized messages such as Àn xià rènhé jiàn jìnrù REPL. Shǐyòng CTRL-D chóngxīn jiāzài. and Press any key to enter the REPL. Use CTRL-D to reload. were properly displayed. 2019-12-02 09:41:03 -05:00			`i += put_utf8(decompressed + i, values[searched_length + bits - max_code]);`
Compress all translated strings with Huffman coding. This saves code space in builds which use link-time optimization. The optimization drops the untranslated strings and replaces them with a compressed_string_t struct. It can then be decompressed to a c string. Builds without LTO work as well but include both untranslated strings and compressed strings. This work could be expanded to include QSTRs and loaded strings if a compress method is added to C. Its tracked in #531. 2018-08-15 21:32:37 -04:00			`}`

			`decompressed[compressed->length-1] = '\0';`
			`return decompressed;`
			`}`

			`inline __attribute__((always_inline)) const compressed_string_t* translate(const char* original) {`
Support internationalisation. 2018-07-31 19:53:54 -04:00			`#ifndef NO_QSTR`
			`#define QDEF(id, str)`
Fix esp and samd 2018-08-16 16:34:12 -04:00			`#define TRANSLATION(id, len, compressed...) if (strcmp(original, id) == 0) { static const compressed_string_t v = {.length = len, .data = compressed}; return &v; } else`
Support internationalisation. 2018-07-31 19:53:54 -04:00			`#include "genhdr/qstrdefs.generated.h"`
			`#undef TRANSLATION`
			`#undef QDEF`
			`#endif`
Compress all translated strings with Huffman coding. This saves code space in builds which use link-time optimization. The optimization drops the untranslated strings and replaces them with a compressed_string_t struct. It can then be decompressed to a c string. Builds without LTO work as well but include both untranslated strings and compressed strings. This work could be expanded to include QSTRs and loaded strings if a compress method is added to C. Its tracked in #531. 2018-08-15 21:32:37 -04:00			`return NULL;`
Support internationalisation. 2018-07-31 19:53:54 -04:00			`}`