2014-05-03 23:27:38 +01:00
|
|
|
/*
|
2017-06-30 09:22:17 +02:00
|
|
|
* This file is part of the MicroPython project, http://micropython.org/
|
2014-05-03 23:27:38 +01:00
|
|
|
*
|
|
|
|
* The MIT License (MIT)
|
|
|
|
*
|
|
|
|
* Copyright (c) 2013, 2014 Damien P. George
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
|
|
* in the Software without restriction, including without limitation the rights
|
|
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
|
|
* furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice shall be included in
|
|
|
|
* all copies or substantial portions of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
* THE SOFTWARE.
|
|
|
|
*/
|
all: Unify header guard usage.
The code conventions suggest using header guards, but do not define how
those should look like and instead point to existing files. However, not
all existing files follow the same scheme, sometimes omitting header guards
altogether, sometimes using non-standard names, making it easy to
accidentally pick a "wrong" example.
This commit ensures that all header files of the MicroPython project (that
were not simply copied from somewhere else) follow the same pattern, that
was already present in the majority of files, especially in the py folder.
The rules are as follows.
Naming convention:
* start with the words MICROPY_INCLUDED
* contain the full path to the file
* replace special characters with _
In addition, there are no empty lines before #ifndef, between #ifndef and
one empty line before #endif. #endif is followed by a comment containing
the name of the guard macro.
py/grammar.h cannot use header guards by design, since it has to be
included multiple times in a single C file. Several other files also do not
need header guards as they are only used internally and guaranteed to be
included only once:
* MICROPY_MPHALPORT_H
* mpconfigboard.h
* mpconfigport.h
* mpthreadport.h
* pin_defs_*.h
* qstrdefs*.h
2017-06-29 23:14:58 +02:00
|
|
|
#ifndef MICROPY_INCLUDED_PY_EMIT_H
|
|
|
|
#define MICROPY_INCLUDED_PY_EMIT_H
|
2015-01-01 20:27:54 +00:00
|
|
|
|
|
|
|
#include "py/lexer.h"
|
|
|
|
#include "py/scope.h"
|
|
|
|
|
2013-10-04 19:53:11 +01:00
|
|
|
/* Notes on passes:
|
|
|
|
* We don't know exactly the opcodes in pass 1 because they depend on the
|
|
|
|
* closing over of variables (LOAD_CLOSURE, BUILD_TUPLE, MAKE_CLOSURE), which
|
|
|
|
* depends on determining the scope of variables in each function, and this
|
|
|
|
* is not known until the end of pass 1.
|
|
|
|
* As a consequence, we don't know the maximum stack size until the end of pass 2.
|
|
|
|
* This is problematic for some emitters (x64) since they need to know the maximum
|
2014-01-06 17:49:21 +02:00
|
|
|
* stack size to compile the entry to the function, and this affects code size.
|
2013-10-04 19:53:11 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
typedef enum {
|
2014-05-07 17:24:22 +01:00
|
|
|
MP_PASS_SCOPE = 1, // work out id's and their kind, and number of labels
|
|
|
|
MP_PASS_STACK_SIZE = 2, // work out maximum stack size
|
|
|
|
MP_PASS_CODE_SIZE = 3, // work out code size and label offsets
|
2022-03-16 09:37:58 +11:00
|
|
|
MP_PASS_EMIT = 4, // emit code (may be run multiple times if the emitter requests it)
|
2013-10-04 19:53:11 +01:00
|
|
|
} pass_kind_t;
|
|
|
|
|
2014-04-09 12:43:17 +01:00
|
|
|
#define MP_EMIT_STAR_FLAG_SINGLE (0x01)
|
|
|
|
#define MP_EMIT_STAR_FLAG_DOUBLE (0x02)
|
|
|
|
|
2014-05-30 15:20:41 +01:00
|
|
|
#define MP_EMIT_BREAK_FROM_FOR (0x8000)
|
|
|
|
|
2018-05-19 00:11:04 +10:00
|
|
|
// Kind for emit_id_ops->local()
|
|
|
|
#define MP_EMIT_IDOP_LOCAL_FAST (0)
|
|
|
|
#define MP_EMIT_IDOP_LOCAL_DEREF (1)
|
|
|
|
|
2018-05-22 21:16:30 +10:00
|
|
|
// Kind for emit_id_ops->global()
|
|
|
|
#define MP_EMIT_IDOP_GLOBAL_NAME (0)
|
|
|
|
#define MP_EMIT_IDOP_GLOBAL_GLOBAL (1)
|
|
|
|
|
2018-05-22 21:58:25 +10:00
|
|
|
// Kind for emit->import()
|
|
|
|
#define MP_EMIT_IMPORT_NAME (0)
|
|
|
|
#define MP_EMIT_IMPORT_FROM (1)
|
|
|
|
#define MP_EMIT_IMPORT_STAR (2)
|
|
|
|
|
2018-05-22 21:31:56 +10:00
|
|
|
// Kind for emit->subscr()
|
|
|
|
#define MP_EMIT_SUBSCR_LOAD (0)
|
|
|
|
#define MP_EMIT_SUBSCR_STORE (1)
|
|
|
|
#define MP_EMIT_SUBSCR_DELETE (2)
|
|
|
|
|
2018-05-22 21:43:41 +10:00
|
|
|
// Kind for emit->attr()
|
|
|
|
#define MP_EMIT_ATTR_LOAD (0)
|
|
|
|
#define MP_EMIT_ATTR_STORE (1)
|
|
|
|
#define MP_EMIT_ATTR_DELETE (2)
|
|
|
|
|
2018-05-22 22:33:26 +10:00
|
|
|
// Kind for emit->setup_block()
|
|
|
|
#define MP_EMIT_SETUP_BLOCK_WITH (0)
|
2019-09-02 20:24:01 +10:00
|
|
|
#define MP_EMIT_SETUP_BLOCK_EXCEPT (1)
|
|
|
|
#define MP_EMIT_SETUP_BLOCK_FINALLY (2)
|
2018-05-22 22:33:26 +10:00
|
|
|
|
2018-05-19 00:41:40 +10:00
|
|
|
// Kind for emit->build()
|
|
|
|
#define MP_EMIT_BUILD_TUPLE (0)
|
|
|
|
#define MP_EMIT_BUILD_LIST (1)
|
2019-09-02 20:24:01 +10:00
|
|
|
#define MP_EMIT_BUILD_MAP (2)
|
|
|
|
#define MP_EMIT_BUILD_SET (3)
|
|
|
|
#define MP_EMIT_BUILD_SLICE (4)
|
2018-05-19 00:41:40 +10:00
|
|
|
|
2018-05-19 00:30:42 +10:00
|
|
|
// Kind for emit->yield()
|
|
|
|
#define MP_EMIT_YIELD_VALUE (0)
|
|
|
|
#define MP_EMIT_YIELD_FROM (1)
|
|
|
|
|
2013-10-05 12:19:06 +01:00
|
|
|
typedef struct _emit_t emit_t;
|
|
|
|
|
py: Rework bytecode and .mpy file format to be mostly static data.
Background: .mpy files are precompiled .py files, built using mpy-cross,
that contain compiled bytecode functions (and can also contain machine
code). The benefit of using an .mpy file over a .py file is that they are
faster to import and take less memory when importing. They are also
smaller on disk.
But the real benefit of .mpy files comes when they are frozen into the
firmware. This is done by loading the .mpy file during compilation of the
firmware and turning it into a set of big C data structures (the job of
mpy-tool.py), which are then compiled and downloaded into the ROM of a
device. These C data structures can be executed in-place, ie directly from
ROM. This makes importing even faster because there is very little to do,
and also means such frozen modules take up much less RAM (because their
bytecode stays in ROM).
The downside of frozen code is that it requires recompiling and reflashing
the entire firmware. This can be a big barrier to entry, slows down
development time, and makes it harder to do OTA updates of frozen code
(because the whole firmware must be updated).
This commit attempts to solve this problem by providing a solution that
sits between loading .mpy files into RAM and freezing them into the
firmware. The .mpy file format has been reworked so that it consists of
data and bytecode which is mostly static and ready to run in-place. If
these new .mpy files are located in flash/ROM which is memory addressable,
the .mpy file can be executed (mostly) in-place.
With this approach there is still a small amount of unpacking and linking
of the .mpy file that needs to be done when it's imported, but it's still
much better than loading an .mpy from disk into RAM (although not as good
as freezing .mpy files into the firmware).
The main trick to make static .mpy files is to adjust the bytecode so any
qstrs that it references now go through a lookup table to convert from
local qstr number in the module to global qstr number in the firmware.
That means the bytecode does not need linking/rewriting of qstrs when it's
loaded. Instead only a small qstr table needs to be built (and put in RAM)
at import time. This means the bytecode itself is static/constant and can
be used directly if it's in addressable memory. Also the qstr string data
in the .mpy file, and some constant object data, can be used directly.
Note that the qstr table is global to the module (ie not per function).
In more detail, in the VM what used to be (schematically):
qst = DECODE_QSTR_VALUE;
is now (schematically):
idx = DECODE_QSTR_INDEX;
qst = qstr_table[idx];
That allows the bytecode to be fixed at compile time and not need
relinking/rewriting of the qstr values. Only qstr_table needs to be linked
when the .mpy is loaded.
Incidentally, this helps to reduce the size of bytecode because what used
to be 2-byte qstr values in the bytecode are now (mostly) 1-byte indices.
If the module uses the same qstr more than two times then the bytecode is
smaller than before.
The following changes are measured for this commit compared to the
previous (the baseline):
- average 7%-9% reduction in size of .mpy files
- frozen code size is reduced by about 5%-7%
- importing .py files uses about 5% less RAM in total
- importing .mpy files uses about 4% less RAM in total
- importing .py and .mpy files takes about the same time as before
The qstr indirection in the bytecode has only a small impact on VM
performance. For stm32 on PYBv1.0 the performance change of this commit
is:
diff of scores (higher is better)
N=100 M=100 baseline -> this-commit diff diff% (error%)
bm_chaos.py 371.07 -> 357.39 : -13.68 = -3.687% (+/-0.02%)
bm_fannkuch.py 78.72 -> 77.49 : -1.23 = -1.563% (+/-0.01%)
bm_fft.py 2591.73 -> 2539.28 : -52.45 = -2.024% (+/-0.00%)
bm_float.py 6034.93 -> 5908.30 : -126.63 = -2.098% (+/-0.01%)
bm_hexiom.py 48.96 -> 47.93 : -1.03 = -2.104% (+/-0.00%)
bm_nqueens.py 4510.63 -> 4459.94 : -50.69 = -1.124% (+/-0.00%)
bm_pidigits.py 650.28 -> 644.96 : -5.32 = -0.818% (+/-0.23%)
core_import_mpy_multi.py 564.77 -> 581.49 : +16.72 = +2.960% (+/-0.01%)
core_import_mpy_single.py 68.67 -> 67.16 : -1.51 = -2.199% (+/-0.01%)
core_qstr.py 64.16 -> 64.12 : -0.04 = -0.062% (+/-0.00%)
core_yield_from.py 362.58 -> 354.50 : -8.08 = -2.228% (+/-0.00%)
misc_aes.py 429.69 -> 405.59 : -24.10 = -5.609% (+/-0.01%)
misc_mandel.py 3485.13 -> 3416.51 : -68.62 = -1.969% (+/-0.00%)
misc_pystone.py 2496.53 -> 2405.56 : -90.97 = -3.644% (+/-0.01%)
misc_raytrace.py 381.47 -> 374.01 : -7.46 = -1.956% (+/-0.01%)
viper_call0.py 576.73 -> 572.49 : -4.24 = -0.735% (+/-0.04%)
viper_call1a.py 550.37 -> 546.21 : -4.16 = -0.756% (+/-0.09%)
viper_call1b.py 438.23 -> 435.68 : -2.55 = -0.582% (+/-0.06%)
viper_call1c.py 442.84 -> 440.04 : -2.80 = -0.632% (+/-0.08%)
viper_call2a.py 536.31 -> 532.35 : -3.96 = -0.738% (+/-0.06%)
viper_call2b.py 382.34 -> 377.07 : -5.27 = -1.378% (+/-0.03%)
And for unix on x64:
diff of scores (higher is better)
N=2000 M=2000 baseline -> this-commit diff diff% (error%)
bm_chaos.py 13594.20 -> 13073.84 : -520.36 = -3.828% (+/-5.44%)
bm_fannkuch.py 60.63 -> 59.58 : -1.05 = -1.732% (+/-3.01%)
bm_fft.py 112009.15 -> 111603.32 : -405.83 = -0.362% (+/-4.03%)
bm_float.py 246202.55 -> 247923.81 : +1721.26 = +0.699% (+/-2.79%)
bm_hexiom.py 615.65 -> 617.21 : +1.56 = +0.253% (+/-1.64%)
bm_nqueens.py 215807.95 -> 215600.96 : -206.99 = -0.096% (+/-3.52%)
bm_pidigits.py 8246.74 -> 8422.82 : +176.08 = +2.135% (+/-3.64%)
misc_aes.py 16133.00 -> 16452.74 : +319.74 = +1.982% (+/-1.50%)
misc_mandel.py 128146.69 -> 130796.43 : +2649.74 = +2.068% (+/-3.18%)
misc_pystone.py 83811.49 -> 83124.85 : -686.64 = -0.819% (+/-1.03%)
misc_raytrace.py 21688.02 -> 21385.10 : -302.92 = -1.397% (+/-3.20%)
The code size change is (firmware with a lot of frozen code benefits the
most):
bare-arm: +396 +0.697%
minimal x86: +1595 +0.979% [incl +32(data)]
unix x64: +2408 +0.470% [incl +800(data)]
unix nanbox: +1396 +0.309% [incl -96(data)]
stm32: -1256 -0.318% PYBV10
cc3200: +288 +0.157%
esp8266: -260 -0.037% GENERIC
esp32: -216 -0.014% GENERIC[incl -1072(data)]
nrf: +116 +0.067% pca10040
rp2: -664 -0.135% PICO
samd: +844 +0.607% ADAFRUIT_ITSYBITSY_M4_EXPRESS
As part of this change the .mpy file format version is bumped to version 6.
And mpy-tool.py has been improved to provide a good visualisation of the
contents of .mpy files.
In summary: this commit changes the bytecode to use qstr indirection, and
reworks the .mpy file format to be simpler and allow .mpy files to be
executed in-place. Performance is not impacted too much. Eventually it
will be possible to store such .mpy files in a linear, read-only, memory-
mappable filesystem so they can be executed from flash/ROM. This will
essentially be able to replace frozen code for most applications.
Signed-off-by: Damien George <damien@micropython.org>
2021-10-22 22:22:47 +11:00
|
|
|
typedef struct _mp_emit_common_t {
|
|
|
|
pass_kind_t pass;
|
|
|
|
uint16_t ct_cur_child;
|
|
|
|
mp_raw_code_t **children;
|
|
|
|
#if MICROPY_EMIT_BYTECODE_USES_QSTR_TABLE
|
|
|
|
mp_map_t qstr_map;
|
|
|
|
#endif
|
2022-05-07 15:51:41 +10:00
|
|
|
mp_obj_list_t const_obj_list;
|
py: Rework bytecode and .mpy file format to be mostly static data.
Background: .mpy files are precompiled .py files, built using mpy-cross,
that contain compiled bytecode functions (and can also contain machine
code). The benefit of using an .mpy file over a .py file is that they are
faster to import and take less memory when importing. They are also
smaller on disk.
But the real benefit of .mpy files comes when they are frozen into the
firmware. This is done by loading the .mpy file during compilation of the
firmware and turning it into a set of big C data structures (the job of
mpy-tool.py), which are then compiled and downloaded into the ROM of a
device. These C data structures can be executed in-place, ie directly from
ROM. This makes importing even faster because there is very little to do,
and also means such frozen modules take up much less RAM (because their
bytecode stays in ROM).
The downside of frozen code is that it requires recompiling and reflashing
the entire firmware. This can be a big barrier to entry, slows down
development time, and makes it harder to do OTA updates of frozen code
(because the whole firmware must be updated).
This commit attempts to solve this problem by providing a solution that
sits between loading .mpy files into RAM and freezing them into the
firmware. The .mpy file format has been reworked so that it consists of
data and bytecode which is mostly static and ready to run in-place. If
these new .mpy files are located in flash/ROM which is memory addressable,
the .mpy file can be executed (mostly) in-place.
With this approach there is still a small amount of unpacking and linking
of the .mpy file that needs to be done when it's imported, but it's still
much better than loading an .mpy from disk into RAM (although not as good
as freezing .mpy files into the firmware).
The main trick to make static .mpy files is to adjust the bytecode so any
qstrs that it references now go through a lookup table to convert from
local qstr number in the module to global qstr number in the firmware.
That means the bytecode does not need linking/rewriting of qstrs when it's
loaded. Instead only a small qstr table needs to be built (and put in RAM)
at import time. This means the bytecode itself is static/constant and can
be used directly if it's in addressable memory. Also the qstr string data
in the .mpy file, and some constant object data, can be used directly.
Note that the qstr table is global to the module (ie not per function).
In more detail, in the VM what used to be (schematically):
qst = DECODE_QSTR_VALUE;
is now (schematically):
idx = DECODE_QSTR_INDEX;
qst = qstr_table[idx];
That allows the bytecode to be fixed at compile time and not need
relinking/rewriting of the qstr values. Only qstr_table needs to be linked
when the .mpy is loaded.
Incidentally, this helps to reduce the size of bytecode because what used
to be 2-byte qstr values in the bytecode are now (mostly) 1-byte indices.
If the module uses the same qstr more than two times then the bytecode is
smaller than before.
The following changes are measured for this commit compared to the
previous (the baseline):
- average 7%-9% reduction in size of .mpy files
- frozen code size is reduced by about 5%-7%
- importing .py files uses about 5% less RAM in total
- importing .mpy files uses about 4% less RAM in total
- importing .py and .mpy files takes about the same time as before
The qstr indirection in the bytecode has only a small impact on VM
performance. For stm32 on PYBv1.0 the performance change of this commit
is:
diff of scores (higher is better)
N=100 M=100 baseline -> this-commit diff diff% (error%)
bm_chaos.py 371.07 -> 357.39 : -13.68 = -3.687% (+/-0.02%)
bm_fannkuch.py 78.72 -> 77.49 : -1.23 = -1.563% (+/-0.01%)
bm_fft.py 2591.73 -> 2539.28 : -52.45 = -2.024% (+/-0.00%)
bm_float.py 6034.93 -> 5908.30 : -126.63 = -2.098% (+/-0.01%)
bm_hexiom.py 48.96 -> 47.93 : -1.03 = -2.104% (+/-0.00%)
bm_nqueens.py 4510.63 -> 4459.94 : -50.69 = -1.124% (+/-0.00%)
bm_pidigits.py 650.28 -> 644.96 : -5.32 = -0.818% (+/-0.23%)
core_import_mpy_multi.py 564.77 -> 581.49 : +16.72 = +2.960% (+/-0.01%)
core_import_mpy_single.py 68.67 -> 67.16 : -1.51 = -2.199% (+/-0.01%)
core_qstr.py 64.16 -> 64.12 : -0.04 = -0.062% (+/-0.00%)
core_yield_from.py 362.58 -> 354.50 : -8.08 = -2.228% (+/-0.00%)
misc_aes.py 429.69 -> 405.59 : -24.10 = -5.609% (+/-0.01%)
misc_mandel.py 3485.13 -> 3416.51 : -68.62 = -1.969% (+/-0.00%)
misc_pystone.py 2496.53 -> 2405.56 : -90.97 = -3.644% (+/-0.01%)
misc_raytrace.py 381.47 -> 374.01 : -7.46 = -1.956% (+/-0.01%)
viper_call0.py 576.73 -> 572.49 : -4.24 = -0.735% (+/-0.04%)
viper_call1a.py 550.37 -> 546.21 : -4.16 = -0.756% (+/-0.09%)
viper_call1b.py 438.23 -> 435.68 : -2.55 = -0.582% (+/-0.06%)
viper_call1c.py 442.84 -> 440.04 : -2.80 = -0.632% (+/-0.08%)
viper_call2a.py 536.31 -> 532.35 : -3.96 = -0.738% (+/-0.06%)
viper_call2b.py 382.34 -> 377.07 : -5.27 = -1.378% (+/-0.03%)
And for unix on x64:
diff of scores (higher is better)
N=2000 M=2000 baseline -> this-commit diff diff% (error%)
bm_chaos.py 13594.20 -> 13073.84 : -520.36 = -3.828% (+/-5.44%)
bm_fannkuch.py 60.63 -> 59.58 : -1.05 = -1.732% (+/-3.01%)
bm_fft.py 112009.15 -> 111603.32 : -405.83 = -0.362% (+/-4.03%)
bm_float.py 246202.55 -> 247923.81 : +1721.26 = +0.699% (+/-2.79%)
bm_hexiom.py 615.65 -> 617.21 : +1.56 = +0.253% (+/-1.64%)
bm_nqueens.py 215807.95 -> 215600.96 : -206.99 = -0.096% (+/-3.52%)
bm_pidigits.py 8246.74 -> 8422.82 : +176.08 = +2.135% (+/-3.64%)
misc_aes.py 16133.00 -> 16452.74 : +319.74 = +1.982% (+/-1.50%)
misc_mandel.py 128146.69 -> 130796.43 : +2649.74 = +2.068% (+/-3.18%)
misc_pystone.py 83811.49 -> 83124.85 : -686.64 = -0.819% (+/-1.03%)
misc_raytrace.py 21688.02 -> 21385.10 : -302.92 = -1.397% (+/-3.20%)
The code size change is (firmware with a lot of frozen code benefits the
most):
bare-arm: +396 +0.697%
minimal x86: +1595 +0.979% [incl +32(data)]
unix x64: +2408 +0.470% [incl +800(data)]
unix nanbox: +1396 +0.309% [incl -96(data)]
stm32: -1256 -0.318% PYBV10
cc3200: +288 +0.157%
esp8266: -260 -0.037% GENERIC
esp32: -216 -0.014% GENERIC[incl -1072(data)]
nrf: +116 +0.067% pca10040
rp2: -664 -0.135% PICO
samd: +844 +0.607% ADAFRUIT_ITSYBITSY_M4_EXPRESS
As part of this change the .mpy file format version is bumped to version 6.
And mpy-tool.py has been improved to provide a good visualisation of the
contents of .mpy files.
In summary: this commit changes the bytecode to use qstr indirection, and
reworks the .mpy file format to be simpler and allow .mpy files to be
executed in-place. Performance is not impacted too much. Eventually it
will be possible to store such .mpy files in a linear, read-only, memory-
mappable filesystem so they can be executed from flash/ROM. This will
essentially be able to replace frozen code for most applications.
Signed-off-by: Damien George <damien@micropython.org>
2021-10-22 22:22:47 +11:00
|
|
|
} mp_emit_common_t;
|
|
|
|
|
2015-03-26 14:42:40 +00:00
|
|
|
typedef struct _mp_emit_method_table_id_ops_t {
|
2018-05-19 00:11:04 +10:00
|
|
|
void (*local)(emit_t *emit, qstr qst, mp_uint_t local_num, int kind);
|
2018-05-22 21:16:30 +10:00
|
|
|
void (*global)(emit_t *emit, qstr qst, int kind);
|
2015-03-26 14:42:40 +00:00
|
|
|
} mp_emit_method_table_id_ops_t;
|
|
|
|
|
2013-10-05 12:19:06 +01:00
|
|
|
typedef struct _emit_method_table_t {
|
2019-03-09 10:59:25 +11:00
|
|
|
#if MICROPY_DYNAMIC_COMPILER
|
py: Rework bytecode and .mpy file format to be mostly static data.
Background: .mpy files are precompiled .py files, built using mpy-cross,
that contain compiled bytecode functions (and can also contain machine
code). The benefit of using an .mpy file over a .py file is that they are
faster to import and take less memory when importing. They are also
smaller on disk.
But the real benefit of .mpy files comes when they are frozen into the
firmware. This is done by loading the .mpy file during compilation of the
firmware and turning it into a set of big C data structures (the job of
mpy-tool.py), which are then compiled and downloaded into the ROM of a
device. These C data structures can be executed in-place, ie directly from
ROM. This makes importing even faster because there is very little to do,
and also means such frozen modules take up much less RAM (because their
bytecode stays in ROM).
The downside of frozen code is that it requires recompiling and reflashing
the entire firmware. This can be a big barrier to entry, slows down
development time, and makes it harder to do OTA updates of frozen code
(because the whole firmware must be updated).
This commit attempts to solve this problem by providing a solution that
sits between loading .mpy files into RAM and freezing them into the
firmware. The .mpy file format has been reworked so that it consists of
data and bytecode which is mostly static and ready to run in-place. If
these new .mpy files are located in flash/ROM which is memory addressable,
the .mpy file can be executed (mostly) in-place.
With this approach there is still a small amount of unpacking and linking
of the .mpy file that needs to be done when it's imported, but it's still
much better than loading an .mpy from disk into RAM (although not as good
as freezing .mpy files into the firmware).
The main trick to make static .mpy files is to adjust the bytecode so any
qstrs that it references now go through a lookup table to convert from
local qstr number in the module to global qstr number in the firmware.
That means the bytecode does not need linking/rewriting of qstrs when it's
loaded. Instead only a small qstr table needs to be built (and put in RAM)
at import time. This means the bytecode itself is static/constant and can
be used directly if it's in addressable memory. Also the qstr string data
in the .mpy file, and some constant object data, can be used directly.
Note that the qstr table is global to the module (ie not per function).
In more detail, in the VM what used to be (schematically):
qst = DECODE_QSTR_VALUE;
is now (schematically):
idx = DECODE_QSTR_INDEX;
qst = qstr_table[idx];
That allows the bytecode to be fixed at compile time and not need
relinking/rewriting of the qstr values. Only qstr_table needs to be linked
when the .mpy is loaded.
Incidentally, this helps to reduce the size of bytecode because what used
to be 2-byte qstr values in the bytecode are now (mostly) 1-byte indices.
If the module uses the same qstr more than two times then the bytecode is
smaller than before.
The following changes are measured for this commit compared to the
previous (the baseline):
- average 7%-9% reduction in size of .mpy files
- frozen code size is reduced by about 5%-7%
- importing .py files uses about 5% less RAM in total
- importing .mpy files uses about 4% less RAM in total
- importing .py and .mpy files takes about the same time as before
The qstr indirection in the bytecode has only a small impact on VM
performance. For stm32 on PYBv1.0 the performance change of this commit
is:
diff of scores (higher is better)
N=100 M=100 baseline -> this-commit diff diff% (error%)
bm_chaos.py 371.07 -> 357.39 : -13.68 = -3.687% (+/-0.02%)
bm_fannkuch.py 78.72 -> 77.49 : -1.23 = -1.563% (+/-0.01%)
bm_fft.py 2591.73 -> 2539.28 : -52.45 = -2.024% (+/-0.00%)
bm_float.py 6034.93 -> 5908.30 : -126.63 = -2.098% (+/-0.01%)
bm_hexiom.py 48.96 -> 47.93 : -1.03 = -2.104% (+/-0.00%)
bm_nqueens.py 4510.63 -> 4459.94 : -50.69 = -1.124% (+/-0.00%)
bm_pidigits.py 650.28 -> 644.96 : -5.32 = -0.818% (+/-0.23%)
core_import_mpy_multi.py 564.77 -> 581.49 : +16.72 = +2.960% (+/-0.01%)
core_import_mpy_single.py 68.67 -> 67.16 : -1.51 = -2.199% (+/-0.01%)
core_qstr.py 64.16 -> 64.12 : -0.04 = -0.062% (+/-0.00%)
core_yield_from.py 362.58 -> 354.50 : -8.08 = -2.228% (+/-0.00%)
misc_aes.py 429.69 -> 405.59 : -24.10 = -5.609% (+/-0.01%)
misc_mandel.py 3485.13 -> 3416.51 : -68.62 = -1.969% (+/-0.00%)
misc_pystone.py 2496.53 -> 2405.56 : -90.97 = -3.644% (+/-0.01%)
misc_raytrace.py 381.47 -> 374.01 : -7.46 = -1.956% (+/-0.01%)
viper_call0.py 576.73 -> 572.49 : -4.24 = -0.735% (+/-0.04%)
viper_call1a.py 550.37 -> 546.21 : -4.16 = -0.756% (+/-0.09%)
viper_call1b.py 438.23 -> 435.68 : -2.55 = -0.582% (+/-0.06%)
viper_call1c.py 442.84 -> 440.04 : -2.80 = -0.632% (+/-0.08%)
viper_call2a.py 536.31 -> 532.35 : -3.96 = -0.738% (+/-0.06%)
viper_call2b.py 382.34 -> 377.07 : -5.27 = -1.378% (+/-0.03%)
And for unix on x64:
diff of scores (higher is better)
N=2000 M=2000 baseline -> this-commit diff diff% (error%)
bm_chaos.py 13594.20 -> 13073.84 : -520.36 = -3.828% (+/-5.44%)
bm_fannkuch.py 60.63 -> 59.58 : -1.05 = -1.732% (+/-3.01%)
bm_fft.py 112009.15 -> 111603.32 : -405.83 = -0.362% (+/-4.03%)
bm_float.py 246202.55 -> 247923.81 : +1721.26 = +0.699% (+/-2.79%)
bm_hexiom.py 615.65 -> 617.21 : +1.56 = +0.253% (+/-1.64%)
bm_nqueens.py 215807.95 -> 215600.96 : -206.99 = -0.096% (+/-3.52%)
bm_pidigits.py 8246.74 -> 8422.82 : +176.08 = +2.135% (+/-3.64%)
misc_aes.py 16133.00 -> 16452.74 : +319.74 = +1.982% (+/-1.50%)
misc_mandel.py 128146.69 -> 130796.43 : +2649.74 = +2.068% (+/-3.18%)
misc_pystone.py 83811.49 -> 83124.85 : -686.64 = -0.819% (+/-1.03%)
misc_raytrace.py 21688.02 -> 21385.10 : -302.92 = -1.397% (+/-3.20%)
The code size change is (firmware with a lot of frozen code benefits the
most):
bare-arm: +396 +0.697%
minimal x86: +1595 +0.979% [incl +32(data)]
unix x64: +2408 +0.470% [incl +800(data)]
unix nanbox: +1396 +0.309% [incl -96(data)]
stm32: -1256 -0.318% PYBV10
cc3200: +288 +0.157%
esp8266: -260 -0.037% GENERIC
esp32: -216 -0.014% GENERIC[incl -1072(data)]
nrf: +116 +0.067% pca10040
rp2: -664 -0.135% PICO
samd: +844 +0.607% ADAFRUIT_ITSYBITSY_M4_EXPRESS
As part of this change the .mpy file format version is bumped to version 6.
And mpy-tool.py has been improved to provide a good visualisation of the
contents of .mpy files.
In summary: this commit changes the bytecode to use qstr indirection, and
reworks the .mpy file format to be simpler and allow .mpy files to be
executed in-place. Performance is not impacted too much. Eventually it
will be possible to store such .mpy files in a linear, read-only, memory-
mappable filesystem so they can be executed from flash/ROM. This will
essentially be able to replace frozen code for most applications.
Signed-off-by: Damien George <damien@micropython.org>
2021-10-22 22:22:47 +11:00
|
|
|
emit_t *(*emit_new)(mp_emit_common_t * emit_common, mp_obj_t *error_slot, uint *label_slot, mp_uint_t max_num_labels);
|
2019-03-09 10:59:25 +11:00
|
|
|
void (*emit_free)(emit_t *emit);
|
|
|
|
#endif
|
|
|
|
|
2013-10-05 12:19:06 +01:00
|
|
|
void (*start_pass)(emit_t *emit, pass_kind_t pass, scope_t *scope);
|
2022-03-16 09:37:58 +11:00
|
|
|
bool (*end_pass)(emit_t *emit);
|
2014-09-08 23:05:16 +01:00
|
|
|
void (*adjust_stack_size)(emit_t *emit, mp_int_t delta);
|
2015-03-26 16:44:14 +00:00
|
|
|
void (*set_source_line)(emit_t *emit, mp_uint_t line);
|
2013-10-05 12:19:06 +01:00
|
|
|
|
2015-03-26 14:42:40 +00:00
|
|
|
mp_emit_method_table_id_ops_t load_id;
|
|
|
|
mp_emit_method_table_id_ops_t store_id;
|
|
|
|
mp_emit_method_table_id_ops_t delete_id;
|
2013-10-05 14:17:09 +01:00
|
|
|
|
2014-09-08 23:05:16 +01:00
|
|
|
void (*label_assign)(emit_t *emit, mp_uint_t l);
|
2018-05-22 21:58:25 +10:00
|
|
|
void (*import)(emit_t *emit, qstr qst, int kind);
|
2013-12-21 18:17:45 +00:00
|
|
|
void (*load_const_tok)(emit_t *emit, mp_token_kind_t tok);
|
2014-07-03 13:25:24 +01:00
|
|
|
void (*load_const_small_int)(emit_t *emit, mp_int_t arg);
|
2015-06-25 14:42:13 +00:00
|
|
|
void (*load_const_str)(emit_t *emit, qstr qst);
|
2015-11-27 12:41:25 +00:00
|
|
|
void (*load_const_obj)(emit_t *emit, mp_obj_t obj);
|
2014-04-20 17:50:40 +01:00
|
|
|
void (*load_null)(emit_t *emit);
|
2017-04-19 09:45:59 +10:00
|
|
|
void (*load_method)(emit_t *emit, qstr qst, bool is_super);
|
2013-10-05 12:19:06 +01:00
|
|
|
void (*load_build_class)(emit_t *emit);
|
2018-05-22 21:31:56 +10:00
|
|
|
void (*subscr)(emit_t *emit, int kind);
|
2018-05-22 21:43:41 +10:00
|
|
|
void (*attr)(emit_t *emit, qstr qst, int kind);
|
2013-10-05 12:19:06 +01:00
|
|
|
void (*dup_top)(emit_t *emit);
|
|
|
|
void (*dup_top_two)(emit_t *emit);
|
|
|
|
void (*pop_top)(emit_t *emit);
|
|
|
|
void (*rot_two)(emit_t *emit);
|
|
|
|
void (*rot_three)(emit_t *emit);
|
2014-09-08 23:05:16 +01:00
|
|
|
void (*jump)(emit_t *emit, mp_uint_t label);
|
2015-02-28 15:04:06 +00:00
|
|
|
void (*pop_jump_if)(emit_t *emit, bool cond, mp_uint_t label);
|
|
|
|
void (*jump_if_or_pop)(emit_t *emit, bool cond, mp_uint_t label);
|
2018-05-22 21:50:22 +10:00
|
|
|
void (*unwind_jump)(emit_t *emit, mp_uint_t label, mp_uint_t except_depth);
|
2018-05-22 22:33:26 +10:00
|
|
|
void (*setup_block)(emit_t *emit, mp_uint_t label, int kind);
|
2016-04-07 08:50:38 +01:00
|
|
|
void (*with_cleanup)(emit_t *emit, mp_uint_t label);
|
2013-10-05 12:19:06 +01:00
|
|
|
void (*end_finally)(emit_t *emit);
|
2016-01-09 23:59:52 +00:00
|
|
|
void (*get_iter)(emit_t *emit, bool use_stack);
|
2014-09-08 23:05:16 +01:00
|
|
|
void (*for_iter)(emit_t *emit, mp_uint_t label);
|
2017-01-17 15:30:18 +11:00
|
|
|
void (*for_iter_end)(emit_t *emit);
|
2019-02-15 12:18:59 +11:00
|
|
|
void (*pop_except_jump)(emit_t *emit, mp_uint_t label, bool within_exc_handler);
|
2014-03-30 13:35:08 +01:00
|
|
|
void (*unary_op)(emit_t *emit, mp_unary_op_t op);
|
|
|
|
void (*binary_op)(emit_t *emit, mp_binary_op_t op);
|
2018-05-19 00:41:40 +10:00
|
|
|
void (*build)(emit_t *emit, mp_uint_t n_args, int kind);
|
2013-10-05 12:19:06 +01:00
|
|
|
void (*store_map)(emit_t *emit);
|
2016-09-18 23:59:47 +10:00
|
|
|
void (*store_comp)(emit_t *emit, scope_kind_t kind, mp_uint_t set_stack_index);
|
2014-09-08 23:05:16 +01:00
|
|
|
void (*unpack_sequence)(emit_t *emit, mp_uint_t n_args);
|
|
|
|
void (*unpack_ex)(emit_t *emit, mp_uint_t n_left, mp_uint_t n_right);
|
|
|
|
void (*make_function)(emit_t *emit, scope_t *scope, mp_uint_t n_pos_defaults, mp_uint_t n_kw_defaults);
|
|
|
|
void (*make_closure)(emit_t *emit, scope_t *scope, mp_uint_t n_closed_over, mp_uint_t n_pos_defaults, mp_uint_t n_kw_defaults);
|
|
|
|
void (*call_function)(emit_t *emit, mp_uint_t n_positional, mp_uint_t n_keyword, mp_uint_t star_flags);
|
|
|
|
void (*call_method)(emit_t *emit, mp_uint_t n_positional, mp_uint_t n_keyword, mp_uint_t star_flags);
|
2013-10-05 12:19:06 +01:00
|
|
|
void (*return_value)(emit_t *emit);
|
2014-09-08 23:05:16 +01:00
|
|
|
void (*raise_varargs)(emit_t *emit, mp_uint_t n_args);
|
2018-05-19 00:30:42 +10:00
|
|
|
void (*yield)(emit_t *emit, int kind);
|
2014-04-20 18:02:27 +01:00
|
|
|
|
2014-06-30 05:17:25 +01:00
|
|
|
// these methods are used to control entry to/exit from an exception handler
|
|
|
|
// they may or may not emit code
|
|
|
|
void (*start_except_handler)(emit_t *emit);
|
|
|
|
void (*end_except_handler)(emit_t *emit);
|
2013-10-05 12:19:06 +01:00
|
|
|
} emit_method_table_t;
|
|
|
|
|
py: Rework bytecode and .mpy file format to be mostly static data.
Background: .mpy files are precompiled .py files, built using mpy-cross,
that contain compiled bytecode functions (and can also contain machine
code). The benefit of using an .mpy file over a .py file is that they are
faster to import and take less memory when importing. They are also
smaller on disk.
But the real benefit of .mpy files comes when they are frozen into the
firmware. This is done by loading the .mpy file during compilation of the
firmware and turning it into a set of big C data structures (the job of
mpy-tool.py), which are then compiled and downloaded into the ROM of a
device. These C data structures can be executed in-place, ie directly from
ROM. This makes importing even faster because there is very little to do,
and also means such frozen modules take up much less RAM (because their
bytecode stays in ROM).
The downside of frozen code is that it requires recompiling and reflashing
the entire firmware. This can be a big barrier to entry, slows down
development time, and makes it harder to do OTA updates of frozen code
(because the whole firmware must be updated).
This commit attempts to solve this problem by providing a solution that
sits between loading .mpy files into RAM and freezing them into the
firmware. The .mpy file format has been reworked so that it consists of
data and bytecode which is mostly static and ready to run in-place. If
these new .mpy files are located in flash/ROM which is memory addressable,
the .mpy file can be executed (mostly) in-place.
With this approach there is still a small amount of unpacking and linking
of the .mpy file that needs to be done when it's imported, but it's still
much better than loading an .mpy from disk into RAM (although not as good
as freezing .mpy files into the firmware).
The main trick to make static .mpy files is to adjust the bytecode so any
qstrs that it references now go through a lookup table to convert from
local qstr number in the module to global qstr number in the firmware.
That means the bytecode does not need linking/rewriting of qstrs when it's
loaded. Instead only a small qstr table needs to be built (and put in RAM)
at import time. This means the bytecode itself is static/constant and can
be used directly if it's in addressable memory. Also the qstr string data
in the .mpy file, and some constant object data, can be used directly.
Note that the qstr table is global to the module (ie not per function).
In more detail, in the VM what used to be (schematically):
qst = DECODE_QSTR_VALUE;
is now (schematically):
idx = DECODE_QSTR_INDEX;
qst = qstr_table[idx];
That allows the bytecode to be fixed at compile time and not need
relinking/rewriting of the qstr values. Only qstr_table needs to be linked
when the .mpy is loaded.
Incidentally, this helps to reduce the size of bytecode because what used
to be 2-byte qstr values in the bytecode are now (mostly) 1-byte indices.
If the module uses the same qstr more than two times then the bytecode is
smaller than before.
The following changes are measured for this commit compared to the
previous (the baseline):
- average 7%-9% reduction in size of .mpy files
- frozen code size is reduced by about 5%-7%
- importing .py files uses about 5% less RAM in total
- importing .mpy files uses about 4% less RAM in total
- importing .py and .mpy files takes about the same time as before
The qstr indirection in the bytecode has only a small impact on VM
performance. For stm32 on PYBv1.0 the performance change of this commit
is:
diff of scores (higher is better)
N=100 M=100 baseline -> this-commit diff diff% (error%)
bm_chaos.py 371.07 -> 357.39 : -13.68 = -3.687% (+/-0.02%)
bm_fannkuch.py 78.72 -> 77.49 : -1.23 = -1.563% (+/-0.01%)
bm_fft.py 2591.73 -> 2539.28 : -52.45 = -2.024% (+/-0.00%)
bm_float.py 6034.93 -> 5908.30 : -126.63 = -2.098% (+/-0.01%)
bm_hexiom.py 48.96 -> 47.93 : -1.03 = -2.104% (+/-0.00%)
bm_nqueens.py 4510.63 -> 4459.94 : -50.69 = -1.124% (+/-0.00%)
bm_pidigits.py 650.28 -> 644.96 : -5.32 = -0.818% (+/-0.23%)
core_import_mpy_multi.py 564.77 -> 581.49 : +16.72 = +2.960% (+/-0.01%)
core_import_mpy_single.py 68.67 -> 67.16 : -1.51 = -2.199% (+/-0.01%)
core_qstr.py 64.16 -> 64.12 : -0.04 = -0.062% (+/-0.00%)
core_yield_from.py 362.58 -> 354.50 : -8.08 = -2.228% (+/-0.00%)
misc_aes.py 429.69 -> 405.59 : -24.10 = -5.609% (+/-0.01%)
misc_mandel.py 3485.13 -> 3416.51 : -68.62 = -1.969% (+/-0.00%)
misc_pystone.py 2496.53 -> 2405.56 : -90.97 = -3.644% (+/-0.01%)
misc_raytrace.py 381.47 -> 374.01 : -7.46 = -1.956% (+/-0.01%)
viper_call0.py 576.73 -> 572.49 : -4.24 = -0.735% (+/-0.04%)
viper_call1a.py 550.37 -> 546.21 : -4.16 = -0.756% (+/-0.09%)
viper_call1b.py 438.23 -> 435.68 : -2.55 = -0.582% (+/-0.06%)
viper_call1c.py 442.84 -> 440.04 : -2.80 = -0.632% (+/-0.08%)
viper_call2a.py 536.31 -> 532.35 : -3.96 = -0.738% (+/-0.06%)
viper_call2b.py 382.34 -> 377.07 : -5.27 = -1.378% (+/-0.03%)
And for unix on x64:
diff of scores (higher is better)
N=2000 M=2000 baseline -> this-commit diff diff% (error%)
bm_chaos.py 13594.20 -> 13073.84 : -520.36 = -3.828% (+/-5.44%)
bm_fannkuch.py 60.63 -> 59.58 : -1.05 = -1.732% (+/-3.01%)
bm_fft.py 112009.15 -> 111603.32 : -405.83 = -0.362% (+/-4.03%)
bm_float.py 246202.55 -> 247923.81 : +1721.26 = +0.699% (+/-2.79%)
bm_hexiom.py 615.65 -> 617.21 : +1.56 = +0.253% (+/-1.64%)
bm_nqueens.py 215807.95 -> 215600.96 : -206.99 = -0.096% (+/-3.52%)
bm_pidigits.py 8246.74 -> 8422.82 : +176.08 = +2.135% (+/-3.64%)
misc_aes.py 16133.00 -> 16452.74 : +319.74 = +1.982% (+/-1.50%)
misc_mandel.py 128146.69 -> 130796.43 : +2649.74 = +2.068% (+/-3.18%)
misc_pystone.py 83811.49 -> 83124.85 : -686.64 = -0.819% (+/-1.03%)
misc_raytrace.py 21688.02 -> 21385.10 : -302.92 = -1.397% (+/-3.20%)
The code size change is (firmware with a lot of frozen code benefits the
most):
bare-arm: +396 +0.697%
minimal x86: +1595 +0.979% [incl +32(data)]
unix x64: +2408 +0.470% [incl +800(data)]
unix nanbox: +1396 +0.309% [incl -96(data)]
stm32: -1256 -0.318% PYBV10
cc3200: +288 +0.157%
esp8266: -260 -0.037% GENERIC
esp32: -216 -0.014% GENERIC[incl -1072(data)]
nrf: +116 +0.067% pca10040
rp2: -664 -0.135% PICO
samd: +844 +0.607% ADAFRUIT_ITSYBITSY_M4_EXPRESS
As part of this change the .mpy file format version is bumped to version 6.
And mpy-tool.py has been improved to provide a good visualisation of the
contents of .mpy files.
In summary: this commit changes the bytecode to use qstr indirection, and
reworks the .mpy file format to be simpler and allow .mpy files to be
executed in-place. Performance is not impacted too much. Eventually it
will be possible to store such .mpy files in a linear, read-only, memory-
mappable filesystem so they can be executed from flash/ROM. This will
essentially be able to replace frozen code for most applications.
Signed-off-by: Damien George <damien@micropython.org>
2021-10-22 22:22:47 +11:00
|
|
|
#if MICROPY_EMIT_BYTECODE_USES_QSTR_TABLE
|
|
|
|
qstr_short_t mp_emit_common_use_qstr(mp_emit_common_t *emit, qstr qst);
|
|
|
|
#else
|
|
|
|
static inline qstr_short_t mp_emit_common_use_qstr(mp_emit_common_t *emit, qstr qst) {
|
|
|
|
return qst;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2022-05-07 15:51:41 +10:00
|
|
|
size_t mp_emit_common_use_const_obj(mp_emit_common_t *emit, mp_obj_t const_obj);
|
py: Rework bytecode and .mpy file format to be mostly static data.
Background: .mpy files are precompiled .py files, built using mpy-cross,
that contain compiled bytecode functions (and can also contain machine
code). The benefit of using an .mpy file over a .py file is that they are
faster to import and take less memory when importing. They are also
smaller on disk.
But the real benefit of .mpy files comes when they are frozen into the
firmware. This is done by loading the .mpy file during compilation of the
firmware and turning it into a set of big C data structures (the job of
mpy-tool.py), which are then compiled and downloaded into the ROM of a
device. These C data structures can be executed in-place, ie directly from
ROM. This makes importing even faster because there is very little to do,
and also means such frozen modules take up much less RAM (because their
bytecode stays in ROM).
The downside of frozen code is that it requires recompiling and reflashing
the entire firmware. This can be a big barrier to entry, slows down
development time, and makes it harder to do OTA updates of frozen code
(because the whole firmware must be updated).
This commit attempts to solve this problem by providing a solution that
sits between loading .mpy files into RAM and freezing them into the
firmware. The .mpy file format has been reworked so that it consists of
data and bytecode which is mostly static and ready to run in-place. If
these new .mpy files are located in flash/ROM which is memory addressable,
the .mpy file can be executed (mostly) in-place.
With this approach there is still a small amount of unpacking and linking
of the .mpy file that needs to be done when it's imported, but it's still
much better than loading an .mpy from disk into RAM (although not as good
as freezing .mpy files into the firmware).
The main trick to make static .mpy files is to adjust the bytecode so any
qstrs that it references now go through a lookup table to convert from
local qstr number in the module to global qstr number in the firmware.
That means the bytecode does not need linking/rewriting of qstrs when it's
loaded. Instead only a small qstr table needs to be built (and put in RAM)
at import time. This means the bytecode itself is static/constant and can
be used directly if it's in addressable memory. Also the qstr string data
in the .mpy file, and some constant object data, can be used directly.
Note that the qstr table is global to the module (ie not per function).
In more detail, in the VM what used to be (schematically):
qst = DECODE_QSTR_VALUE;
is now (schematically):
idx = DECODE_QSTR_INDEX;
qst = qstr_table[idx];
That allows the bytecode to be fixed at compile time and not need
relinking/rewriting of the qstr values. Only qstr_table needs to be linked
when the .mpy is loaded.
Incidentally, this helps to reduce the size of bytecode because what used
to be 2-byte qstr values in the bytecode are now (mostly) 1-byte indices.
If the module uses the same qstr more than two times then the bytecode is
smaller than before.
The following changes are measured for this commit compared to the
previous (the baseline):
- average 7%-9% reduction in size of .mpy files
- frozen code size is reduced by about 5%-7%
- importing .py files uses about 5% less RAM in total
- importing .mpy files uses about 4% less RAM in total
- importing .py and .mpy files takes about the same time as before
The qstr indirection in the bytecode has only a small impact on VM
performance. For stm32 on PYBv1.0 the performance change of this commit
is:
diff of scores (higher is better)
N=100 M=100 baseline -> this-commit diff diff% (error%)
bm_chaos.py 371.07 -> 357.39 : -13.68 = -3.687% (+/-0.02%)
bm_fannkuch.py 78.72 -> 77.49 : -1.23 = -1.563% (+/-0.01%)
bm_fft.py 2591.73 -> 2539.28 : -52.45 = -2.024% (+/-0.00%)
bm_float.py 6034.93 -> 5908.30 : -126.63 = -2.098% (+/-0.01%)
bm_hexiom.py 48.96 -> 47.93 : -1.03 = -2.104% (+/-0.00%)
bm_nqueens.py 4510.63 -> 4459.94 : -50.69 = -1.124% (+/-0.00%)
bm_pidigits.py 650.28 -> 644.96 : -5.32 = -0.818% (+/-0.23%)
core_import_mpy_multi.py 564.77 -> 581.49 : +16.72 = +2.960% (+/-0.01%)
core_import_mpy_single.py 68.67 -> 67.16 : -1.51 = -2.199% (+/-0.01%)
core_qstr.py 64.16 -> 64.12 : -0.04 = -0.062% (+/-0.00%)
core_yield_from.py 362.58 -> 354.50 : -8.08 = -2.228% (+/-0.00%)
misc_aes.py 429.69 -> 405.59 : -24.10 = -5.609% (+/-0.01%)
misc_mandel.py 3485.13 -> 3416.51 : -68.62 = -1.969% (+/-0.00%)
misc_pystone.py 2496.53 -> 2405.56 : -90.97 = -3.644% (+/-0.01%)
misc_raytrace.py 381.47 -> 374.01 : -7.46 = -1.956% (+/-0.01%)
viper_call0.py 576.73 -> 572.49 : -4.24 = -0.735% (+/-0.04%)
viper_call1a.py 550.37 -> 546.21 : -4.16 = -0.756% (+/-0.09%)
viper_call1b.py 438.23 -> 435.68 : -2.55 = -0.582% (+/-0.06%)
viper_call1c.py 442.84 -> 440.04 : -2.80 = -0.632% (+/-0.08%)
viper_call2a.py 536.31 -> 532.35 : -3.96 = -0.738% (+/-0.06%)
viper_call2b.py 382.34 -> 377.07 : -5.27 = -1.378% (+/-0.03%)
And for unix on x64:
diff of scores (higher is better)
N=2000 M=2000 baseline -> this-commit diff diff% (error%)
bm_chaos.py 13594.20 -> 13073.84 : -520.36 = -3.828% (+/-5.44%)
bm_fannkuch.py 60.63 -> 59.58 : -1.05 = -1.732% (+/-3.01%)
bm_fft.py 112009.15 -> 111603.32 : -405.83 = -0.362% (+/-4.03%)
bm_float.py 246202.55 -> 247923.81 : +1721.26 = +0.699% (+/-2.79%)
bm_hexiom.py 615.65 -> 617.21 : +1.56 = +0.253% (+/-1.64%)
bm_nqueens.py 215807.95 -> 215600.96 : -206.99 = -0.096% (+/-3.52%)
bm_pidigits.py 8246.74 -> 8422.82 : +176.08 = +2.135% (+/-3.64%)
misc_aes.py 16133.00 -> 16452.74 : +319.74 = +1.982% (+/-1.50%)
misc_mandel.py 128146.69 -> 130796.43 : +2649.74 = +2.068% (+/-3.18%)
misc_pystone.py 83811.49 -> 83124.85 : -686.64 = -0.819% (+/-1.03%)
misc_raytrace.py 21688.02 -> 21385.10 : -302.92 = -1.397% (+/-3.20%)
The code size change is (firmware with a lot of frozen code benefits the
most):
bare-arm: +396 +0.697%
minimal x86: +1595 +0.979% [incl +32(data)]
unix x64: +2408 +0.470% [incl +800(data)]
unix nanbox: +1396 +0.309% [incl -96(data)]
stm32: -1256 -0.318% PYBV10
cc3200: +288 +0.157%
esp8266: -260 -0.037% GENERIC
esp32: -216 -0.014% GENERIC[incl -1072(data)]
nrf: +116 +0.067% pca10040
rp2: -664 -0.135% PICO
samd: +844 +0.607% ADAFRUIT_ITSYBITSY_M4_EXPRESS
As part of this change the .mpy file format version is bumped to version 6.
And mpy-tool.py has been improved to provide a good visualisation of the
contents of .mpy files.
In summary: this commit changes the bytecode to use qstr indirection, and
reworks the .mpy file format to be simpler and allow .mpy files to be
executed in-place. Performance is not impacted too much. Eventually it
will be possible to store such .mpy files in a linear, read-only, memory-
mappable filesystem so they can be executed from flash/ROM. This will
essentially be able to replace frozen code for most applications.
Signed-off-by: Damien George <damien@micropython.org>
2021-10-22 22:22:47 +11:00
|
|
|
|
|
|
|
static inline size_t mp_emit_common_alloc_const_child(mp_emit_common_t *emit, mp_raw_code_t *rc) {
|
|
|
|
if (emit->pass == MP_PASS_EMIT) {
|
|
|
|
emit->children[emit->ct_cur_child] = rc;
|
|
|
|
}
|
|
|
|
return emit->ct_cur_child++;
|
|
|
|
}
|
|
|
|
|
2018-10-27 22:41:21 +11:00
|
|
|
static inline void mp_emit_common_get_id_for_load(scope_t *scope, qstr qst) {
|
|
|
|
scope_find_or_add_id(scope, qst, ID_INFO_KIND_GLOBAL_IMPLICIT);
|
|
|
|
}
|
|
|
|
|
2015-03-26 14:42:40 +00:00
|
|
|
void mp_emit_common_get_id_for_modification(scope_t *scope, qstr qst);
|
|
|
|
void mp_emit_common_id_op(emit_t *emit, const mp_emit_method_table_id_ops_t *emit_method_table, scope_t *scope, qstr qst);
|
2013-10-05 13:37:10 +01:00
|
|
|
|
2013-10-05 18:08:26 +01:00
|
|
|
extern const emit_method_table_t emit_bc_method_table;
|
2013-10-08 09:05:10 +01:00
|
|
|
extern const emit_method_table_t emit_native_x64_method_table;
|
2014-09-06 23:06:36 +01:00
|
|
|
extern const emit_method_table_t emit_native_x86_method_table;
|
2013-10-08 09:05:10 +01:00
|
|
|
extern const emit_method_table_t emit_native_thumb_method_table;
|
2014-08-16 22:55:53 +02:00
|
|
|
extern const emit_method_table_t emit_native_arm_method_table;
|
2016-12-09 16:39:39 +11:00
|
|
|
extern const emit_method_table_t emit_native_xtensa_method_table;
|
2019-09-13 13:15:12 +10:00
|
|
|
extern const emit_method_table_t emit_native_xtensawin_method_table;
|
2013-10-05 18:08:26 +01:00
|
|
|
|
2015-03-26 16:44:14 +00:00
|
|
|
extern const mp_emit_method_table_id_ops_t mp_emit_bc_method_table_load_id_ops;
|
|
|
|
extern const mp_emit_method_table_id_ops_t mp_emit_bc_method_table_store_id_ops;
|
|
|
|
extern const mp_emit_method_table_id_ops_t mp_emit_bc_method_table_delete_id_ops;
|
|
|
|
|
py: Rework bytecode and .mpy file format to be mostly static data.
Background: .mpy files are precompiled .py files, built using mpy-cross,
that contain compiled bytecode functions (and can also contain machine
code). The benefit of using an .mpy file over a .py file is that they are
faster to import and take less memory when importing. They are also
smaller on disk.
But the real benefit of .mpy files comes when they are frozen into the
firmware. This is done by loading the .mpy file during compilation of the
firmware and turning it into a set of big C data structures (the job of
mpy-tool.py), which are then compiled and downloaded into the ROM of a
device. These C data structures can be executed in-place, ie directly from
ROM. This makes importing even faster because there is very little to do,
and also means such frozen modules take up much less RAM (because their
bytecode stays in ROM).
The downside of frozen code is that it requires recompiling and reflashing
the entire firmware. This can be a big barrier to entry, slows down
development time, and makes it harder to do OTA updates of frozen code
(because the whole firmware must be updated).
This commit attempts to solve this problem by providing a solution that
sits between loading .mpy files into RAM and freezing them into the
firmware. The .mpy file format has been reworked so that it consists of
data and bytecode which is mostly static and ready to run in-place. If
these new .mpy files are located in flash/ROM which is memory addressable,
the .mpy file can be executed (mostly) in-place.
With this approach there is still a small amount of unpacking and linking
of the .mpy file that needs to be done when it's imported, but it's still
much better than loading an .mpy from disk into RAM (although not as good
as freezing .mpy files into the firmware).
The main trick to make static .mpy files is to adjust the bytecode so any
qstrs that it references now go through a lookup table to convert from
local qstr number in the module to global qstr number in the firmware.
That means the bytecode does not need linking/rewriting of qstrs when it's
loaded. Instead only a small qstr table needs to be built (and put in RAM)
at import time. This means the bytecode itself is static/constant and can
be used directly if it's in addressable memory. Also the qstr string data
in the .mpy file, and some constant object data, can be used directly.
Note that the qstr table is global to the module (ie not per function).
In more detail, in the VM what used to be (schematically):
qst = DECODE_QSTR_VALUE;
is now (schematically):
idx = DECODE_QSTR_INDEX;
qst = qstr_table[idx];
That allows the bytecode to be fixed at compile time and not need
relinking/rewriting of the qstr values. Only qstr_table needs to be linked
when the .mpy is loaded.
Incidentally, this helps to reduce the size of bytecode because what used
to be 2-byte qstr values in the bytecode are now (mostly) 1-byte indices.
If the module uses the same qstr more than two times then the bytecode is
smaller than before.
The following changes are measured for this commit compared to the
previous (the baseline):
- average 7%-9% reduction in size of .mpy files
- frozen code size is reduced by about 5%-7%
- importing .py files uses about 5% less RAM in total
- importing .mpy files uses about 4% less RAM in total
- importing .py and .mpy files takes about the same time as before
The qstr indirection in the bytecode has only a small impact on VM
performance. For stm32 on PYBv1.0 the performance change of this commit
is:
diff of scores (higher is better)
N=100 M=100 baseline -> this-commit diff diff% (error%)
bm_chaos.py 371.07 -> 357.39 : -13.68 = -3.687% (+/-0.02%)
bm_fannkuch.py 78.72 -> 77.49 : -1.23 = -1.563% (+/-0.01%)
bm_fft.py 2591.73 -> 2539.28 : -52.45 = -2.024% (+/-0.00%)
bm_float.py 6034.93 -> 5908.30 : -126.63 = -2.098% (+/-0.01%)
bm_hexiom.py 48.96 -> 47.93 : -1.03 = -2.104% (+/-0.00%)
bm_nqueens.py 4510.63 -> 4459.94 : -50.69 = -1.124% (+/-0.00%)
bm_pidigits.py 650.28 -> 644.96 : -5.32 = -0.818% (+/-0.23%)
core_import_mpy_multi.py 564.77 -> 581.49 : +16.72 = +2.960% (+/-0.01%)
core_import_mpy_single.py 68.67 -> 67.16 : -1.51 = -2.199% (+/-0.01%)
core_qstr.py 64.16 -> 64.12 : -0.04 = -0.062% (+/-0.00%)
core_yield_from.py 362.58 -> 354.50 : -8.08 = -2.228% (+/-0.00%)
misc_aes.py 429.69 -> 405.59 : -24.10 = -5.609% (+/-0.01%)
misc_mandel.py 3485.13 -> 3416.51 : -68.62 = -1.969% (+/-0.00%)
misc_pystone.py 2496.53 -> 2405.56 : -90.97 = -3.644% (+/-0.01%)
misc_raytrace.py 381.47 -> 374.01 : -7.46 = -1.956% (+/-0.01%)
viper_call0.py 576.73 -> 572.49 : -4.24 = -0.735% (+/-0.04%)
viper_call1a.py 550.37 -> 546.21 : -4.16 = -0.756% (+/-0.09%)
viper_call1b.py 438.23 -> 435.68 : -2.55 = -0.582% (+/-0.06%)
viper_call1c.py 442.84 -> 440.04 : -2.80 = -0.632% (+/-0.08%)
viper_call2a.py 536.31 -> 532.35 : -3.96 = -0.738% (+/-0.06%)
viper_call2b.py 382.34 -> 377.07 : -5.27 = -1.378% (+/-0.03%)
And for unix on x64:
diff of scores (higher is better)
N=2000 M=2000 baseline -> this-commit diff diff% (error%)
bm_chaos.py 13594.20 -> 13073.84 : -520.36 = -3.828% (+/-5.44%)
bm_fannkuch.py 60.63 -> 59.58 : -1.05 = -1.732% (+/-3.01%)
bm_fft.py 112009.15 -> 111603.32 : -405.83 = -0.362% (+/-4.03%)
bm_float.py 246202.55 -> 247923.81 : +1721.26 = +0.699% (+/-2.79%)
bm_hexiom.py 615.65 -> 617.21 : +1.56 = +0.253% (+/-1.64%)
bm_nqueens.py 215807.95 -> 215600.96 : -206.99 = -0.096% (+/-3.52%)
bm_pidigits.py 8246.74 -> 8422.82 : +176.08 = +2.135% (+/-3.64%)
misc_aes.py 16133.00 -> 16452.74 : +319.74 = +1.982% (+/-1.50%)
misc_mandel.py 128146.69 -> 130796.43 : +2649.74 = +2.068% (+/-3.18%)
misc_pystone.py 83811.49 -> 83124.85 : -686.64 = -0.819% (+/-1.03%)
misc_raytrace.py 21688.02 -> 21385.10 : -302.92 = -1.397% (+/-3.20%)
The code size change is (firmware with a lot of frozen code benefits the
most):
bare-arm: +396 +0.697%
minimal x86: +1595 +0.979% [incl +32(data)]
unix x64: +2408 +0.470% [incl +800(data)]
unix nanbox: +1396 +0.309% [incl -96(data)]
stm32: -1256 -0.318% PYBV10
cc3200: +288 +0.157%
esp8266: -260 -0.037% GENERIC
esp32: -216 -0.014% GENERIC[incl -1072(data)]
nrf: +116 +0.067% pca10040
rp2: -664 -0.135% PICO
samd: +844 +0.607% ADAFRUIT_ITSYBITSY_M4_EXPRESS
As part of this change the .mpy file format version is bumped to version 6.
And mpy-tool.py has been improved to provide a good visualisation of the
contents of .mpy files.
In summary: this commit changes the bytecode to use qstr indirection, and
reworks the .mpy file format to be simpler and allow .mpy files to be
executed in-place. Performance is not impacted too much. Eventually it
will be possible to store such .mpy files in a linear, read-only, memory-
mappable filesystem so they can be executed from flash/ROM. This will
essentially be able to replace frozen code for most applications.
Signed-off-by: Damien George <damien@micropython.org>
2021-10-22 22:22:47 +11:00
|
|
|
emit_t *emit_bc_new(mp_emit_common_t *emit_common);
|
|
|
|
emit_t *emit_native_x64_new(mp_emit_common_t *emit_common, mp_obj_t *error_slot, uint *label_slot, mp_uint_t max_num_labels);
|
|
|
|
emit_t *emit_native_x86_new(mp_emit_common_t *emit_common, mp_obj_t *error_slot, uint *label_slot, mp_uint_t max_num_labels);
|
|
|
|
emit_t *emit_native_thumb_new(mp_emit_common_t *emit_common, mp_obj_t *error_slot, uint *label_slot, mp_uint_t max_num_labels);
|
|
|
|
emit_t *emit_native_arm_new(mp_emit_common_t *emit_common, mp_obj_t *error_slot, uint *label_slot, mp_uint_t max_num_labels);
|
|
|
|
emit_t *emit_native_xtensa_new(mp_emit_common_t *emit_common, mp_obj_t *error_slot, uint *label_slot, mp_uint_t max_num_labels);
|
|
|
|
emit_t *emit_native_xtensawin_new(mp_emit_common_t *emit_common, mp_obj_t *error_slot, uint *label_slot, mp_uint_t max_num_labels);
|
2013-10-05 23:17:28 +01:00
|
|
|
|
2015-03-26 15:49:53 +00:00
|
|
|
void emit_bc_set_max_num_labels(emit_t *emit, mp_uint_t max_num_labels);
|
|
|
|
|
2014-01-24 22:42:28 +00:00
|
|
|
void emit_bc_free(emit_t *emit);
|
|
|
|
void emit_native_x64_free(emit_t *emit);
|
2014-09-06 23:06:36 +01:00
|
|
|
void emit_native_x86_free(emit_t *emit);
|
2014-01-24 22:42:28 +00:00
|
|
|
void emit_native_thumb_free(emit_t *emit);
|
2014-08-16 22:55:53 +02:00
|
|
|
void emit_native_arm_free(emit_t *emit);
|
2016-12-09 16:39:39 +11:00
|
|
|
void emit_native_xtensa_free(emit_t *emit);
|
2019-09-13 13:15:12 +10:00
|
|
|
void emit_native_xtensawin_free(emit_t *emit);
|
2014-01-24 22:42:28 +00:00
|
|
|
|
2015-03-26 16:44:14 +00:00
|
|
|
void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope);
|
2022-03-16 09:37:58 +11:00
|
|
|
bool mp_emit_bc_end_pass(emit_t *emit);
|
2015-03-26 16:44:14 +00:00
|
|
|
void mp_emit_bc_adjust_stack_size(emit_t *emit, mp_int_t delta);
|
|
|
|
void mp_emit_bc_set_source_line(emit_t *emit, mp_uint_t line);
|
|
|
|
|
2018-05-19 00:11:04 +10:00
|
|
|
void mp_emit_bc_load_local(emit_t *emit, qstr qst, mp_uint_t local_num, int kind);
|
2018-05-22 21:16:30 +10:00
|
|
|
void mp_emit_bc_load_global(emit_t *emit, qstr qst, int kind);
|
2018-05-19 00:11:04 +10:00
|
|
|
void mp_emit_bc_store_local(emit_t *emit, qstr qst, mp_uint_t local_num, int kind);
|
2018-05-22 21:16:30 +10:00
|
|
|
void mp_emit_bc_store_global(emit_t *emit, qstr qst, int kind);
|
2018-05-19 00:11:04 +10:00
|
|
|
void mp_emit_bc_delete_local(emit_t *emit, qstr qst, mp_uint_t local_num, int kind);
|
2018-05-22 21:16:30 +10:00
|
|
|
void mp_emit_bc_delete_global(emit_t *emit, qstr qst, int kind);
|
2015-03-26 16:44:14 +00:00
|
|
|
|
|
|
|
void mp_emit_bc_label_assign(emit_t *emit, mp_uint_t l);
|
2018-05-22 21:58:25 +10:00
|
|
|
void mp_emit_bc_import(emit_t *emit, qstr qst, int kind);
|
2015-03-26 16:44:14 +00:00
|
|
|
void mp_emit_bc_load_const_tok(emit_t *emit, mp_token_kind_t tok);
|
|
|
|
void mp_emit_bc_load_const_small_int(emit_t *emit, mp_int_t arg);
|
2015-06-25 14:42:13 +00:00
|
|
|
void mp_emit_bc_load_const_str(emit_t *emit, qstr qst);
|
2015-11-27 12:41:25 +00:00
|
|
|
void mp_emit_bc_load_const_obj(emit_t *emit, mp_obj_t obj);
|
2015-03-26 16:44:14 +00:00
|
|
|
void mp_emit_bc_load_null(emit_t *emit);
|
2017-04-19 09:45:59 +10:00
|
|
|
void mp_emit_bc_load_method(emit_t *emit, qstr qst, bool is_super);
|
2015-03-26 16:44:14 +00:00
|
|
|
void mp_emit_bc_load_build_class(emit_t *emit);
|
2018-05-22 21:31:56 +10:00
|
|
|
void mp_emit_bc_subscr(emit_t *emit, int kind);
|
2018-05-22 21:43:41 +10:00
|
|
|
void mp_emit_bc_attr(emit_t *emit, qstr qst, int kind);
|
2015-03-26 16:44:14 +00:00
|
|
|
void mp_emit_bc_dup_top(emit_t *emit);
|
|
|
|
void mp_emit_bc_dup_top_two(emit_t *emit);
|
|
|
|
void mp_emit_bc_pop_top(emit_t *emit);
|
|
|
|
void mp_emit_bc_rot_two(emit_t *emit);
|
|
|
|
void mp_emit_bc_rot_three(emit_t *emit);
|
|
|
|
void mp_emit_bc_jump(emit_t *emit, mp_uint_t label);
|
|
|
|
void mp_emit_bc_pop_jump_if(emit_t *emit, bool cond, mp_uint_t label);
|
|
|
|
void mp_emit_bc_jump_if_or_pop(emit_t *emit, bool cond, mp_uint_t label);
|
|
|
|
void mp_emit_bc_unwind_jump(emit_t *emit, mp_uint_t label, mp_uint_t except_depth);
|
2018-05-22 22:33:26 +10:00
|
|
|
void mp_emit_bc_setup_block(emit_t *emit, mp_uint_t label, int kind);
|
2016-04-07 08:50:38 +01:00
|
|
|
void mp_emit_bc_with_cleanup(emit_t *emit, mp_uint_t label);
|
2015-03-26 16:44:14 +00:00
|
|
|
void mp_emit_bc_end_finally(emit_t *emit);
|
2016-01-09 23:59:52 +00:00
|
|
|
void mp_emit_bc_get_iter(emit_t *emit, bool use_stack);
|
2015-03-26 16:44:14 +00:00
|
|
|
void mp_emit_bc_for_iter(emit_t *emit, mp_uint_t label);
|
2017-01-17 15:30:18 +11:00
|
|
|
void mp_emit_bc_for_iter_end(emit_t *emit);
|
2019-02-15 12:18:59 +11:00
|
|
|
void mp_emit_bc_pop_except_jump(emit_t *emit, mp_uint_t label, bool within_exc_handler);
|
2015-03-26 16:44:14 +00:00
|
|
|
void mp_emit_bc_unary_op(emit_t *emit, mp_unary_op_t op);
|
|
|
|
void mp_emit_bc_binary_op(emit_t *emit, mp_binary_op_t op);
|
2018-05-19 00:41:40 +10:00
|
|
|
void mp_emit_bc_build(emit_t *emit, mp_uint_t n_args, int kind);
|
2015-03-26 16:44:14 +00:00
|
|
|
void mp_emit_bc_store_map(emit_t *emit);
|
2016-09-18 23:59:47 +10:00
|
|
|
void mp_emit_bc_store_comp(emit_t *emit, scope_kind_t kind, mp_uint_t list_stack_index);
|
2015-03-26 16:44:14 +00:00
|
|
|
void mp_emit_bc_unpack_sequence(emit_t *emit, mp_uint_t n_args);
|
|
|
|
void mp_emit_bc_unpack_ex(emit_t *emit, mp_uint_t n_left, mp_uint_t n_right);
|
|
|
|
void mp_emit_bc_make_function(emit_t *emit, scope_t *scope, mp_uint_t n_pos_defaults, mp_uint_t n_kw_defaults);
|
|
|
|
void mp_emit_bc_make_closure(emit_t *emit, scope_t *scope, mp_uint_t n_closed_over, mp_uint_t n_pos_defaults, mp_uint_t n_kw_defaults);
|
|
|
|
void mp_emit_bc_call_function(emit_t *emit, mp_uint_t n_positional, mp_uint_t n_keyword, mp_uint_t star_flags);
|
|
|
|
void mp_emit_bc_call_method(emit_t *emit, mp_uint_t n_positional, mp_uint_t n_keyword, mp_uint_t star_flags);
|
|
|
|
void mp_emit_bc_return_value(emit_t *emit);
|
|
|
|
void mp_emit_bc_raise_varargs(emit_t *emit, mp_uint_t n_args);
|
2018-05-19 00:30:42 +10:00
|
|
|
void mp_emit_bc_yield(emit_t *emit, int kind);
|
2015-03-26 16:44:14 +00:00
|
|
|
void mp_emit_bc_start_except_handler(emit_t *emit);
|
|
|
|
void mp_emit_bc_end_except_handler(emit_t *emit);
|
|
|
|
|
2013-10-05 23:17:28 +01:00
|
|
|
typedef struct _emit_inline_asm_t emit_inline_asm_t;
|
|
|
|
|
|
|
|
typedef struct _emit_inline_asm_method_table_t {
|
2019-03-09 12:32:09 +11:00
|
|
|
#if MICROPY_DYNAMIC_COMPILER
|
|
|
|
emit_inline_asm_t *(*asm_new)(mp_uint_t max_num_labels);
|
|
|
|
void (*asm_free)(emit_inline_asm_t *emit);
|
|
|
|
#endif
|
|
|
|
|
2016-12-09 21:23:17 +11:00
|
|
|
void (*start_pass)(emit_inline_asm_t *emit, pass_kind_t pass, mp_obj_t *error_slot);
|
2016-01-15 15:20:43 +00:00
|
|
|
void (*end_pass)(emit_inline_asm_t *emit, mp_uint_t type_sig);
|
2014-09-08 23:05:16 +01:00
|
|
|
mp_uint_t (*count_params)(emit_inline_asm_t *emit, mp_uint_t n_params, mp_parse_node_t *pn_params);
|
2015-03-03 17:08:02 +00:00
|
|
|
bool (*label)(emit_inline_asm_t *emit, mp_uint_t label_num, qstr label_id);
|
2014-09-08 23:05:16 +01:00
|
|
|
void (*op)(emit_inline_asm_t *emit, qstr op, mp_uint_t n_args, mp_parse_node_t *pn_args);
|
2013-10-05 23:17:28 +01:00
|
|
|
} emit_inline_asm_method_table_t;
|
|
|
|
|
|
|
|
extern const emit_inline_asm_method_table_t emit_inline_thumb_method_table;
|
py: Add inline Xtensa assembler.
This patch adds the MICROPY_EMIT_INLINE_XTENSA option, which, when
enabled, allows the @micropython.asm_xtensa decorator to be used.
The following opcodes are currently supported (ax is a register, a0-a15):
ret_n()
callx0(ax)
j(label)
jx(ax)
beqz(ax, label)
bnez(ax, label)
mov(ax, ay)
movi(ax, imm) # imm can be full 32-bit, uses l32r if needed
and_(ax, ay, az)
or_(ax, ay, az)
xor(ax, ay, az)
add(ax, ay, az)
sub(ax, ay, az)
mull(ax, ay, az)
l8ui(ax, ay, imm)
l16ui(ax, ay, imm)
l32i(ax, ay, imm)
s8i(ax, ay, imm)
s16i(ax, ay, imm)
s32i(ax, ay, imm)
l16si(ax, ay, imm)
addi(ax, ay, imm)
ball(ax, ay, label)
bany(ax, ay, label)
bbc(ax, ay, label)
bbs(ax, ay, label)
beq(ax, ay, label)
bge(ax, ay, label)
bgeu(ax, ay, label)
blt(ax, ay, label)
bnall(ax, ay, label)
bne(ax, ay, label)
bnone(ax, ay, label)
Upon entry to the assembly function the registers a0, a12, a13, a14 are
pushed to the stack and the stack pointer (a1) decreased by 16. Upon
exit, these registers and the stack pointer are restored, and ret.n is
executed to return to the caller (caller address is in a0).
Note that the ABI for the Xtensa emitters is non-windowing.
2016-12-09 17:03:33 +11:00
|
|
|
extern const emit_inline_asm_method_table_t emit_inline_xtensa_method_table;
|
2013-10-05 23:17:28 +01:00
|
|
|
|
2014-09-08 23:05:16 +01:00
|
|
|
emit_inline_asm_t *emit_inline_thumb_new(mp_uint_t max_num_labels);
|
py: Add inline Xtensa assembler.
This patch adds the MICROPY_EMIT_INLINE_XTENSA option, which, when
enabled, allows the @micropython.asm_xtensa decorator to be used.
The following opcodes are currently supported (ax is a register, a0-a15):
ret_n()
callx0(ax)
j(label)
jx(ax)
beqz(ax, label)
bnez(ax, label)
mov(ax, ay)
movi(ax, imm) # imm can be full 32-bit, uses l32r if needed
and_(ax, ay, az)
or_(ax, ay, az)
xor(ax, ay, az)
add(ax, ay, az)
sub(ax, ay, az)
mull(ax, ay, az)
l8ui(ax, ay, imm)
l16ui(ax, ay, imm)
l32i(ax, ay, imm)
s8i(ax, ay, imm)
s16i(ax, ay, imm)
s32i(ax, ay, imm)
l16si(ax, ay, imm)
addi(ax, ay, imm)
ball(ax, ay, label)
bany(ax, ay, label)
bbc(ax, ay, label)
bbs(ax, ay, label)
beq(ax, ay, label)
bge(ax, ay, label)
bgeu(ax, ay, label)
blt(ax, ay, label)
bnall(ax, ay, label)
bne(ax, ay, label)
bnone(ax, ay, label)
Upon entry to the assembly function the registers a0, a12, a13, a14 are
pushed to the stack and the stack pointer (a1) decreased by 16. Upon
exit, these registers and the stack pointer are restored, and ret.n is
executed to return to the caller (caller address is in a0).
Note that the ABI for the Xtensa emitters is non-windowing.
2016-12-09 17:03:33 +11:00
|
|
|
emit_inline_asm_t *emit_inline_xtensa_new(mp_uint_t max_num_labels);
|
|
|
|
|
2014-01-24 22:42:28 +00:00
|
|
|
void emit_inline_thumb_free(emit_inline_asm_t *emit);
|
py: Add inline Xtensa assembler.
This patch adds the MICROPY_EMIT_INLINE_XTENSA option, which, when
enabled, allows the @micropython.asm_xtensa decorator to be used.
The following opcodes are currently supported (ax is a register, a0-a15):
ret_n()
callx0(ax)
j(label)
jx(ax)
beqz(ax, label)
bnez(ax, label)
mov(ax, ay)
movi(ax, imm) # imm can be full 32-bit, uses l32r if needed
and_(ax, ay, az)
or_(ax, ay, az)
xor(ax, ay, az)
add(ax, ay, az)
sub(ax, ay, az)
mull(ax, ay, az)
l8ui(ax, ay, imm)
l16ui(ax, ay, imm)
l32i(ax, ay, imm)
s8i(ax, ay, imm)
s16i(ax, ay, imm)
s32i(ax, ay, imm)
l16si(ax, ay, imm)
addi(ax, ay, imm)
ball(ax, ay, label)
bany(ax, ay, label)
bbc(ax, ay, label)
bbs(ax, ay, label)
beq(ax, ay, label)
bge(ax, ay, label)
bgeu(ax, ay, label)
blt(ax, ay, label)
bnall(ax, ay, label)
bne(ax, ay, label)
bnone(ax, ay, label)
Upon entry to the assembly function the registers a0, a12, a13, a14 are
pushed to the stack and the stack pointer (a1) decreased by 16. Upon
exit, these registers and the stack pointer are restored, and ret.n is
executed to return to the caller (caller address is in a0).
Note that the ABI for the Xtensa emitters is non-windowing.
2016-12-09 17:03:33 +11:00
|
|
|
void emit_inline_xtensa_free(emit_inline_asm_t *emit);
|
2014-12-25 23:29:19 +02:00
|
|
|
|
2015-01-01 09:29:28 +02:00
|
|
|
#if MICROPY_WARNINGS
|
|
|
|
void mp_emitter_warning(pass_kind_t pass, const char *msg);
|
|
|
|
#else
|
|
|
|
#define mp_emitter_warning(pass, msg)
|
|
|
|
#endif
|
|
|
|
|
all: Unify header guard usage.
The code conventions suggest using header guards, but do not define how
those should look like and instead point to existing files. However, not
all existing files follow the same scheme, sometimes omitting header guards
altogether, sometimes using non-standard names, making it easy to
accidentally pick a "wrong" example.
This commit ensures that all header files of the MicroPython project (that
were not simply copied from somewhere else) follow the same pattern, that
was already present in the majority of files, especially in the py folder.
The rules are as follows.
Naming convention:
* start with the words MICROPY_INCLUDED
* contain the full path to the file
* replace special characters with _
In addition, there are no empty lines before #ifndef, between #ifndef and
one empty line before #endif. #endif is followed by a comment containing
the name of the guard macro.
py/grammar.h cannot use header guards by design, since it has to be
included multiple times in a single C file. Several other files also do not
need header guards as they are only used internally and guaranteed to be
included only once:
* MICROPY_MPHALPORT_H
* mpconfigboard.h
* mpconfigport.h
* mpthreadport.h
* pin_defs_*.h
* qstrdefs*.h
2017-06-29 23:14:58 +02:00
|
|
|
#endif // MICROPY_INCLUDED_PY_EMIT_H
|