2015-03-20 17:25:25 +00:00
|
|
|
File cmdline/cmd_showbc.py, code block '<module>' (descriptor: \.\+, bytecode @\.\+ bytes)
|
|
|
|
Raw bytecode (code_info_size=\\d\+, bytecode_size=\\d\+):
|
|
|
|
########
|
2019-09-02 21:35:17 +10:00
|
|
|
\.\+63
|
2015-03-14 14:06:20 +00:00
|
|
|
arg names:
|
|
|
|
(N_STATE 3)
|
|
|
|
(N_EXC_STACK 0)
|
2019-09-25 15:45:47 +10:00
|
|
|
bc=0 line=1
|
2015-03-20 17:25:25 +00:00
|
|
|
########
|
2019-09-20 09:16:34 +02:00
|
|
|
bc=\\d\+ line=160
|
2015-03-20 17:25:25 +00:00
|
|
|
00 MAKE_FUNCTION \.\+
|
|
|
|
\\d\+ STORE_NAME f
|
2016-10-17 11:23:37 +11:00
|
|
|
\\d\+ MAKE_FUNCTION \.\+
|
|
|
|
\\d\+ STORE_NAME f
|
2015-03-20 17:25:25 +00:00
|
|
|
\\d\+ LOAD_CONST_SMALL_INT 1
|
|
|
|
\\d\+ BUILD_TUPLE 1
|
|
|
|
\\d\+ LOAD_NULL
|
|
|
|
\\d\+ MAKE_FUNCTION_DEFARGS \.\+
|
|
|
|
\\d\+ STORE_NAME f
|
|
|
|
\\d\+ MAKE_FUNCTION \.\+
|
|
|
|
\\d\+ STORE_NAME f
|
|
|
|
\\d\+ LOAD_BUILD_CLASS
|
|
|
|
\\d\+ MAKE_FUNCTION \.\+
|
|
|
|
\\d\+ LOAD_CONST_STRING 'Class'
|
|
|
|
\\d\+ CALL_FUNCTION n=2 nkw=0
|
|
|
|
\\d\+ STORE_NAME Class
|
2016-10-17 11:23:37 +11:00
|
|
|
\\d\+ DELETE_NAME Class
|
2017-04-19 09:49:48 +10:00
|
|
|
\\d\+ MAKE_FUNCTION \.\+
|
|
|
|
\\d\+ STORE_NAME f
|
2019-09-20 09:16:34 +02:00
|
|
|
\\d\+ LOAD_CONST_SMALL_INT 0
|
|
|
|
\\d\+ LOAD_CONST_STRING '*'
|
|
|
|
\\d\+ BUILD_TUPLE 1
|
|
|
|
\\d\+ IMPORT_NAME 'sys'
|
|
|
|
\\d\+ IMPORT_STAR
|
2015-03-20 17:25:25 +00:00
|
|
|
\\d\+ LOAD_CONST_NONE
|
|
|
|
\\d\+ RETURN_VALUE
|
|
|
|
File cmdline/cmd_showbc.py, code block 'f' (descriptor: \.\+, bytecode @\.\+ bytes)
|
|
|
|
Raw bytecode (code_info_size=\\d\+, bytecode_size=\\d\+):
|
|
|
|
########
|
2016-10-05 12:34:18 +02:00
|
|
|
\.\+rg names:
|
2017-01-18 16:53:15 +11:00
|
|
|
(N_STATE 22)
|
2015-03-14 14:06:20 +00:00
|
|
|
(N_EXC_STACK 2)
|
|
|
|
(INIT_CELL 14)
|
|
|
|
(INIT_CELL 15)
|
|
|
|
(INIT_CELL 16)
|
2019-09-25 15:45:47 +10:00
|
|
|
bc=0 line=1
|
2015-03-20 17:25:25 +00:00
|
|
|
########
|
2016-10-17 11:23:37 +11:00
|
|
|
bc=\\d\+ line=127
|
2015-03-14 14:06:20 +00:00
|
|
|
00 LOAD_CONST_NONE
|
|
|
|
01 LOAD_CONST_FALSE
|
2019-07-25 13:15:54 +10:00
|
|
|
02 BINARY_OP 27 __add__
|
2015-03-14 14:06:20 +00:00
|
|
|
03 LOAD_CONST_TRUE
|
2019-07-25 13:15:54 +10:00
|
|
|
04 BINARY_OP 27 __add__
|
2015-05-05 23:58:52 +01:00
|
|
|
05 STORE_FAST 0
|
|
|
|
06 LOAD_CONST_SMALL_INT 0
|
2015-03-14 14:06:20 +00:00
|
|
|
07 STORE_FAST 0
|
2015-05-05 23:58:52 +01:00
|
|
|
08 LOAD_CONST_SMALL_INT 1000
|
|
|
|
11 STORE_FAST 0
|
|
|
|
12 LOAD_CONST_SMALL_INT -1000
|
|
|
|
15 STORE_FAST 0
|
|
|
|
16 LOAD_CONST_SMALL_INT 1
|
2015-03-14 14:06:20 +00:00
|
|
|
17 STORE_FAST 0
|
|
|
|
18 LOAD_CONST_SMALL_INT 1
|
2015-05-05 23:58:52 +01:00
|
|
|
19 LOAD_CONST_SMALL_INT 2
|
|
|
|
20 BUILD_TUPLE 2
|
|
|
|
22 STORE_DEREF 14
|
|
|
|
24 LOAD_CONST_SMALL_INT 1
|
|
|
|
25 LOAD_CONST_SMALL_INT 2
|
|
|
|
26 BUILD_LIST 2
|
|
|
|
28 STORE_FAST 1
|
|
|
|
29 LOAD_CONST_SMALL_INT 1
|
|
|
|
30 LOAD_CONST_SMALL_INT 2
|
|
|
|
31 BUILD_SET 2
|
|
|
|
33 STORE_FAST 2
|
|
|
|
34 BUILD_MAP 0
|
|
|
|
36 STORE_DEREF 15
|
|
|
|
38 BUILD_MAP 1
|
|
|
|
40 LOAD_CONST_SMALL_INT 2
|
|
|
|
41 LOAD_CONST_SMALL_INT 1
|
|
|
|
42 STORE_MAP
|
|
|
|
43 STORE_FAST 3
|
|
|
|
44 LOAD_CONST_STRING 'a'
|
|
|
|
47 STORE_FAST 4
|
2015-06-25 14:42:13 +00:00
|
|
|
48 LOAD_CONST_OBJ \.\+
|
|
|
|
\\d\+ STORE_FAST 5
|
|
|
|
\\d\+ LOAD_CONST_SMALL_INT 1
|
|
|
|
\\d\+ STORE_FAST 6
|
|
|
|
\\d\+ LOAD_CONST_SMALL_INT 2
|
|
|
|
\\d\+ STORE_FAST 7
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ LOAD_DEREF 14
|
2019-07-25 13:15:54 +10:00
|
|
|
58 BINARY_OP 27 __add__
|
2015-06-25 14:42:13 +00:00
|
|
|
\\d\+ STORE_FAST 8
|
|
|
|
\\d\+ LOAD_FAST 0
|
2021-11-19 17:05:40 +11:00
|
|
|
\\d\+ UNARY_OP 1 __neg__
|
2015-06-25 14:42:13 +00:00
|
|
|
\\d\+ STORE_FAST 9
|
|
|
|
\\d\+ LOAD_FAST 0
|
2021-11-19 17:05:40 +11:00
|
|
|
\\d\+ UNARY_OP 3
|
2015-06-25 14:42:13 +00:00
|
|
|
\\d\+ STORE_FAST 10
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ LOAD_DEREF 14
|
|
|
|
\\d\+ DUP_TOP
|
|
|
|
\\d\+ ROT_THREE
|
2017-09-07 10:55:43 +03:00
|
|
|
\\d\+ BINARY_OP 2 __eq__
|
2015-06-25 14:42:13 +00:00
|
|
|
\\d\+ JUMP_IF_FALSE_OR_POP \\d\+
|
|
|
|
\\d\+ LOAD_FAST 1
|
2017-09-07 10:55:43 +03:00
|
|
|
\\d\+ BINARY_OP 2 __eq__
|
2015-06-25 14:42:13 +00:00
|
|
|
\\d\+ JUMP \\d\+
|
|
|
|
\\d\+ ROT_TWO
|
|
|
|
\\d\+ POP_TOP
|
|
|
|
\\d\+ STORE_FAST 10
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ LOAD_DEREF 14
|
2017-09-07 10:55:43 +03:00
|
|
|
\\d\+ BINARY_OP 2 __eq__
|
2015-06-25 14:42:13 +00:00
|
|
|
\\d\+ JUMP_IF_FALSE_OR_POP \\d\+
|
|
|
|
\\d\+ LOAD_DEREF 14
|
|
|
|
\\d\+ LOAD_FAST 1
|
2017-09-07 10:55:43 +03:00
|
|
|
\\d\+ BINARY_OP 2 __eq__
|
2021-11-19 17:05:40 +11:00
|
|
|
\\d\+ UNARY_OP 3
|
2015-06-25 14:42:13 +00:00
|
|
|
\\d\+ STORE_FAST 10
|
|
|
|
\\d\+ LOAD_DEREF 14
|
all: Remove MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE.
This commit removes all parts of code associated with the existing
MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE optimisation option, including the
-mcache-lookup-bc option to mpy-cross.
This feature originally provided a significant performance boost for Unix,
but wasn't able to be enabled for MCU targets (due to frozen bytecode), and
added significant extra complexity to generating and distributing .mpy
files.
The equivalent performance gain is now provided by the combination of
MICROPY_OPT_LOAD_ATTR_FAST_PATH and MICROPY_OPT_MAP_LOOKUP_CACHE (which has
been enabled on the unix port in the previous commit).
It's hard to provide precise performance numbers, but tests have been run
on a wide variety of architectures (x86-64, ARM Cortex, Aarch64, RISC-V,
xtensa) and they all generally agree on the qualitative improvements seen
by the combination of MICROPY_OPT_LOAD_ATTR_FAST_PATH and
MICROPY_OPT_MAP_LOOKUP_CACHE.
For example, on a "quiet" Linux x64 environment (i3-5010U @ 2.10GHz) the
change from CACHE_MAP_LOOKUP_IN_BYTECODE, to LOAD_ATTR_FAST_PATH combined
with MAP_LOOKUP_CACHE is:
diff of scores (higher is better)
N=2000 M=2000 bccache -> attrmapcache diff diff% (error%)
bm_chaos.py 13742.56 -> 13905.67 : +163.11 = +1.187% (+/-3.75%)
bm_fannkuch.py 60.13 -> 61.34 : +1.21 = +2.012% (+/-2.11%)
bm_fft.py 113083.20 -> 114793.68 : +1710.48 = +1.513% (+/-1.57%)
bm_float.py 256552.80 -> 243908.29 : -12644.51 = -4.929% (+/-1.90%)
bm_hexiom.py 521.93 -> 625.41 : +103.48 = +19.826% (+/-0.40%)
bm_nqueens.py 197544.25 -> 217713.12 : +20168.87 = +10.210% (+/-3.01%)
bm_pidigits.py 8072.98 -> 8198.75 : +125.77 = +1.558% (+/-3.22%)
misc_aes.py 17283.45 -> 16480.52 : -802.93 = -4.646% (+/-0.82%)
misc_mandel.py 99083.99 -> 128939.84 : +29855.85 = +30.132% (+/-5.88%)
misc_pystone.py 83860.10 -> 82592.56 : -1267.54 = -1.511% (+/-2.27%)
misc_raytrace.py 21490.40 -> 22227.23 : +736.83 = +3.429% (+/-1.88%)
This shows that the new optimisations are at least as good as the existing
inline-bytecode-caching, and are sometimes much better (because the new
ones apply caching to a wider variety of map lookups).
The new optimisations can also benefit code generated by the native
emitter, because they apply to the runtime rather than the generated code.
The improvement for the native emitter when LOAD_ATTR_FAST_PATH and
MAP_LOOKUP_CACHE are enabled is (same Linux environment as above):
diff of scores (higher is better)
N=2000 M=2000 native -> nat-attrmapcache diff diff% (error%)
bm_chaos.py 14130.62 -> 15464.68 : +1334.06 = +9.441% (+/-7.11%)
bm_fannkuch.py 74.96 -> 76.16 : +1.20 = +1.601% (+/-1.80%)
bm_fft.py 166682.99 -> 168221.86 : +1538.87 = +0.923% (+/-4.20%)
bm_float.py 233415.23 -> 265524.90 : +32109.67 = +13.756% (+/-2.57%)
bm_hexiom.py 628.59 -> 734.17 : +105.58 = +16.796% (+/-1.39%)
bm_nqueens.py 225418.44 -> 232926.45 : +7508.01 = +3.331% (+/-3.10%)
bm_pidigits.py 6322.00 -> 6379.52 : +57.52 = +0.910% (+/-5.62%)
misc_aes.py 20670.10 -> 27223.18 : +6553.08 = +31.703% (+/-1.56%)
misc_mandel.py 138221.11 -> 152014.01 : +13792.90 = +9.979% (+/-2.46%)
misc_pystone.py 85032.14 -> 105681.44 : +20649.30 = +24.284% (+/-2.25%)
misc_raytrace.py 19800.01 -> 23350.73 : +3550.72 = +17.933% (+/-2.79%)
In summary, compared to MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE, the new
MICROPY_OPT_LOAD_ATTR_FAST_PATH and MICROPY_OPT_MAP_LOOKUP_CACHE options:
- are simpler;
- take less code size;
- are faster (generally);
- work with code generated by the native emitter;
- can be used on embedded targets with a small and constant RAM overhead;
- allow the same .mpy bytecode to run on all targets.
See #7680 for further discussion. And see also #7653 for a discussion
about simplifying mpy-cross options.
Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2021-09-06 12:28:06 +10:00
|
|
|
\\d\+ LOAD_ATTR c
|
2015-06-25 14:42:13 +00:00
|
|
|
\\d\+ STORE_FAST 11
|
|
|
|
\\d\+ LOAD_FAST 11
|
|
|
|
\\d\+ LOAD_DEREF 14
|
all: Remove MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE.
This commit removes all parts of code associated with the existing
MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE optimisation option, including the
-mcache-lookup-bc option to mpy-cross.
This feature originally provided a significant performance boost for Unix,
but wasn't able to be enabled for MCU targets (due to frozen bytecode), and
added significant extra complexity to generating and distributing .mpy
files.
The equivalent performance gain is now provided by the combination of
MICROPY_OPT_LOAD_ATTR_FAST_PATH and MICROPY_OPT_MAP_LOOKUP_CACHE (which has
been enabled on the unix port in the previous commit).
It's hard to provide precise performance numbers, but tests have been run
on a wide variety of architectures (x86-64, ARM Cortex, Aarch64, RISC-V,
xtensa) and they all generally agree on the qualitative improvements seen
by the combination of MICROPY_OPT_LOAD_ATTR_FAST_PATH and
MICROPY_OPT_MAP_LOOKUP_CACHE.
For example, on a "quiet" Linux x64 environment (i3-5010U @ 2.10GHz) the
change from CACHE_MAP_LOOKUP_IN_BYTECODE, to LOAD_ATTR_FAST_PATH combined
with MAP_LOOKUP_CACHE is:
diff of scores (higher is better)
N=2000 M=2000 bccache -> attrmapcache diff diff% (error%)
bm_chaos.py 13742.56 -> 13905.67 : +163.11 = +1.187% (+/-3.75%)
bm_fannkuch.py 60.13 -> 61.34 : +1.21 = +2.012% (+/-2.11%)
bm_fft.py 113083.20 -> 114793.68 : +1710.48 = +1.513% (+/-1.57%)
bm_float.py 256552.80 -> 243908.29 : -12644.51 = -4.929% (+/-1.90%)
bm_hexiom.py 521.93 -> 625.41 : +103.48 = +19.826% (+/-0.40%)
bm_nqueens.py 197544.25 -> 217713.12 : +20168.87 = +10.210% (+/-3.01%)
bm_pidigits.py 8072.98 -> 8198.75 : +125.77 = +1.558% (+/-3.22%)
misc_aes.py 17283.45 -> 16480.52 : -802.93 = -4.646% (+/-0.82%)
misc_mandel.py 99083.99 -> 128939.84 : +29855.85 = +30.132% (+/-5.88%)
misc_pystone.py 83860.10 -> 82592.56 : -1267.54 = -1.511% (+/-2.27%)
misc_raytrace.py 21490.40 -> 22227.23 : +736.83 = +3.429% (+/-1.88%)
This shows that the new optimisations are at least as good as the existing
inline-bytecode-caching, and are sometimes much better (because the new
ones apply caching to a wider variety of map lookups).
The new optimisations can also benefit code generated by the native
emitter, because they apply to the runtime rather than the generated code.
The improvement for the native emitter when LOAD_ATTR_FAST_PATH and
MAP_LOOKUP_CACHE are enabled is (same Linux environment as above):
diff of scores (higher is better)
N=2000 M=2000 native -> nat-attrmapcache diff diff% (error%)
bm_chaos.py 14130.62 -> 15464.68 : +1334.06 = +9.441% (+/-7.11%)
bm_fannkuch.py 74.96 -> 76.16 : +1.20 = +1.601% (+/-1.80%)
bm_fft.py 166682.99 -> 168221.86 : +1538.87 = +0.923% (+/-4.20%)
bm_float.py 233415.23 -> 265524.90 : +32109.67 = +13.756% (+/-2.57%)
bm_hexiom.py 628.59 -> 734.17 : +105.58 = +16.796% (+/-1.39%)
bm_nqueens.py 225418.44 -> 232926.45 : +7508.01 = +3.331% (+/-3.10%)
bm_pidigits.py 6322.00 -> 6379.52 : +57.52 = +0.910% (+/-5.62%)
misc_aes.py 20670.10 -> 27223.18 : +6553.08 = +31.703% (+/-1.56%)
misc_mandel.py 138221.11 -> 152014.01 : +13792.90 = +9.979% (+/-2.46%)
misc_pystone.py 85032.14 -> 105681.44 : +20649.30 = +24.284% (+/-2.25%)
misc_raytrace.py 19800.01 -> 23350.73 : +3550.72 = +17.933% (+/-2.79%)
In summary, compared to MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE, the new
MICROPY_OPT_LOAD_ATTR_FAST_PATH and MICROPY_OPT_MAP_LOOKUP_CACHE options:
- are simpler;
- take less code size;
- are faster (generally);
- work with code generated by the native emitter;
- can be used on embedded targets with a small and constant RAM overhead;
- allow the same .mpy bytecode to run on all targets.
See #7680 for further discussion. And see also #7653 for a discussion
about simplifying mpy-cross options.
Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2021-09-06 12:28:06 +10:00
|
|
|
\\d\+ STORE_ATTR c
|
2015-06-25 14:42:13 +00:00
|
|
|
\\d\+ LOAD_DEREF 14
|
|
|
|
\\d\+ LOAD_CONST_SMALL_INT 0
|
|
|
|
\\d\+ LOAD_SUBSCR
|
|
|
|
\\d\+ STORE_FAST 12
|
|
|
|
\\d\+ LOAD_FAST 12
|
|
|
|
\\d\+ LOAD_DEREF 14
|
|
|
|
\\d\+ LOAD_CONST_SMALL_INT 0
|
|
|
|
\\d\+ STORE_SUBSCR
|
|
|
|
\\d\+ LOAD_DEREF 14
|
2016-10-17 11:23:37 +11:00
|
|
|
\\d\+ LOAD_CONST_SMALL_INT 0
|
|
|
|
\\d\+ DUP_TOP_TWO
|
|
|
|
\\d\+ LOAD_SUBSCR
|
|
|
|
\\d\+ LOAD_FAST 12
|
2017-10-03 23:34:28 +11:00
|
|
|
\\d\+ BINARY_OP 14 __iadd__
|
2016-10-17 11:23:37 +11:00
|
|
|
\\d\+ ROT_THREE
|
|
|
|
\\d\+ STORE_SUBSCR
|
|
|
|
\\d\+ LOAD_DEREF 14
|
2015-06-25 14:42:13 +00:00
|
|
|
\\d\+ LOAD_CONST_NONE
|
|
|
|
\\d\+ LOAD_CONST_NONE
|
|
|
|
\\d\+ BUILD_SLICE 2
|
|
|
|
\\d\+ LOAD_SUBSCR
|
|
|
|
\\d\+ STORE_FAST 0
|
|
|
|
\\d\+ LOAD_FAST 1
|
|
|
|
\\d\+ UNPACK_SEQUENCE 2
|
|
|
|
\\d\+ STORE_FAST 0
|
|
|
|
\\d\+ STORE_DEREF 14
|
2016-10-17 11:23:37 +11:00
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ UNPACK_EX 1
|
|
|
|
\\d\+ STORE_FAST 0
|
|
|
|
\\d\+ STORE_FAST 0
|
2015-06-25 14:42:13 +00:00
|
|
|
\\d\+ LOAD_DEREF 14
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ ROT_TWO
|
|
|
|
\\d\+ STORE_FAST 0
|
|
|
|
\\d\+ STORE_DEREF 14
|
|
|
|
\\d\+ LOAD_FAST 1
|
|
|
|
\\d\+ LOAD_DEREF 14
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ ROT_THREE
|
|
|
|
\\d\+ ROT_TWO
|
|
|
|
\\d\+ STORE_FAST 0
|
|
|
|
\\d\+ STORE_DEREF 14
|
|
|
|
\\d\+ STORE_FAST 1
|
|
|
|
\\d\+ DELETE_FAST 0
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ STORE_GLOBAL gl
|
|
|
|
\\d\+ DELETE_GLOBAL gl
|
|
|
|
\\d\+ LOAD_FAST 14
|
|
|
|
\\d\+ LOAD_FAST 15
|
|
|
|
\\d\+ MAKE_CLOSURE \.\+ 2
|
2015-03-20 17:25:25 +00:00
|
|
|
\\d\+ LOAD_FAST 2
|
|
|
|
\\d\+ GET_ITER
|
|
|
|
\\d\+ CALL_FUNCTION n=1 nkw=0
|
|
|
|
\\d\+ STORE_FAST 0
|
|
|
|
\\d\+ LOAD_FAST 14
|
|
|
|
\\d\+ LOAD_FAST 15
|
|
|
|
\\d\+ MAKE_CLOSURE \.\+ 2
|
|
|
|
\\d\+ LOAD_FAST 2
|
|
|
|
\\d\+ CALL_FUNCTION n=1 nkw=0
|
|
|
|
\\d\+ STORE_FAST 0
|
|
|
|
\\d\+ LOAD_FAST 14
|
|
|
|
\\d\+ LOAD_FAST 15
|
|
|
|
\\d\+ MAKE_CLOSURE \.\+ 2
|
|
|
|
\\d\+ LOAD_FAST 2
|
|
|
|
\\d\+ CALL_FUNCTION n=1 nkw=0
|
|
|
|
\\d\+ STORE_FAST 0
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ CALL_FUNCTION n=0 nkw=0
|
|
|
|
\\d\+ POP_TOP
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ LOAD_CONST_SMALL_INT 1
|
|
|
|
\\d\+ CALL_FUNCTION n=1 nkw=0
|
|
|
|
\\d\+ POP_TOP
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ LOAD_CONST_STRING 'b'
|
|
|
|
\\d\+ LOAD_CONST_SMALL_INT 1
|
|
|
|
\\d\+ CALL_FUNCTION n=0 nkw=1
|
|
|
|
\\d\+ POP_TOP
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ LOAD_DEREF 14
|
|
|
|
\\d\+ LOAD_NULL
|
|
|
|
\\d\+ CALL_FUNCTION_VAR_KW n=0 nkw=0
|
|
|
|
\\d\+ POP_TOP
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ LOAD_METHOD b
|
|
|
|
\\d\+ CALL_METHOD n=0 nkw=0
|
|
|
|
\\d\+ POP_TOP
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ LOAD_METHOD b
|
|
|
|
\\d\+ LOAD_CONST_SMALL_INT 1
|
|
|
|
\\d\+ CALL_METHOD n=1 nkw=0
|
|
|
|
\\d\+ POP_TOP
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ LOAD_METHOD b
|
|
|
|
\\d\+ LOAD_CONST_STRING 'c'
|
|
|
|
\\d\+ LOAD_CONST_SMALL_INT 1
|
|
|
|
\\d\+ CALL_METHOD n=0 nkw=1
|
|
|
|
\\d\+ POP_TOP
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ LOAD_METHOD b
|
|
|
|
\\d\+ LOAD_FAST 1
|
|
|
|
\\d\+ LOAD_NULL
|
|
|
|
\\d\+ CALL_METHOD_VAR_KW n=0 nkw=0
|
|
|
|
\\d\+ POP_TOP
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ POP_JUMP_IF_FALSE \\d\+
|
|
|
|
\\d\+ LOAD_DEREF 16
|
|
|
|
\\d\+ POP_TOP
|
|
|
|
\\d\+ JUMP \\d\+
|
all: Remove MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE.
This commit removes all parts of code associated with the existing
MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE optimisation option, including the
-mcache-lookup-bc option to mpy-cross.
This feature originally provided a significant performance boost for Unix,
but wasn't able to be enabled for MCU targets (due to frozen bytecode), and
added significant extra complexity to generating and distributing .mpy
files.
The equivalent performance gain is now provided by the combination of
MICROPY_OPT_LOAD_ATTR_FAST_PATH and MICROPY_OPT_MAP_LOOKUP_CACHE (which has
been enabled on the unix port in the previous commit).
It's hard to provide precise performance numbers, but tests have been run
on a wide variety of architectures (x86-64, ARM Cortex, Aarch64, RISC-V,
xtensa) and they all generally agree on the qualitative improvements seen
by the combination of MICROPY_OPT_LOAD_ATTR_FAST_PATH and
MICROPY_OPT_MAP_LOOKUP_CACHE.
For example, on a "quiet" Linux x64 environment (i3-5010U @ 2.10GHz) the
change from CACHE_MAP_LOOKUP_IN_BYTECODE, to LOAD_ATTR_FAST_PATH combined
with MAP_LOOKUP_CACHE is:
diff of scores (higher is better)
N=2000 M=2000 bccache -> attrmapcache diff diff% (error%)
bm_chaos.py 13742.56 -> 13905.67 : +163.11 = +1.187% (+/-3.75%)
bm_fannkuch.py 60.13 -> 61.34 : +1.21 = +2.012% (+/-2.11%)
bm_fft.py 113083.20 -> 114793.68 : +1710.48 = +1.513% (+/-1.57%)
bm_float.py 256552.80 -> 243908.29 : -12644.51 = -4.929% (+/-1.90%)
bm_hexiom.py 521.93 -> 625.41 : +103.48 = +19.826% (+/-0.40%)
bm_nqueens.py 197544.25 -> 217713.12 : +20168.87 = +10.210% (+/-3.01%)
bm_pidigits.py 8072.98 -> 8198.75 : +125.77 = +1.558% (+/-3.22%)
misc_aes.py 17283.45 -> 16480.52 : -802.93 = -4.646% (+/-0.82%)
misc_mandel.py 99083.99 -> 128939.84 : +29855.85 = +30.132% (+/-5.88%)
misc_pystone.py 83860.10 -> 82592.56 : -1267.54 = -1.511% (+/-2.27%)
misc_raytrace.py 21490.40 -> 22227.23 : +736.83 = +3.429% (+/-1.88%)
This shows that the new optimisations are at least as good as the existing
inline-bytecode-caching, and are sometimes much better (because the new
ones apply caching to a wider variety of map lookups).
The new optimisations can also benefit code generated by the native
emitter, because they apply to the runtime rather than the generated code.
The improvement for the native emitter when LOAD_ATTR_FAST_PATH and
MAP_LOOKUP_CACHE are enabled is (same Linux environment as above):
diff of scores (higher is better)
N=2000 M=2000 native -> nat-attrmapcache diff diff% (error%)
bm_chaos.py 14130.62 -> 15464.68 : +1334.06 = +9.441% (+/-7.11%)
bm_fannkuch.py 74.96 -> 76.16 : +1.20 = +1.601% (+/-1.80%)
bm_fft.py 166682.99 -> 168221.86 : +1538.87 = +0.923% (+/-4.20%)
bm_float.py 233415.23 -> 265524.90 : +32109.67 = +13.756% (+/-2.57%)
bm_hexiom.py 628.59 -> 734.17 : +105.58 = +16.796% (+/-1.39%)
bm_nqueens.py 225418.44 -> 232926.45 : +7508.01 = +3.331% (+/-3.10%)
bm_pidigits.py 6322.00 -> 6379.52 : +57.52 = +0.910% (+/-5.62%)
misc_aes.py 20670.10 -> 27223.18 : +6553.08 = +31.703% (+/-1.56%)
misc_mandel.py 138221.11 -> 152014.01 : +13792.90 = +9.979% (+/-2.46%)
misc_pystone.py 85032.14 -> 105681.44 : +20649.30 = +24.284% (+/-2.25%)
misc_raytrace.py 19800.01 -> 23350.73 : +3550.72 = +17.933% (+/-2.79%)
In summary, compared to MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE, the new
MICROPY_OPT_LOAD_ATTR_FAST_PATH and MICROPY_OPT_MAP_LOOKUP_CACHE options:
- are simpler;
- take less code size;
- are faster (generally);
- work with code generated by the native emitter;
- can be used on embedded targets with a small and constant RAM overhead;
- allow the same .mpy bytecode to run on all targets.
See #7680 for further discussion. And see also #7653 for a discussion
about simplifying mpy-cross options.
Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2021-09-06 12:28:06 +10:00
|
|
|
\\d\+ LOAD_GLOBAL y
|
2015-03-20 17:25:25 +00:00
|
|
|
\\d\+ POP_TOP
|
|
|
|
\\d\+ JUMP \\d\+
|
|
|
|
\\d\+ LOAD_DEREF 14
|
|
|
|
\\d\+ POP_TOP
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ POP_JUMP_IF_TRUE \\d\+
|
|
|
|
\\d\+ JUMP \\d\+
|
|
|
|
\\d\+ LOAD_DEREF 14
|
|
|
|
\\d\+ POP_TOP
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ POP_JUMP_IF_FALSE \\d\+
|
2016-10-17 11:23:37 +11:00
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ JUMP_IF_TRUE_OR_POP \\d\+
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ STORE_FAST 0
|
2015-03-20 17:25:25 +00:00
|
|
|
\\d\+ LOAD_DEREF 14
|
2016-01-09 23:59:52 +00:00
|
|
|
\\d\+ GET_ITER_STACK
|
2015-03-20 17:25:25 +00:00
|
|
|
\\d\+ FOR_ITER \\d\+
|
|
|
|
\\d\+ STORE_FAST 0
|
|
|
|
\\d\+ LOAD_FAST 1
|
|
|
|
\\d\+ POP_TOP
|
|
|
|
\\d\+ JUMP \\d\+
|
|
|
|
\\d\+ SETUP_FINALLY \\d\+
|
|
|
|
\\d\+ SETUP_EXCEPT \\d\+
|
|
|
|
\\d\+ JUMP \\d\+
|
|
|
|
\\d\+ JUMP \\d\+
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ POP_JUMP_IF_TRUE \\d\+
|
2019-02-15 12:18:59 +11:00
|
|
|
\\d\+ POP_EXCEPT_JUMP \\d\+
|
2015-03-20 17:25:25 +00:00
|
|
|
\\d\+ POP_TOP
|
|
|
|
\\d\+ LOAD_DEREF 14
|
|
|
|
\\d\+ POP_TOP
|
2019-02-15 12:18:59 +11:00
|
|
|
\\d\+ POP_EXCEPT_JUMP \\d\+
|
2015-03-20 17:25:25 +00:00
|
|
|
\\d\+ END_FINALLY
|
|
|
|
\\d\+ LOAD_CONST_NONE
|
|
|
|
\\d\+ LOAD_FAST 1
|
|
|
|
\\d\+ POP_TOP
|
|
|
|
\\d\+ END_FINALLY
|
2016-10-17 11:23:37 +11:00
|
|
|
\\d\+ JUMP \\d\+
|
|
|
|
\\d\+ SETUP_EXCEPT \\d\+
|
|
|
|
\\d\+ UNWIND_JUMP \\d\+ 1
|
2019-02-15 12:18:59 +11:00
|
|
|
\\d\+ POP_EXCEPT_JUMP \\d\+
|
2016-10-17 11:23:37 +11:00
|
|
|
\\d\+ POP_TOP
|
2019-02-15 12:18:59 +11:00
|
|
|
\\d\+ POP_EXCEPT_JUMP \\d\+
|
2016-10-17 11:23:37 +11:00
|
|
|
\\d\+ END_FINALLY
|
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ POP_JUMP_IF_TRUE \\d\+
|
2015-03-20 17:25:25 +00:00
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ SETUP_WITH \\d\+
|
|
|
|
\\d\+ POP_TOP
|
|
|
|
\\d\+ LOAD_DEREF 14
|
|
|
|
\\d\+ POP_TOP
|
|
|
|
\\d\+ LOAD_CONST_NONE
|
|
|
|
\\d\+ WITH_CLEANUP
|
|
|
|
\\d\+ END_FINALLY
|
|
|
|
\\d\+ LOAD_CONST_SMALL_INT 1
|
|
|
|
\\d\+ STORE_DEREF 16
|
|
|
|
\\d\+ LOAD_FAST_N 16
|
|
|
|
\\d\+ MAKE_CLOSURE \.\+ 1
|
|
|
|
\\d\+ STORE_FAST 13
|
|
|
|
\\d\+ LOAD_CONST_SMALL_INT 0
|
|
|
|
\\d\+ LOAD_CONST_NONE
|
|
|
|
\\d\+ IMPORT_NAME 'a'
|
|
|
|
\\d\+ STORE_FAST 0
|
|
|
|
\\d\+ LOAD_CONST_SMALL_INT 0
|
|
|
|
\\d\+ LOAD_CONST_STRING 'b'
|
|
|
|
\\d\+ BUILD_TUPLE 1
|
|
|
|
\\d\+ IMPORT_NAME 'a'
|
|
|
|
\\d\+ IMPORT_FROM 'b'
|
|
|
|
\\d\+ STORE_DEREF 14
|
|
|
|
\\d\+ POP_TOP
|
2019-08-22 12:39:07 +10:00
|
|
|
\\d\+ RAISE_LAST
|
2015-03-20 17:25:25 +00:00
|
|
|
\\d\+ LOAD_CONST_SMALL_INT 1
|
2019-08-22 12:39:07 +10:00
|
|
|
\\d\+ RAISE_OBJ
|
2015-03-20 17:25:25 +00:00
|
|
|
\\d\+ LOAD_CONST_NONE
|
|
|
|
\\d\+ RETURN_VALUE
|
|
|
|
\\d\+ LOAD_CONST_SMALL_INT 1
|
|
|
|
\\d\+ RETURN_VALUE
|
|
|
|
File cmdline/cmd_showbc.py, code block 'f' (descriptor: \.\+, bytecode @\.\+ bytes)
|
|
|
|
Raw bytecode (code_info_size=\\d\+, bytecode_size=\\d\+):
|
|
|
|
########
|
2016-10-17 11:23:37 +11:00
|
|
|
\.\+rg names:
|
|
|
|
(N_STATE 22)
|
|
|
|
(N_EXC_STACK 0)
|
2019-09-25 15:45:47 +10:00
|
|
|
bc=0 line=1
|
2016-10-17 11:23:37 +11:00
|
|
|
########
|
|
|
|
bc=\\d\+ line=133
|
|
|
|
00 LOAD_CONST_SMALL_INT 1
|
|
|
|
01 DUP_TOP
|
|
|
|
02 STORE_FAST 0
|
|
|
|
03 DUP_TOP
|
|
|
|
04 STORE_FAST 1
|
|
|
|
05 DUP_TOP
|
|
|
|
06 STORE_FAST 2
|
|
|
|
07 DUP_TOP
|
|
|
|
08 STORE_FAST 3
|
|
|
|
09 DUP_TOP
|
|
|
|
10 STORE_FAST 4
|
|
|
|
11 DUP_TOP
|
|
|
|
12 STORE_FAST 5
|
|
|
|
13 DUP_TOP
|
|
|
|
14 STORE_FAST 6
|
|
|
|
15 DUP_TOP
|
|
|
|
16 STORE_FAST 7
|
|
|
|
17 DUP_TOP
|
|
|
|
18 STORE_FAST 8
|
|
|
|
19 STORE_FAST 9
|
|
|
|
20 LOAD_CONST_SMALL_INT 2
|
|
|
|
21 DUP_TOP
|
|
|
|
22 STORE_FAST 10
|
|
|
|
23 DUP_TOP
|
|
|
|
24 STORE_FAST 11
|
|
|
|
25 DUP_TOP
|
|
|
|
26 STORE_FAST 12
|
|
|
|
27 DUP_TOP
|
|
|
|
28 STORE_FAST 13
|
|
|
|
29 DUP_TOP
|
|
|
|
30 STORE_FAST 14
|
|
|
|
31 DUP_TOP
|
|
|
|
32 STORE_FAST 15
|
|
|
|
33 DUP_TOP
|
|
|
|
34 STORE_FAST_N 16
|
|
|
|
36 DUP_TOP
|
|
|
|
37 STORE_FAST_N 17
|
|
|
|
39 DUP_TOP
|
|
|
|
40 STORE_FAST_N 18
|
|
|
|
42 STORE_FAST_N 19
|
|
|
|
44 LOAD_FAST 9
|
|
|
|
45 LOAD_FAST_N 19
|
2019-07-25 13:15:54 +10:00
|
|
|
47 BINARY_OP 27 __add__
|
2016-10-17 11:23:37 +11:00
|
|
|
48 POP_TOP
|
|
|
|
49 LOAD_CONST_NONE
|
|
|
|
50 RETURN_VALUE
|
|
|
|
File cmdline/cmd_showbc.py, code block 'f' (descriptor: \.\+, bytecode @\.\+ bytes)
|
|
|
|
Raw bytecode (code_info_size=\\d\+, bytecode_size=\\d\+):
|
|
|
|
########
|
2019-09-02 21:35:17 +10:00
|
|
|
\.\+63
|
2015-03-14 14:06:20 +00:00
|
|
|
arg names: a
|
|
|
|
(N_STATE 5)
|
|
|
|
(N_EXC_STACK 0)
|
|
|
|
(INIT_CELL 0)
|
2015-03-20 17:25:25 +00:00
|
|
|
########
|
2016-10-17 11:23:37 +11:00
|
|
|
bc=\\d\+ line=139
|
2015-03-14 14:06:20 +00:00
|
|
|
00 LOAD_CONST_SMALL_INT 2
|
|
|
|
01 BUILD_TUPLE 1
|
|
|
|
03 LOAD_NULL
|
|
|
|
04 LOAD_FAST 0
|
2015-03-20 17:25:25 +00:00
|
|
|
05 MAKE_CLOSURE_DEFARGS \.\+ 1
|
|
|
|
\\d\+ STORE_FAST 1
|
|
|
|
\\d\+ LOAD_CONST_NONE
|
|
|
|
\\d\+ RETURN_VALUE
|
|
|
|
File cmdline/cmd_showbc.py, code block 'f' (descriptor: \.\+, bytecode @\.\+ bytes)
|
|
|
|
Raw bytecode (code_info_size=\\d\+, bytecode_size=\\d\+):
|
|
|
|
########
|
2019-09-02 21:35:17 +10:00
|
|
|
\.\+63
|
2015-03-14 14:06:20 +00:00
|
|
|
arg names:
|
|
|
|
(N_STATE 2)
|
|
|
|
(N_EXC_STACK 0)
|
2019-09-25 15:45:47 +10:00
|
|
|
bc=0 line=1
|
2016-10-17 11:23:37 +11:00
|
|
|
bc=0 line=144
|
|
|
|
bc=3 line=145
|
|
|
|
bc=6 line=146
|
2015-03-14 14:06:20 +00:00
|
|
|
00 LOAD_CONST_NONE
|
|
|
|
01 YIELD_VALUE
|
|
|
|
02 POP_TOP
|
|
|
|
03 LOAD_CONST_SMALL_INT 1
|
|
|
|
04 YIELD_VALUE
|
|
|
|
05 POP_TOP
|
|
|
|
06 LOAD_CONST_SMALL_INT 1
|
|
|
|
07 GET_ITER
|
|
|
|
08 LOAD_CONST_NONE
|
|
|
|
09 YIELD_FROM
|
|
|
|
10 POP_TOP
|
|
|
|
11 LOAD_CONST_NONE
|
|
|
|
12 RETURN_VALUE
|
2015-03-20 17:25:25 +00:00
|
|
|
File cmdline/cmd_showbc.py, code block 'Class' (descriptor: \.\+, bytecode @\.\+ bytes)
|
|
|
|
Raw bytecode (code_info_size=\\d\+, bytecode_size=\\d\+):
|
|
|
|
########
|
2019-09-02 21:35:17 +10:00
|
|
|
\.\+63
|
2015-03-14 14:06:20 +00:00
|
|
|
arg names:
|
|
|
|
(N_STATE 1)
|
|
|
|
(N_EXC_STACK 0)
|
2019-09-25 15:45:47 +10:00
|
|
|
bc=0 line=1
|
2019-08-14 16:11:25 +02:00
|
|
|
########
|
all: Remove MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE.
This commit removes all parts of code associated with the existing
MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE optimisation option, including the
-mcache-lookup-bc option to mpy-cross.
This feature originally provided a significant performance boost for Unix,
but wasn't able to be enabled for MCU targets (due to frozen bytecode), and
added significant extra complexity to generating and distributing .mpy
files.
The equivalent performance gain is now provided by the combination of
MICROPY_OPT_LOAD_ATTR_FAST_PATH and MICROPY_OPT_MAP_LOOKUP_CACHE (which has
been enabled on the unix port in the previous commit).
It's hard to provide precise performance numbers, but tests have been run
on a wide variety of architectures (x86-64, ARM Cortex, Aarch64, RISC-V,
xtensa) and they all generally agree on the qualitative improvements seen
by the combination of MICROPY_OPT_LOAD_ATTR_FAST_PATH and
MICROPY_OPT_MAP_LOOKUP_CACHE.
For example, on a "quiet" Linux x64 environment (i3-5010U @ 2.10GHz) the
change from CACHE_MAP_LOOKUP_IN_BYTECODE, to LOAD_ATTR_FAST_PATH combined
with MAP_LOOKUP_CACHE is:
diff of scores (higher is better)
N=2000 M=2000 bccache -> attrmapcache diff diff% (error%)
bm_chaos.py 13742.56 -> 13905.67 : +163.11 = +1.187% (+/-3.75%)
bm_fannkuch.py 60.13 -> 61.34 : +1.21 = +2.012% (+/-2.11%)
bm_fft.py 113083.20 -> 114793.68 : +1710.48 = +1.513% (+/-1.57%)
bm_float.py 256552.80 -> 243908.29 : -12644.51 = -4.929% (+/-1.90%)
bm_hexiom.py 521.93 -> 625.41 : +103.48 = +19.826% (+/-0.40%)
bm_nqueens.py 197544.25 -> 217713.12 : +20168.87 = +10.210% (+/-3.01%)
bm_pidigits.py 8072.98 -> 8198.75 : +125.77 = +1.558% (+/-3.22%)
misc_aes.py 17283.45 -> 16480.52 : -802.93 = -4.646% (+/-0.82%)
misc_mandel.py 99083.99 -> 128939.84 : +29855.85 = +30.132% (+/-5.88%)
misc_pystone.py 83860.10 -> 82592.56 : -1267.54 = -1.511% (+/-2.27%)
misc_raytrace.py 21490.40 -> 22227.23 : +736.83 = +3.429% (+/-1.88%)
This shows that the new optimisations are at least as good as the existing
inline-bytecode-caching, and are sometimes much better (because the new
ones apply caching to a wider variety of map lookups).
The new optimisations can also benefit code generated by the native
emitter, because they apply to the runtime rather than the generated code.
The improvement for the native emitter when LOAD_ATTR_FAST_PATH and
MAP_LOOKUP_CACHE are enabled is (same Linux environment as above):
diff of scores (higher is better)
N=2000 M=2000 native -> nat-attrmapcache diff diff% (error%)
bm_chaos.py 14130.62 -> 15464.68 : +1334.06 = +9.441% (+/-7.11%)
bm_fannkuch.py 74.96 -> 76.16 : +1.20 = +1.601% (+/-1.80%)
bm_fft.py 166682.99 -> 168221.86 : +1538.87 = +0.923% (+/-4.20%)
bm_float.py 233415.23 -> 265524.90 : +32109.67 = +13.756% (+/-2.57%)
bm_hexiom.py 628.59 -> 734.17 : +105.58 = +16.796% (+/-1.39%)
bm_nqueens.py 225418.44 -> 232926.45 : +7508.01 = +3.331% (+/-3.10%)
bm_pidigits.py 6322.00 -> 6379.52 : +57.52 = +0.910% (+/-5.62%)
misc_aes.py 20670.10 -> 27223.18 : +6553.08 = +31.703% (+/-1.56%)
misc_mandel.py 138221.11 -> 152014.01 : +13792.90 = +9.979% (+/-2.46%)
misc_pystone.py 85032.14 -> 105681.44 : +20649.30 = +24.284% (+/-2.25%)
misc_raytrace.py 19800.01 -> 23350.73 : +3550.72 = +17.933% (+/-2.79%)
In summary, compared to MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE, the new
MICROPY_OPT_LOAD_ATTR_FAST_PATH and MICROPY_OPT_MAP_LOOKUP_CACHE options:
- are simpler;
- take less code size;
- are faster (generally);
- work with code generated by the native emitter;
- can be used on embedded targets with a small and constant RAM overhead;
- allow the same .mpy bytecode to run on all targets.
See #7680 for further discussion. And see also #7653 for a discussion
about simplifying mpy-cross options.
Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2021-09-06 12:28:06 +10:00
|
|
|
bc=12 line=150
|
|
|
|
00 LOAD_NAME __name__
|
|
|
|
03 STORE_NAME __module__
|
|
|
|
06 LOAD_CONST_STRING 'Class'
|
|
|
|
09 STORE_NAME __qualname__
|
|
|
|
12 LOAD_CONST_NONE
|
|
|
|
13 RETURN_VALUE
|
2017-04-19 09:49:48 +10:00
|
|
|
File cmdline/cmd_showbc.py, code block 'f' (descriptor: \.\+, bytecode @\.\+ bytes)
|
|
|
|
Raw bytecode (code_info_size=\\d\+, bytecode_size=\\d\+):
|
|
|
|
########
|
2019-09-02 21:35:17 +10:00
|
|
|
\.\+63
|
2017-04-19 09:49:48 +10:00
|
|
|
arg names: self
|
|
|
|
(N_STATE 4)
|
|
|
|
(N_EXC_STACK 0)
|
2019-09-25 15:45:47 +10:00
|
|
|
bc=0 line=1
|
2017-04-19 09:49:48 +10:00
|
|
|
bc=0 line=157
|
all: Remove MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE.
This commit removes all parts of code associated with the existing
MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE optimisation option, including the
-mcache-lookup-bc option to mpy-cross.
This feature originally provided a significant performance boost for Unix,
but wasn't able to be enabled for MCU targets (due to frozen bytecode), and
added significant extra complexity to generating and distributing .mpy
files.
The equivalent performance gain is now provided by the combination of
MICROPY_OPT_LOAD_ATTR_FAST_PATH and MICROPY_OPT_MAP_LOOKUP_CACHE (which has
been enabled on the unix port in the previous commit).
It's hard to provide precise performance numbers, but tests have been run
on a wide variety of architectures (x86-64, ARM Cortex, Aarch64, RISC-V,
xtensa) and they all generally agree on the qualitative improvements seen
by the combination of MICROPY_OPT_LOAD_ATTR_FAST_PATH and
MICROPY_OPT_MAP_LOOKUP_CACHE.
For example, on a "quiet" Linux x64 environment (i3-5010U @ 2.10GHz) the
change from CACHE_MAP_LOOKUP_IN_BYTECODE, to LOAD_ATTR_FAST_PATH combined
with MAP_LOOKUP_CACHE is:
diff of scores (higher is better)
N=2000 M=2000 bccache -> attrmapcache diff diff% (error%)
bm_chaos.py 13742.56 -> 13905.67 : +163.11 = +1.187% (+/-3.75%)
bm_fannkuch.py 60.13 -> 61.34 : +1.21 = +2.012% (+/-2.11%)
bm_fft.py 113083.20 -> 114793.68 : +1710.48 = +1.513% (+/-1.57%)
bm_float.py 256552.80 -> 243908.29 : -12644.51 = -4.929% (+/-1.90%)
bm_hexiom.py 521.93 -> 625.41 : +103.48 = +19.826% (+/-0.40%)
bm_nqueens.py 197544.25 -> 217713.12 : +20168.87 = +10.210% (+/-3.01%)
bm_pidigits.py 8072.98 -> 8198.75 : +125.77 = +1.558% (+/-3.22%)
misc_aes.py 17283.45 -> 16480.52 : -802.93 = -4.646% (+/-0.82%)
misc_mandel.py 99083.99 -> 128939.84 : +29855.85 = +30.132% (+/-5.88%)
misc_pystone.py 83860.10 -> 82592.56 : -1267.54 = -1.511% (+/-2.27%)
misc_raytrace.py 21490.40 -> 22227.23 : +736.83 = +3.429% (+/-1.88%)
This shows that the new optimisations are at least as good as the existing
inline-bytecode-caching, and are sometimes much better (because the new
ones apply caching to a wider variety of map lookups).
The new optimisations can also benefit code generated by the native
emitter, because they apply to the runtime rather than the generated code.
The improvement for the native emitter when LOAD_ATTR_FAST_PATH and
MAP_LOOKUP_CACHE are enabled is (same Linux environment as above):
diff of scores (higher is better)
N=2000 M=2000 native -> nat-attrmapcache diff diff% (error%)
bm_chaos.py 14130.62 -> 15464.68 : +1334.06 = +9.441% (+/-7.11%)
bm_fannkuch.py 74.96 -> 76.16 : +1.20 = +1.601% (+/-1.80%)
bm_fft.py 166682.99 -> 168221.86 : +1538.87 = +0.923% (+/-4.20%)
bm_float.py 233415.23 -> 265524.90 : +32109.67 = +13.756% (+/-2.57%)
bm_hexiom.py 628.59 -> 734.17 : +105.58 = +16.796% (+/-1.39%)
bm_nqueens.py 225418.44 -> 232926.45 : +7508.01 = +3.331% (+/-3.10%)
bm_pidigits.py 6322.00 -> 6379.52 : +57.52 = +0.910% (+/-5.62%)
misc_aes.py 20670.10 -> 27223.18 : +6553.08 = +31.703% (+/-1.56%)
misc_mandel.py 138221.11 -> 152014.01 : +13792.90 = +9.979% (+/-2.46%)
misc_pystone.py 85032.14 -> 105681.44 : +20649.30 = +24.284% (+/-2.25%)
misc_raytrace.py 19800.01 -> 23350.73 : +3550.72 = +17.933% (+/-2.79%)
In summary, compared to MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE, the new
MICROPY_OPT_LOAD_ATTR_FAST_PATH and MICROPY_OPT_MAP_LOOKUP_CACHE options:
- are simpler;
- take less code size;
- are faster (generally);
- work with code generated by the native emitter;
- can be used on embedded targets with a small and constant RAM overhead;
- allow the same .mpy bytecode to run on all targets.
See #7680 for further discussion. And see also #7653 for a discussion
about simplifying mpy-cross options.
Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2021-09-06 12:28:06 +10:00
|
|
|
00 LOAD_GLOBAL super
|
|
|
|
\\d\+ LOAD_GLOBAL __class__
|
2017-04-19 09:49:48 +10:00
|
|
|
\\d\+ LOAD_FAST 0
|
|
|
|
\\d\+ LOAD_SUPER_METHOD f
|
|
|
|
\\d\+ CALL_METHOD n=0 nkw=0
|
|
|
|
\\d\+ POP_TOP
|
|
|
|
\\d\+ LOAD_CONST_NONE
|
|
|
|
\\d\+ RETURN_VALUE
|
2015-03-20 17:25:25 +00:00
|
|
|
File cmdline/cmd_showbc.py, code block '<genexpr>' (descriptor: \.\+, bytecode @\.\+ bytes)
|
|
|
|
Raw bytecode (code_info_size=\\d\+, bytecode_size=\\d\+):
|
|
|
|
########
|
2019-09-02 21:35:17 +10:00
|
|
|
\.\+63
|
2015-05-06 13:55:33 +01:00
|
|
|
arg names: * * *
|
2017-01-18 16:53:15 +11:00
|
|
|
(N_STATE 9)
|
2015-03-14 14:06:20 +00:00
|
|
|
(N_EXC_STACK 0)
|
2019-09-25 15:45:47 +10:00
|
|
|
bc=0 line=1
|
2019-08-19 14:16:33 +02:00
|
|
|
bc=0 line=60
|
2019-08-14 16:11:25 +02:00
|
|
|
########
|
2017-01-18 16:53:15 +11:00
|
|
|
00 LOAD_NULL
|
|
|
|
01 LOAD_FAST 2
|
|
|
|
02 LOAD_NULL
|
|
|
|
03 LOAD_NULL
|
|
|
|
04 FOR_ITER 20
|
|
|
|
07 STORE_FAST 3
|
|
|
|
08 LOAD_DEREF 1
|
|
|
|
10 POP_JUMP_IF_FALSE 4
|
|
|
|
13 LOAD_DEREF 0
|
|
|
|
15 YIELD_VALUE
|
|
|
|
16 POP_TOP
|
|
|
|
17 JUMP 4
|
|
|
|
20 LOAD_CONST_NONE
|
|
|
|
21 RETURN_VALUE
|
2015-03-20 17:25:25 +00:00
|
|
|
File cmdline/cmd_showbc.py, code block '<listcomp>' (descriptor: \.\+, bytecode @\.\+ bytes)
|
|
|
|
Raw bytecode (code_info_size=\\d\+, bytecode_size=\\d\+):
|
|
|
|
########
|
2019-09-02 21:35:17 +10:00
|
|
|
\.\+63
|
2015-05-06 13:55:33 +01:00
|
|
|
arg names: * * *
|
2017-01-18 16:53:15 +11:00
|
|
|
(N_STATE 10)
|
2015-03-14 14:06:20 +00:00
|
|
|
(N_EXC_STACK 0)
|
2019-09-25 15:45:47 +10:00
|
|
|
bc=0 line=1
|
2019-08-19 14:16:33 +02:00
|
|
|
bc=0 line=61
|
2019-08-14 16:11:25 +02:00
|
|
|
########
|
2015-03-14 14:06:20 +00:00
|
|
|
00 BUILD_LIST 0
|
2017-02-10 15:39:55 +11:00
|
|
|
02 LOAD_FAST 2
|
|
|
|
03 GET_ITER_STACK
|
|
|
|
04 FOR_ITER 20
|
|
|
|
07 STORE_FAST 3
|
|
|
|
08 LOAD_DEREF 1
|
|
|
|
10 POP_JUMP_IF_FALSE 4
|
|
|
|
13 LOAD_DEREF 0
|
|
|
|
15 STORE_COMP 20
|
|
|
|
17 JUMP 4
|
|
|
|
20 RETURN_VALUE
|
2015-03-20 17:25:25 +00:00
|
|
|
File cmdline/cmd_showbc.py, code block '<dictcomp>' (descriptor: \.\+, bytecode @\.\+ bytes)
|
|
|
|
Raw bytecode (code_info_size=\\d\+, bytecode_size=\\d\+):
|
|
|
|
########
|
2019-09-02 21:35:17 +10:00
|
|
|
\.\+63
|
2015-05-06 13:55:33 +01:00
|
|
|
arg names: * * *
|
2017-01-18 16:53:15 +11:00
|
|
|
(N_STATE 11)
|
2015-03-14 14:06:20 +00:00
|
|
|
(N_EXC_STACK 0)
|
2019-09-25 15:45:47 +10:00
|
|
|
bc=0 line=1
|
2015-03-20 17:25:25 +00:00
|
|
|
########
|
2015-03-14 14:06:20 +00:00
|
|
|
00 BUILD_MAP 0
|
2017-02-10 15:39:55 +11:00
|
|
|
02 LOAD_FAST 2
|
|
|
|
03 GET_ITER_STACK
|
|
|
|
04 FOR_ITER 22
|
|
|
|
07 STORE_FAST 3
|
|
|
|
08 LOAD_DEREF 1
|
|
|
|
10 POP_JUMP_IF_FALSE 4
|
|
|
|
13 LOAD_DEREF 0
|
2017-01-18 16:53:15 +11:00
|
|
|
15 LOAD_DEREF 0
|
2017-02-10 15:39:55 +11:00
|
|
|
17 STORE_COMP 25
|
|
|
|
19 JUMP 4
|
|
|
|
22 RETURN_VALUE
|
2015-03-20 17:25:25 +00:00
|
|
|
File cmdline/cmd_showbc.py, code block 'closure' (descriptor: \.\+, bytecode @\.\+ bytes)
|
|
|
|
Raw bytecode (code_info_size=\\d\+, bytecode_size=\\d\+):
|
|
|
|
########
|
2019-09-02 21:35:17 +10:00
|
|
|
\.\+63
|
2015-05-06 13:55:33 +01:00
|
|
|
arg names: *
|
2015-03-14 14:06:20 +00:00
|
|
|
(N_STATE 4)
|
|
|
|
(N_EXC_STACK 0)
|
2019-09-25 15:45:47 +10:00
|
|
|
bc=0 line=1
|
2015-03-20 17:25:25 +00:00
|
|
|
########
|
2016-10-17 11:23:37 +11:00
|
|
|
bc=\\d\+ line=114
|
2015-03-14 14:06:20 +00:00
|
|
|
00 LOAD_DEREF 0
|
|
|
|
02 LOAD_CONST_SMALL_INT 1
|
2019-07-25 13:15:54 +10:00
|
|
|
03 BINARY_OP 27 __add__
|
2015-03-14 14:06:20 +00:00
|
|
|
04 STORE_FAST 1
|
|
|
|
05 LOAD_CONST_SMALL_INT 1
|
|
|
|
06 STORE_DEREF 0
|
|
|
|
08 DELETE_DEREF 0
|
|
|
|
10 LOAD_CONST_NONE
|
|
|
|
11 RETURN_VALUE
|
2015-03-20 17:25:25 +00:00
|
|
|
File cmdline/cmd_showbc.py, code block 'f' (descriptor: \.\+, bytecode @\.\+ bytes)
|
|
|
|
Raw bytecode (code_info_size=\\d\+, bytecode_size=\\d\+):
|
|
|
|
########
|
2019-09-02 21:35:17 +10:00
|
|
|
\.\+63
|
2015-05-06 13:55:33 +01:00
|
|
|
arg names: * b
|
2015-03-14 14:06:20 +00:00
|
|
|
(N_STATE 4)
|
|
|
|
(N_EXC_STACK 0)
|
2019-09-25 15:45:47 +10:00
|
|
|
bc=0 line=1
|
2015-03-20 17:25:25 +00:00
|
|
|
########
|
2016-10-17 11:23:37 +11:00
|
|
|
bc=\\d\+ line=140
|
2015-03-14 14:06:20 +00:00
|
|
|
00 LOAD_FAST 1
|
|
|
|
01 LOAD_DEREF 0
|
2019-07-25 13:15:54 +10:00
|
|
|
03 BINARY_OP 27 __add__
|
2015-03-14 14:06:20 +00:00
|
|
|
04 RETURN_VALUE
|
2015-03-20 17:25:25 +00:00
|
|
|
mem: total=\\d\+, current=\\d\+, peak=\\d\+
|
|
|
|
stack: \\d\+ out of \\d\+
|
|
|
|
GC: total: \\d\+, used: \\d\+, free: \\d\+
|
2016-09-20 11:33:19 +10:00
|
|
|
No. of 1-blocks: \\d\+, 2-blocks: \\d\+, max blk sz: \\d\+, max free sz: \\d\+
|