Merge pull request #985 from tannewt/heap_tweaks3

A few heap related tweaks
This commit is contained in:
Dan Halbert 2018-07-08 23:21:38 -04:00 committed by GitHub
commit 64b9ee9c74
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 206 additions and 35 deletions

1
.gitignore vendored
View File

@ -58,3 +58,4 @@ TAGS
#################
*.orig
*.DS_Store

8
lib/utils/pyexec.c Normal file → Executable file
View File

@ -33,6 +33,7 @@
#include "py/runtime.h"
#include "py/repl.h"
#include "py/gc.h"
#include "py/gc_long_lived.h"
#include "py/frozenmod.h"
#include "py/mphal.h"
#if defined(USE_DEVICE_MODE)
@ -97,6 +98,13 @@ STATIC int parse_compile_execute(const void *source, mp_parse_input_kind_t input
#endif
}
// If the code was loaded from a file, it's likely to be running for a while, so we'll make it
// long-lived and collect any garbage before running.
if (input_kind == MP_PARSE_FILE_INPUT) {
module_fun = make_obj_long_lived(module_fun, 6);
gc_collect();
}
// execute code
mp_hal_set_interrupt_char(CHAR_CTRL_C); // allow ctrl-C to interrupt us
start = mp_hal_ticks_ms();

10
main.c Normal file → Executable file
View File

@ -44,6 +44,7 @@
#include "lib/utils/pyexec.h"
#include "mpconfigboard.h"
#include "supervisor/cpu.h"
#include "supervisor/port.h"
#include "supervisor/filesystem.h"
// TODO(tannewt): Figure out how to choose language at compile time.
@ -381,16 +382,17 @@ int __attribute__((used)) main(void) {
}
void gc_collect(void) {
// WARNING: This gc_collect implementation doesn't try to get root
// pointers from CPU registers, and thus may function incorrectly.
void *dummy;
gc_collect_start();
mp_uint_t regs[10];
mp_uint_t sp = cpu_get_regs_and_sp(regs);
// This collects root pointers from the VFS mount table. Some of them may
// have lost their references in the VM even though they are mounted.
gc_collect_root((void**)&MP_STATE_VM(vfs_mount_table), sizeof(mp_vfs_mount_t) / sizeof(mp_uint_t));
// This naively collects all object references from an approximate stack
// range.
gc_collect_root(&dummy, ((mp_uint_t)&_estack - (mp_uint_t)&dummy) / sizeof(mp_uint_t));
gc_collect_root((void**)sp, ((uint32_t)&_estack - sp) / sizeof(uint32_t));
gc_collect_end();
}

3
ports/atmel-samd/Makefile Normal file → Executable file
View File

@ -412,6 +412,8 @@ endif
SRC_SHARED_MODULE_EXPANDED = $(addprefix shared-bindings/, $(SRC_SHARED_MODULE)) \
$(addprefix shared-module/, $(SRC_SHARED_MODULE))
SRC_S = supervisor/$(CHIP_FAMILY)_cpu.s
OBJ = $(PY_O) $(SUPERVISOR_O) $(addprefix $(BUILD)/, $(SRC_C:.c=.o))
OBJ += $(addprefix $(BUILD)/, $(SRC_ASF:.c=.o))
OBJ += $(addprefix $(BUILD)/, $(SRC_COMMON_HAL_EXPANDED:.c=.o))
@ -419,6 +421,7 @@ OBJ += $(addprefix $(BUILD)/, $(SRC_SHARED_MODULE_EXPANDED:.c=.o))
ifeq ($(INTERNAL_LIBM),1)
OBJ += $(addprefix $(BUILD)/, $(SRC_LIBM:.c=.o))
endif
OBJ += $(addprefix $(BUILD)/, $(SRC_S:.s=.o))
SRC_QSTR += $(SRC_C) $(SRC_SUPERVISOR) $(SRC_COMMON_HAL_EXPANDED) $(SRC_SHARED_MODULE_EXPANDED) $(STM_SRC_C)

View File

@ -0,0 +1,35 @@
.syntax unified
.cpu cortex-m0
.thumb
.text
.align 2
@ uint cpu_get_regs_and_sp(r0=uint regs[10])
.global cpu_get_regs_and_sp
.thumb
.thumb_func
.type cpu_get_regs_and_sp, %function
cpu_get_regs_and_sp:
@ Entry: r0 = address of the caller's regs array (ten words).
@ Exit:  r0 = the stack pointer; regs[0..9] hold r4-r13.
@ store registers into given array
@ r4-r7 are written directly at byte offsets 0, 4, 8 and 12.
str r4, [r0, #0]
str r5, [r0, #4]
str r6, [r0, #8]
str r7, [r0, #12]
@ r8-r13 are each copied into r1 and stored from there (byte offsets 16-36).
@ NOTE(review): presumably because the cortex-m0 `str` encodings only accept
@ r0-r7 as the source register - confirm against the ARMv6-M reference.
@ r1 is the scratch register, so its incoming value is saved on the stack first.
push {r1}
mov r1, r8
str r1, [r0, #16]
mov r1, r9
str r1, [r0, #20]
mov r1, r10
str r1, [r0, #24]
mov r1, r11
str r1, [r0, #28]
mov r1, r12
str r1, [r0, #32]
@ r13 is read while r1 is still pushed, so the value stored in this slot is the
@ stack pointer after the push above, not the value returned in r0 below.
mov r1, r13
str r1, [r0, #36]
@ Restore the caller's r1 and undo the push before reading sp for the return.
pop {r1}
@ return the sp
mov r0, sp
bx lr

View File

@ -0,0 +1,27 @@
.syntax unified
.cpu cortex-m4
.thumb
.text
.align 2
@ uint cpu_get_regs_and_sp(r0=uint regs[10])
.global cpu_get_regs_and_sp
.thumb
.thumb_func
.type cpu_get_regs_and_sp, %function
cpu_get_regs_and_sp:
@ Entry: r0 = address of the caller's regs array (ten words).
@ Exit:  r0 = the stack pointer; regs[0..9] hold r4-r13.
@ store registers into given array
@ Every store is post-indexed: the register is written at [r0] and r0 is then
@ advanced by 4, so r4-r13 land in ten consecutive words starting at regs[0].
str r4, [r0], #4
str r5, [r0], #4
str r6, [r0], #4
str r7, [r0], #4
str r8, [r0], #4
str r9, [r0], #4
str r10, [r0], #4
str r11, [r0], #4
str r12, [r0], #4
str r13, [r0], #4
@ return the sp
@ r0 has been advanced past the ten stored words by now; it is overwritten with
@ the stack pointer, which becomes the function's return value.
mov r0, sp
bx lr

3
ports/nrf/Makefile Normal file → Executable file
View File

@ -208,6 +208,8 @@ SRC_SHARED_BINDINGS = \
SRC_SHARED_MODULE_EXPANDED = $(addprefix shared-bindings/, $(SRC_SHARED_BINDINGS)) \
$(addprefix shared-module/, $(SRC_SHARED_MODULE))
SRC_S = supervisor/cpu.s
FROZEN_MPY_PY_FILES := $(shell find -L $(FROZEN_MPY_DIR) -type f -name '*.py')
FROZEN_MPY_MPY_FILES := $(addprefix $(BUILD)/,$(FROZEN_MPY_PY_FILES:.py=.mpy))
@ -218,6 +220,7 @@ OBJ += $(addprefix $(BUILD)/, $(SRC_NRFX:.c=.o))
OBJ += $(addprefix $(BUILD)/, $(DRIVERS_SRC_C:.c=.o))
OBJ += $(addprefix $(BUILD)/, $(SRC_COMMON_HAL_EXPANDED:.c=.o))
OBJ += $(addprefix $(BUILD)/, $(SRC_SHARED_MODULE_EXPANDED:.c=.o))
OBJ += $(addprefix $(BUILD)/, $(SRC_S:.s=.o))
$(BUILD)/$(FATFS_DIR)/ff.o: COPT += -Os
$(filter $(PY_BUILD)/../extmod/vfs_fat_%.o, $(PY_O)): COPT += -Os

27
ports/nrf/supervisor/cpu.s Executable file
View File

@ -0,0 +1,27 @@
.syntax unified
.cpu cortex-m4
.thumb
.text
.align 2
@ uint cpu_get_regs_and_sp(r0=uint regs[10])
.global cpu_get_regs_and_sp
.thumb
.thumb_func
.type cpu_get_regs_and_sp, %function
cpu_get_regs_and_sp:
@ Entry: r0 = address of the caller's regs array (ten words).
@ Exit:  r0 = the stack pointer; regs[0..9] hold r4-r13.
@ store registers into given array
@ Every store is post-indexed: the register is written at [r0] and r0 is then
@ advanced by 4, so r4-r13 land in ten consecutive words starting at regs[0].
str r4, [r0], #4
str r5, [r0], #4
str r6, [r0], #4
str r7, [r0], #4
str r8, [r0], #4
str r9, [r0], #4
str r10, [r0], #4
str r11, [r0], #4
str r12, [r0], #4
str r13, [r0], #4
@ return the sp
@ r0 has been advanced past the ten stored words by now; it is overwritten with
@ the stack pointer, which becomes the function's return value.
mov r0, sp
bx lr

5
py/builtinimport.c Normal file → Executable file
View File

@ -31,6 +31,7 @@
#include "py/compile.h"
#include "py/gc_long_lived.h"
#include "py/gc.h"
#include "py/objmodule.h"
#include "py/persistentcode.h"
#include "py/runtime.h"
@ -468,6 +469,10 @@ mp_obj_t mp_builtin___import__(size_t n_args, const mp_obj_t *args) {
// (the module that was just loaded) is not a package. This will be caught
// on the next iteration because the file will not exist.
}
// Loading a module thrashes the heap significantly so we explicitly clean up
// afterwards.
gc_collect();
}
if (outer_module_obj != MP_OBJ_NULL) {
qstr s = qstr_from_strn(mod_str + last, i - last);

0
py/gc.c Normal file → Executable file
View File

2
py/gc_long_lived.c Normal file → Executable file
View File

@ -121,7 +121,7 @@ mp_obj_t make_obj_long_lived(mp_obj_t obj, uint8_t max_depth){
} else if (MP_OBJ_IS_TYPE(obj, &mp_type_property)) {
mp_obj_property_t *prop = MP_OBJ_TO_PTR(obj);
return MP_OBJ_FROM_PTR(make_property_long_lived(prop, max_depth));
} else if (MP_OBJ_IS_TYPE(obj, &mp_type_str)) {
} else if (MP_OBJ_IS_TYPE(obj, &mp_type_str) || MP_OBJ_IS_TYPE(obj, &mp_type_bytes)) {
mp_obj_str_t *str = MP_OBJ_TO_PTR(obj);
return MP_OBJ_FROM_PTR(make_str_long_lived(str));
} else if (MP_OBJ_IS_TYPE(obj, &mp_type_type)) {

6
py/mpconfig.h Normal file → Executable file
View File

@ -130,6 +130,12 @@
#define MICROPY_ALLOC_QSTR_CHUNK_INIT (128)
#endif
// Max number of entries in newly allocated QSTR pools. Smaller numbers may make QSTR lookups
// slightly slower but reduce the waste of unused spots.
#ifndef MICROPY_QSTR_POOL_MAX_ENTRIES
#define MICROPY_QSTR_POOL_MAX_ENTRIES (64)
#endif
// Initial amount for lexer indentation level
#ifndef MICROPY_ALLOC_LEXER_INDENT_INIT
#define MICROPY_ALLOC_LEXER_INDENT_INIT (10)

10
py/qstr.c Normal file → Executable file
View File

@ -144,14 +144,18 @@ STATIC qstr qstr_add(const byte *q_ptr) {
// make sure we have room in the pool for a new qstr
if (MP_STATE_VM(last_pool)->len >= MP_STATE_VM(last_pool)->alloc) {
qstr_pool_t *pool = m_new_ll_obj_var_maybe(qstr_pool_t, const char*, MP_STATE_VM(last_pool)->alloc * 2);
uint32_t new_pool_length = MP_STATE_VM(last_pool)->alloc * 2;
if (new_pool_length > MICROPY_QSTR_POOL_MAX_ENTRIES) {
new_pool_length = MICROPY_QSTR_POOL_MAX_ENTRIES;
}
qstr_pool_t *pool = m_new_ll_obj_var_maybe(qstr_pool_t, const char*, new_pool_length);
if (pool == NULL) {
QSTR_EXIT();
m_malloc_fail(MP_STATE_VM(last_pool)->alloc * 2);
m_malloc_fail(new_pool_length);
}
pool->prev = MP_STATE_VM(last_pool);
pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len;
pool->alloc = MP_STATE_VM(last_pool)->alloc * 2;
pool->alloc = new_pool_length;
pool->len = 0;
MP_STATE_VM(last_pool) = pool;
DEBUG_printf("QSTR: allocate new pool of size %d\n", MP_STATE_VM(last_pool)->alloc);

34
supervisor/cpu.h Executable file
View File

@ -0,0 +1,34 @@
/*
* This file is part of the MicroPython project, http://micropython.org/
*
* The MIT License (MIT)
*
* Copyright (c) 2017 Scott Shawcroft for Adafruit Industries
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MICROPY_INCLUDED_SUPERVISOR_CPU_H
#define MICROPY_INCLUDED_SUPERVISOR_CPU_H
// Adds up to 10 pointers from the CPU's registers to regs. This is used to make sure no actively
// used heap memory is freed. It's usually implemented in assembly.
mp_uint_t cpu_get_regs_and_sp(mp_uint_t *regs);
#endif // MICROPY_INCLUDED_SUPERVISOR_CPU_H

68
tools/analyze_heap_dump.py Normal file → Executable file
View File

@ -135,8 +135,11 @@ def do_all_the_things(ram_filename, bin_filename, map_filename, print_block_cont
ram_start = symbols["_srelocate"][0]
ram_end = symbols["_estack"][0]
ram_length = ram_end - ram_start
# print(ram_length, "ram length")
# print(len(ram_dump) // ram_length, "snapshots")
if analyze_snapshots == "all":
snapshots = range(len(ram_dump) // ram_length - 1, -1, -1)
#snapshots = range(4576, -1, -1)
elif analyze_snapshots == "last":
snapshots = range(len(ram_dump) // ram_length - 1, len(ram_dump) // ram_length - 2, -1)
for snapshot_num in snapshots:
@ -167,16 +170,16 @@ def do_all_the_things(ram_filename, bin_filename, map_filename, print_block_cont
mp_state_ctx = symbols["mp_state_ctx"][0]
manual_symbol_map["mp_state_ctx+20"] = "mp_state_ctx.vm.last_pool"
last_pool = load_pointer(mp_state_ctx + 20) # (gdb) p &mp_state_ctx.vm.last_pool
manual_symbol_map["mp_state_ctx+88"] = "mp_state_ctx.vm.dict_main.map.table"
dict_main_table = load_pointer(mp_state_ctx + 88) # (gdb) p &mp_state_ctx.vm.dict_main.map.table
manual_symbol_map["mp_state_ctx+68"] = "mp_state_ctx.vm.mp_loaded_modules_dict.map.table"
imports_table = load_pointer(mp_state_ctx + 68) # (gdb) p &mp_state_ctx.vm.mp_loaded_modules_dict.map.table
manual_symbol_map["mp_state_ctx+104"] = "mp_state_ctx.vm.dict_main.map.table"
dict_main_table = load_pointer(mp_state_ctx + 104) # (gdb) p &mp_state_ctx.vm.dict_main.map.table
manual_symbol_map["mp_state_ctx+84"] = "mp_state_ctx.vm.mp_loaded_modules_dict.map.table"
imports_table = load_pointer(mp_state_ctx + 84) # (gdb) p &mp_state_ctx.vm.mp_loaded_modules_dict.map.table
manual_symbol_map["mp_state_ctx+104"] = "mp_state_ctx.vm.mp_sys_path_obj.items"
manual_symbol_map["mp_state_ctx+120"] = "mp_state_ctx.vm.mp_sys_argv_obj.items"
manual_symbol_map["mp_state_ctx+120"] = "mp_state_ctx.vm.mp_sys_path_obj.items"
manual_symbol_map["mp_state_ctx+136"] = "mp_state_ctx.vm.mp_sys_argv_obj.items"
for i in range(READLINE_HIST_SIZE):
manual_symbol_map["mp_state_ctx+{}".format(128 + i * 4)] = "mp_state_ctx.vm.readline_hist[{}]".format(i)
manual_symbol_map["mp_state_ctx+{}".format(144 + i * 4)] = "mp_state_ctx.vm.readline_hist[{}]".format(i)
tuple_type = symbols["mp_type_tuple"][0]
type_type = symbols["mp_type_type"][0]
@ -214,8 +217,8 @@ def do_all_the_things(ram_filename, bin_filename, map_filename, print_block_cont
pool_start = heap_start + total_byte_len - pool_length - pool_shift
pool = heap[-pool_length-pool_shift:]
total_height = 65 * 18
total_width = (pool_length // (64 * 16)) * 90
total_height = 128 * 18
total_width = (pool_length // (128 * 16)) * 85
map_element_blocks = [dict_main_table, imports_table]
string_blocks = []
@ -255,10 +258,11 @@ def do_all_the_things(ram_filename, bin_filename, map_filename, print_block_cont
block_data[address] = data
for k in range(len(data) // 4):
word = struct.unpack_from("<I", data, offset=(k * 4))[0]
if word < 0x00040000 and k == 0 or address in qstr_pools:
if word < len(rom) and k == 0 or address in qstr_pools:
potential_type = word
bgcolor = "gray"
if address in qstr_pools:
#print(address, len(data))
bgcolor = "tomato"
elif potential_type in function_types:
bgcolor = "green"
@ -292,11 +296,11 @@ def do_all_the_things(ram_filename, bin_filename, map_filename, print_block_cont
if potential_type == dynamic_type:
if k == 0:
node.attr["fillcolor"] = "plum"
if k == 3 and 0x20000000 < word < 0x20040000:
if k == 3 and ram_start < word < ram_end:
map_element_blocks.append(word)
if potential_type in function_types:
if k == 2 and 0x20000000 < word < 0x20040000:
if k == 2 and ram_start < word < ram_end:
bytecode_blocks.append(word)
@ -338,7 +342,12 @@ def do_all_the_things(ram_filename, bin_filename, map_filename, print_block_cont
pool_ptr = last_pool
if not is_qstr(qstr_index):
return "object"
pool = block_data[pool_ptr]
prev, total_prev_len, alloc, length = struct.unpack_from("<IIII", pool)
qstr_index >>= 3
if qstr_index > total_prev_len + alloc:
return "invalid"
while pool_ptr != 0:
if pool_ptr > ram_start:
if pool_ptr in block_data:
@ -492,7 +501,10 @@ def do_all_the_things(ram_filename, bin_filename, map_filename, print_block_cont
offset = len(data)
continue
offset += 2 + qstr_len + 1
qstrs_in_chunk += " " + data[offset - qstr_len - 1: offset - 1].decode("utf-8")
try:
qstrs_in_chunk += " " + data[offset - qstr_len - 1: offset - 1].decode("utf-8")
except UnicodeDecodeError:
qstrs_in_chunk += " " + ""*qstr_len
printable_qstrs = ""
for i in range(len(qstrs_in_chunk)):
c = qstrs_in_chunk[i]
@ -515,20 +527,28 @@ def do_all_the_things(ram_filename, bin_filename, map_filename, print_block_cont
# First render the graph of objects on the heap.
if draw_heap_ownership:
ownership_graph.layout(prog="dot")
fn = os.path.join(output_directory, "heap_ownership{:04d}.png".format(snapshot_num))
fn = os.path.join(output_directory, "heap_ownership{:04d}.svg".format(snapshot_num))
print(fn)
ownership_graph.draw(fn)
# Clear edge positioning from ownership graph layout.
if draw_heap_ownership:
for edge in ownership_graph.iteredges():
del edge.attr["pos"]
else:
for edge in ownership_graph.edges():
ownership_graph.delete_edge(edge)
# Second, render the heap layout in memory order.
for node in ownership_graph:
for node in ownership_graph.nodes():
try:
address = int(node.name)
except ValueError:
ownership_graph.remove_node(node)
ownership_graph.remove_node(node.name)
continue
block = (address - pool_start) // 16
x = block // 64
y = 64 - block % 64
x = block // 128
y = 128 - block % 128
try:
height = float(node.attr["height"])
except:
@ -538,11 +558,6 @@ def do_all_the_things(ram_filename, bin_filename, map_filename, print_block_cont
# print(hex(address), block, len(block_data[address]), x, y, height)
node.attr["pos"] = "{},{}".format(x * 80, (y - (height - 0.25) * 2) * 18) # in inches
# Clear edge positioning from ownership graph layout.
if draw_heap_ownership:
for edge in ownership_graph.iteredges():
del edge.attr["pos"]
# Reformat block nodes so they are the correct size and do not have keys in them.
for block in sorted(map_element_blocks):
try:
@ -565,9 +580,9 @@ def do_all_the_things(ram_filename, bin_filename, map_filename, print_block_cont
else:
#print(" {}, {}".format(format(key), format(value)))
cells.append((key, ""))
if value in block_data:
edge = ownership_graph.get_edge(block, value)
edge.attr["tailport"] = str(key)
# if value in block_data:
# edge = ownership_graph.get_edge(block, value)
# edge.attr["tailport"] = str(key)
rows = ""
for i in range(len(cells) // 2):
rows += "<tr><td port=\"{}\" height=\"18\" width=\"40\">{}</td><td port=\"{}\" height=\"18\" width=\"40\">{}</td></tr>".format(
@ -586,6 +601,7 @@ def do_all_the_things(ram_filename, bin_filename, map_filename, print_block_cont
if draw_heap_layout:
fn = os.path.join(output_directory, "heap_layout{:04d}.png".format(snapshot_num))
print(fn)
#ownership_graph.write(fn+".dot")
ownership_graph.draw(fn)
if __name__ == "__main__":

View File

@ -19,7 +19,7 @@ append binary memory ram.bin &_srelocate &_estack
continue
end
break main.c:164
break main.c:179
continue