From 66b79723b6d238f93908a029ed7a1c4619842b52 Mon Sep 17 00:00:00 2001 From: Scott Shawcroft Date: Fri, 8 Jun 2018 12:55:40 -0700 Subject: [PATCH] Add code size analysis tool and shrink samd.clock a smidge. --- ports/atmel-samd/bindings/samd/Clock.c | 10 +- ports/atmel-samd/bindings/samd/Clock.h | 10 +- tools/chart_code_size.py | 454 +++++++++++++++++++++++++ 3 files changed, 460 insertions(+), 14 deletions(-) create mode 100644 tools/chart_code_size.py diff --git a/ports/atmel-samd/bindings/samd/Clock.c b/ports/atmel-samd/bindings/samd/Clock.c index 3c87177a46..e09cc0bd89 100644 --- a/ports/atmel-samd/bindings/samd/Clock.c +++ b/ports/atmel-samd/bindings/samd/Clock.c @@ -47,15 +47,7 @@ STATIC void samd_clock_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) { samd_clock_obj_t *self = MP_OBJ_TO_PTR(self_in); - mp_printf(print, "%q.%q.%s(", MP_QSTR_samd, MP_QSTR_clock, self->name); - if (clock_get_enabled(self->type, self->index)) { - mp_printf(print, "frequency=%u", clock_get_frequency(self->type, self->index)); - uint32_t calibration = clock_get_calibration(self->type, self->index); - if (calibration) { - mp_printf(print, ", calibration=%u", calibration); - } - } - mp_printf(print, ")"); + mp_printf(print, "%q.%q.%q", MP_QSTR_samd, MP_QSTR_clock, self->name); } //| .. 
attribute:: enabled diff --git a/ports/atmel-samd/bindings/samd/Clock.h b/ports/atmel-samd/bindings/samd/Clock.h index 0d13ae2533..ccc8f10d10 100644 --- a/ports/atmel-samd/bindings/samd/Clock.h +++ b/ports/atmel-samd/bindings/samd/Clock.h @@ -31,7 +31,7 @@ typedef struct { mp_obj_base_t base; - const char *name; + qstr name; uint8_t type; uint8_t index; } samd_clock_obj_t; @@ -39,7 +39,7 @@ typedef struct { #define CLOCK(_name, _type, _index) \ const samd_clock_obj_t clock_ ## _name = { \ { &samd_clock_type }, \ - .name = #_name, \ + .name = MP_QSTR_ ## _name, \ .type = _type, \ .index = _index, \ } @@ -47,7 +47,7 @@ const samd_clock_obj_t clock_ ## _name = { \ #define CLOCK_SOURCE(_name) \ const samd_clock_obj_t clock_ ## _name = { \ { &samd_clock_type }, \ - .name = #_name, \ + .name = MP_QSTR_ ## _name, \ .type = 0, \ .index = GCLK_SOURCE_ ## _name, \ } @@ -55,7 +55,7 @@ const samd_clock_obj_t clock_ ## _name = { \ #define CLOCK_GCLK(_name) \ const samd_clock_obj_t clock_ ## _name = { \ { &samd_clock_type }, \ - .name = #_name, \ + .name = MP_QSTR_ ## _name, \ .type = 1, \ .index = _name ## _GCLK_ID, \ } @@ -63,7 +63,7 @@ const samd_clock_obj_t clock_ ## _name = { \ #define CLOCK_GCLK_(_name, _extra) \ const samd_clock_obj_t clock_ ## _name ## _ ## _extra = { \ { &samd_clock_type }, \ - .name = #_name "_" #_extra, \ + .name = MP_QSTR_ ## _name ## _ ## _extra, \ .type = 1, \ .index = _name ## _GCLK_ID_ ## _extra, \ } diff --git a/tools/chart_code_size.py b/tools/chart_code_size.py new file mode 100644 index 0000000000..0b55787fa4 --- /dev/null +++ b/tools/chart_code_size.py @@ -0,0 +1,454 @@ +# This script renders a graph of the CircuitPython rom image. +# It takes the single elf file and uses objdump to get its contents. 
import pygraphviz as pgv
import click
import sh

# Replace dashes with underscores
objdump = sh.arm_none_eabi_objdump


def parse_hex(h):
    """Return the integer value of ``h``, a hex string without a "0x" prefix."""
    return int("0x" + h, 0)


# Jump targets that are never recorded as call edges.
BAD_JUMPS = ["UNPREDICTABLE", "_etext"]

# Fixed fill colors for nodes that should stand out regardless of fan-in.
SPECIAL_NODE_COLORS = {
    "main": "pink",
    "exception_table": "green"
}


@click.command()
@click.argument("elf_filename")
def do_all_the_things(elf_filename):
    """Chart the ROM contents of ELF_FILENAME as a graph in callgraph.svg.

    The ELF is inspected twice with objdump: once for its DWARF info (types,
    variables, functions) and once for the disassembly of ``.text``. Jumps
    between functions and pointers stored in ROM data become graph edges;
    each node is sized from the number of bytes its symbol occupies.
    """
    symbol = None
    last_address = 0
    all_symbols = {}
    symbols_by_debug_address = {}
    symbols_by_memory_address = {}
    symbols_by_linkage_name = {}
    # Gather type info so we know how to treat the disassembly
    debug_dump = objdump("--dwarf=info", elf_filename)
    debug_dump_lines = debug_dump.stdout.decode("utf-8").split("\n")
    symbol_stack = []
    ignore = False
    min_call_site_param = 0x20000000
    for line in debug_dump_lines:
        if not line:
            continue
        parts = line.split()
        # Slice instead of indexing so a one-character line cannot raise.
        if line[1:2] == "<":
            # Start of a new debug entry. A trailing "0" closes the current
            # nesting level, so resume with the parent entry.
            if parts[-1] == "0":
                symbol = symbol_stack.pop()
                continue
            debug_type = parts[-1].strip("()")
            ignore = False
            # skip info about function parameters
            if debug_type == "DW_TAG_formal_parameter":
                ignore = True
            depth = int(parts[0].split(">")[0].strip("<"))
            if len(symbol_stack) == (depth - 1) and depth > 0:
                # The previous entry is the parent of this one.
                symbol_stack.append(symbol)
            elif symbol and "name" in symbol:
                # The previous entry is complete; file it.
                if symbol["debug_type"] == "DW_TAG_variable":
                    if "start_address" in symbol:
                        symbols_by_memory_address[symbol["start_address"]] = symbol
                elif symbol["debug_type"] in ["DW_TAG_member", "DW_TAG_label", "DW_TAG_typedef", "DW_TAG_enumerator", "DW_TAG_enumeration_type", "DW_TAG_base_type", "DW_TAG_structure_type", "DW_TAG_compile_unit", "DW_TAG_union_type"]:
                    # skip symbols that don't end up in memory. the type info is available through the debug address map
                    pass
                else:
                    all_symbols[symbol["name"]] = symbol
            elif symbol and symbol["debug_type"] == "DW_TAG_GNU_call_site_parameter" and "call_site_value" in symbol:
                # Attribute the passed address to the enclosing function.
                parent = -1
                while symbol_stack[parent]["debug_type"] != "DW_TAG_subprogram":
                    parent -= 1
                parent = symbol_stack[parent]

                # Only point to ROM
                addr = symbol["call_site_value"]
                if 0x2000 <= addr < 0x20000000:
                    if "outgoing_pointers" not in parent:
                        parent["outgoing_pointers"] = set()
                    parent["outgoing_pointers"].add(addr)
                    if addr not in symbols_by_memory_address:
                        symbols_by_memory_address[addr] = symbol
                        min_call_site_param = min(addr, min_call_site_param)
                        symbol["name"] = "name{:x}".format(addr)
            address = parse_hex(parts[0].split("<")[-1].strip(">:"))
            symbol = {"debug_address": address, "debug_type": debug_type, "other": []}
            if debug_type == "DW_TAG_structure_type":
                symbol["struct"] = {}
            elif debug_type == "DW_TAG_array_type":
                symbol["subtype"] = None
                symbol["bound_count"] = 0
                symbol["maxlen"] = 0
            elif debug_type == "DW_TAG_subrange_type":
                symbol_stack[-1]["subtype"] = symbol
            symbols_by_debug_address[address] = symbol
        elif ignore:
            continue
        # Attribute lines of the current entry. The comparison needs four
        # spaces to be able to match the four-character slice.
        elif line[:4] == "    ":
            tag = parts[1].strip(":")
            if tag == "DW_AT_name":
                symbol["name"] = parts[-1]
            elif tag == "DW_AT_type":
                symbol["type"] = int(parts[-1].strip("<>"), 0)
                if symbol["debug_type"] == "DW_TAG_subrange_type":
                    if not symbol_stack[-1]["subtype"]:
                        symbol_stack[-1]["subtype"] = symbol
                    elif symbol_stack[-1]["subtype"]["type"] != symbol["type"]:
                        raise RuntimeError(
                            "array subranges with differing index types: " + line.strip())
            elif tag == "DW_AT_upper_bound":
                # Skip arrays with length defined by other variables
                if parts[-1][0] != "<":
                    upper_bound = int(parts[-1])
                    # Multi-dimensional arrays multiply their bounds together.
                    if symbol_stack[-1]["bound_count"] > 0:
                        symbol_stack[-1]["maxlen"] *= upper_bound + 1
                    else:
                        symbol_stack[-1]["maxlen"] = upper_bound + 1
                    symbol_stack[-1]["bound_count"] += 1
            elif tag == "DW_AT_byte_size":
                symbol["size"] = int(parts[-1])
            elif tag == "DW_AT_inline":
                symbol["inlined"] = True
            elif tag == "DW_AT_low_pc":
                addr = int(parts[-1], 0)
                symbols_by_memory_address[addr] = symbol
            elif tag == "DW_AT_location":
                if parts[-2] == "(DW_OP_addr:":
                    addr = parse_hex(parts[-1].strip(")"))
                    if addr > 0:
                        symbol["start_address"] = addr
            elif tag == "DW_AT_linkage_name":
                symbol["linkage_name"] = parts[-1]
                symbols_by_linkage_name[symbol["linkage_name"]] = symbol
            elif tag == "DW_AT_data_member_location":
                symbol_stack[-1]["struct"][int(parts[-1])] = symbol
            elif tag == "DW_AT_GNU_call_site_value":
                if parts[-2] == "(DW_OP_addr:":
                    symbol["call_site_value"] = parse_hex(parts[-1].strip(")"))
            else:
                symbol["other"].append(line)

    # Kinds of values a pointer map can flag at a byte offset.
    MEMORY_NONE = 0
    MEMORY_POINTER = 1
    MEMORY_PY_OBJECT = 2

    def get_size(t):
        """Return the byte size of debug entry ``t``, following its type chain."""
        if "size" in t:
            return t["size"]
        return get_size(symbols_by_debug_address[t["type"]])

    def get_pointer_map(t, depth=0):
        """Return {byte offset: MEMORY_* kind} for the pointers inside type ``t``."""
        if t["debug_type"] == "DW_TAG_pointer_type":
            return {0: MEMORY_POINTER}
        elif t["debug_type"] in ["DW_TAG_const_type", "DW_TAG_typedef", "DW_TAG_member", "DW_TAG_subrange_type", "DW_TAG_volatile_type"]:
            if "name" in t and t["name"] == "mp_rom_obj_t":
                return {0: MEMORY_PY_OBJECT}
            return get_pointer_map(symbols_by_debug_address[t["type"]], depth+1)
        elif t["debug_type"] in ["DW_TAG_base_type", "DW_TAG_enumeration_type"]:
            return {}
        elif t["debug_type"] == "DW_TAG_union_type":
            # skip for now
            return {}
        elif "struct" in t:
            # Shift each member's map by the member's offset in the struct.
            combined_map = {}
            for offset in t["struct"]:
                member = t["struct"][offset]
                submap = get_pointer_map(member)
                for suboffset in submap:
                    combined_map[offset + suboffset] = submap[suboffset]
            return combined_map
        elif "subtype" in t:
            # Array: repeat the element's map once per element.
            subtype = symbols_by_debug_address[t["type"]]
            pmap = get_pointer_map(subtype, depth+1)
            size = get_size(subtype)
            expanded_map = {}
            for i in range(t["maxlen"]):
                for offset in pmap:
                    expanded_map[size * i + offset] = pmap[offset]
            return expanded_map
        else:
            print("no recurse", t)
        return {}

    # Do a second pass to dereference the types
    for symbol_address in symbols_by_memory_address:
        symbol = symbols_by_memory_address[symbol_address]
        if "type" in symbol:
            if symbol["debug_type"] == "DW_TAG_variable":
                symbol["pointer_map"] = get_pointer_map(symbols_by_debug_address[symbol["type"]])
            # Build a readable type description by walking the type chain.
            type_string = []
            t = symbol["type"]
            while t is not None:
                t_symbol = symbols_by_debug_address[t]
                t = t_symbol.get("type", None)
                if "name" in t_symbol:
                    type_string.append(t_symbol["name"])
                elif t_symbol["debug_type"] == "DW_TAG_array_type":
                    type_string.append("[]")
                elif t_symbol["debug_type"] == "DW_TAG_pointer_type":
                    type_string.append("*")
                elif t_symbol["debug_type"] == "DW_TAG_const_type":
                    type_string.append("const")
                elif t_symbol["debug_type"] == "DW_TAG_volatile_type":
                    type_string.append("volatile")
            type_string.reverse()
            symbol["type_string"] = " ".join(type_string)

    # Gather size and call info
    text_dump = objdump("-Dz", "-j", ".text", elf_filename)
    text_dump_lines = text_dump.stdout.decode("utf-8").split("\n")
    section = None
    symbol = None
    # The first four lines are skipped as header output.
    for line in text_dump_lines[4:]:
        if line.startswith("Disassembly of section"):
            section = line.split()[-1].strip(":")
        elif not line:
            # A blank line ends the current symbol's listing.
            if symbol and "end_address" not in symbol:
                symbol["end_address"] = last_address
                symbol["size"] = last_address - symbol["start_address"]
                symbol = None
            continue
        elif line[0].isnumeric():
            # Symbol header: "<address> <name>:"
            symbol_address, symbol_name = line.split()
            symbol_address = parse_hex(symbol_address)
            symbol_name = symbol_name.strip("<>:")
            if symbol_name in symbols_by_linkage_name:
                symbol = symbols_by_linkage_name[symbol_name]
                if "name" in symbol:
                    non_linkage = symbol["name"]
                    if not non_linkage.startswith("__builtin"):
                        symbol_name = non_linkage
                all_symbols[symbol_name] = symbol
                if "name" not in symbol:
                    symbol["name"] = symbol_name
            elif symbol_address in symbols_by_memory_address:
                all_symbols[symbol_name] = symbols_by_memory_address[symbol_address]
                if "name" not in all_symbols[symbol_name]:
                    all_symbols[symbol_name]["name"] = symbol_name
            elif symbol_name not in all_symbols:
                if symbol_name == "nlr_push_tail_var":
                    # No debug entry was found for this one; borrow a type
                    # so it can be handled as a variable.
                    fake_type = all_symbols["mp_obj_get_type"]["type"]
                    symbol = {"debug_type": "DW_TAG_variable", "name": symbol_name, "type": fake_type}
                else:
                    print(line)
                    print(symbol_name, symbol_address)
                    symbol = {"debug_type": "DW_TAG_subprogram", "name": symbol_name}
                all_symbols[symbol_name] = symbol

            symbol = all_symbols[symbol_name]
            symbol["start_address"] = symbol_address
            symbols_by_memory_address[symbol_address] = symbol
            symbol["section"] = section

            if symbol["debug_type"] == "DW_TAG_subprogram":
                symbol["outgoing_jumps"] = set()
                symbol["incoming_jumps"] = set()
            elif symbol["debug_type"] == "DW_TAG_variable":
                symbol["outgoing_pointers"] = set()
            all_symbols[symbol_name] = symbol

        elif line[0] == " ":
            # One disassembled instruction or data word of the current symbol.
            parts = line.strip().split()
            last_address = parse_hex(parts[0].strip(":"))

            offset = last_address - symbol["start_address"]
            if "pointer_map" in symbol and offset in symbol["pointer_map"]:
                ref = parse_hex(parts[1])
                pointer_style = symbol["pointer_map"][offset]
                if pointer_style == MEMORY_POINTER:
                    # Drop the lowest bit of the stored address.
                    symbol["outgoing_pointers"].add(ref & 0xfffffffe)
                elif pointer_style == MEMORY_PY_OBJECT and ref & 0x3 == 0:
                    symbol["outgoing_pointers"].add(ref)
            if len(parts[1]) == 8 and parts[1][0] == "0":
                # A full 32-bit word: keep it if it lands in the ROM range.
                addr = parse_hex(parts[1])
                if 0x2000 <= addr < 0x20000000:
                    if "outgoing_pointers" not in symbol:
                        symbol["outgoing_pointers"] = set()
                    symbol["outgoing_pointers"].add(addr)
            elif "<" in line and symbol["debug_type"] == "DW_TAG_subprogram":
                if line[-1] == ">":
                    jump_to = parts[-1].strip("<>").split("+")[0]
                    if "name" not in symbol:
                        print(jump_to)
                        print(symbol)
                    if jump_to != symbol["name"] and jump_to not in BAD_JUMPS:
                        symbol["outgoing_jumps"].add(jump_to)
                        # NOTE(review): unreachable while "_etext" is listed
                        # in BAD_JUMPS; kept as a debugging aid.
                        if jump_to == "_etext":
                            print(line)
                elif "UNDEFINED" in line:
                    continue
                elif parts[2] == "ldr":
                    continue
                else:
                    print(line)

    print(hex(min_call_site_param))
    # .get() keeps this debug output from crashing on an ELF without the symbol.
    print(all_symbols.get("exception_table"))

    print("converting outgoing pointers to names")

    # Convert outgoing pointers to names from addresses
    converted_ids = set()
    for symbol in all_symbols.values():
        if "outgoing_pointers" not in symbol:
            continue
        # The same dict can be stored under more than one name. Converting it
        # a second time would discard the names produced the first time.
        if id(symbol) in converted_ids:
            continue
        converted_ids.add(id(symbol))
        converted = set()
        for outgoing in symbol["outgoing_pointers"]:
            if outgoing in symbols_by_memory_address:
                outgoing = symbols_by_memory_address[outgoing]
                if outgoing["debug_type"] in ["DW_TAG_GNU_call_site", "DW_TAG_lexical_block"]:
                    continue
                if outgoing["name"] == "audioio_wavefile_type":
                    print(outgoing)
                converted.add(outgoing["name"])
        symbol["outgoing_pointers"] = converted

    print("linking back")
    # Link back
    for symbol_name in all_symbols:
        symbol = all_symbols[symbol_name]
        if "outgoing_jumps" in symbol:
            for outgoing in symbol["outgoing_jumps"]:
                if outgoing not in all_symbols:
                    continue

                referenced_symbol = all_symbols[outgoing]
                if "incoming_jumps" not in referenced_symbol:
                    referenced_symbol["incoming_jumps"] = set()
                referenced_symbol["incoming_jumps"].add(symbol_name)
        if "outgoing_pointers" in symbol:
            for outgoing in symbol["outgoing_pointers"]:
                if outgoing not in all_symbols:
                    continue

                referenced_symbol = all_symbols[outgoing]
                if "incoming_pointers" not in referenced_symbol:
                    referenced_symbol["incoming_pointers"] = set()
                referenced_symbol["incoming_pointers"].add(symbol_name)

    print(all_symbols.get("exception_table"))

    # Chart it all
    print("charting {} symbols".format(len(all_symbols)))
    callgraph = pgv.AGraph(directed=True)
    for symbol_name, symbol in all_symbols.items():
        # Symbols nothing refers to are left out of the chart.
        if not symbol.get("incoming_jumps") and not symbol.get("incoming_pointers"):
            continue
        if "start_address" not in symbol:
            continue
        callgraph.add_node(symbol_name)
        if "outgoing_jumps" in symbol:
            for outgoing in symbol["outgoing_jumps"]:
                callgraph.add_edge(symbol_name, outgoing)
        if "outgoing_pointers" in symbol:
            for outgoing in symbol["outgoing_pointers"]:
                callgraph.add_edge(symbol_name, outgoing, color="red")

    # Style all of the nodes
    print("styling")
    # Deletions are deferred so the graph is not modified while it is
    # being iterated.
    nodes_to_delete = []
    for node in callgraph.iternodes():
        if node.name not in all_symbols:
            continue
        symbol = all_symbols[node.name]
        node.attr["shape"] = "box"
        if "size" in symbol:
            # Box area tracks the symbol size; widen the box when the label
            # would not fit inside a square of that area.
            text_width_ish = len(node.name) * 0.1
            size = symbol["size"] / 8
            square_size = size ** 0.5
            if text_width_ish > square_size:
                w = text_width_ish
                h = size / text_width_ish
            else:
                w = square_size
                h = square_size
            node.attr["width"] = w
            node.attr["height"] = h
            node.attr["label"] = node.name + "\r\n" + str(symbol["size"]) + " bytes"
        else:
            # No size was recorded for this symbol. Report it and keep the
            # default node dimensions and label.
            print(symbol)
        node.attr["style"] = "filled"

        incoming = 0
        if "incoming_jumps" in symbol:
            incoming += len(symbol["incoming_jumps"])
        if "incoming_pointers" in symbol:
            incoming += len(symbol["incoming_pointers"])

        if node.name in SPECIAL_NODE_COLORS:
            node.attr["color"] = SPECIAL_NODE_COLORS[node.name]
        elif incoming == 1:
            node.attr["color"] = "lightblue"
        elif incoming > 25:
            print("delete", node.name, "because it has {} incoming".format(incoming))
            nodes_to_delete.append(node.name)
        elif incoming > 15:
            node.attr["color"] = "red"

    for node_name in nodes_to_delete:
        callgraph.delete_node(node_name)

    print("drawing")
    callgraph.layout(prog="dot")
    fn = "callgraph.svg"
    print(fn)
    callgraph.draw(fn)


if __name__ == "__main__":
    do_all_the_things()