455 lines
19 KiB
Python
455 lines
19 KiB
Python
# This script renders a graph of the CircuitPython rom image.
|
|
# It takes the single elf file and uses objdump to get its contents.
|
|
|
|
import pygraphviz as pgv
|
|
import click
|
|
import sh
|
|
|
|
# Replace dashes with underscores
|
|
objdump = sh.arm_none_eabi_objdump
|
|
|
|
def parse_hex(h):
|
|
return int("0x" + h, 0)
|
|
|
|
BAD_JUMPS = ["UNPREDICTABLE", "_etext"]
|
|
|
|
SPECIAL_NODE_COLORS = {
|
|
"main": "pink",
|
|
"exception_table": "green"
|
|
}
|
|
|
|
@click.command()
|
|
@click.argument("elf_filename")
|
|
def do_all_the_things(elf_filename):
|
|
symbol = None
|
|
last_address = 0
|
|
all_symbols = {}
|
|
symbols_by_debug_address = {}
|
|
symbols_by_memory_address = {}
|
|
symbols_by_linkage_name = {}
|
|
# Gather type info so we know how to treat the disassembly
|
|
debug_dump = objdump("--dwarf=info", elf_filename)
|
|
debug_dump_lines = debug_dump.stdout.decode("utf-8").split("\n")
|
|
symbol_stack = []
|
|
symbol = None
|
|
ignore = False
|
|
min_call_site_param = 0x20000000
|
|
for line in debug_dump_lines:
|
|
if not line:
|
|
continue
|
|
parts = line.split()
|
|
if line[1] == "<":
|
|
if parts[-1] == "0":
|
|
symbol = symbol_stack.pop()
|
|
continue
|
|
debug_type = parts[-1].strip("()")
|
|
ignore = False
|
|
# skip info about function parameters
|
|
if debug_type == "DW_TAG_formal_parameter":
|
|
ignore = True
|
|
depth = int(parts[0].split(">")[0].strip("<"))
|
|
if len(symbol_stack) == (depth - 1) and depth > 0:
|
|
symbol_stack.append(symbol)
|
|
elif symbol and "name" in symbol:
|
|
if symbol["debug_type"] == "DW_TAG_variable":
|
|
if "start_address" not in symbol:
|
|
pass
|
|
else:
|
|
symbols_by_memory_address[symbol["start_address"]] = symbol
|
|
elif symbol["debug_type"] in ["DW_TAG_member", "DW_TAG_label", "DW_TAG_typedef", "DW_TAG_enumerator", "DW_TAG_enumeration_type", "DW_TAG_base_type", "DW_TAG_structure_type", "DW_TAG_compile_unit", "DW_TAG_union_type"]:
|
|
# skip symbols that don't end up in memory. the type info is available through the debug address map
|
|
pass
|
|
else:
|
|
if symbol["name"] in all_symbols:
|
|
# print(depth, symbol["name"])
|
|
# print(symbol)
|
|
# print(all_symbols[symbol["name"]])
|
|
# print()
|
|
pass
|
|
all_symbols[symbol["name"]] = symbol
|
|
elif symbol and symbol["debug_type"] == "DW_TAG_GNU_call_site_parameter" and "call_site_value" in symbol:
|
|
parent = -1
|
|
while symbol_stack[parent]["debug_type"] != "DW_TAG_subprogram":
|
|
parent -= 1
|
|
parent = symbol_stack[parent]
|
|
|
|
# Only point to ROM
|
|
addr = symbol["call_site_value"]
|
|
if 0x2000 <= addr < 0x20000000:
|
|
if "outgoing_pointers" not in parent:
|
|
parent["outgoing_pointers"] = set()
|
|
parent["outgoing_pointers"].add(addr)
|
|
if addr not in symbols_by_memory_address:
|
|
symbols_by_memory_address[addr] = symbol
|
|
min_call_site_param = min(addr, min_call_site_param)
|
|
symbol["name"] = "name{:x}".format(addr)
|
|
address = parse_hex(parts[0].split("<")[-1].strip(">:"))
|
|
symbol = {"debug_address": address, "debug_type": debug_type, "other": []}
|
|
if debug_type == "DW_TAG_structure_type":
|
|
symbol["struct"] = {}
|
|
elif debug_type == "DW_TAG_array_type":
|
|
symbol["subtype"] = None
|
|
symbol["bound_count"] = 0
|
|
symbol["maxlen"] = 0
|
|
elif debug_type == "DW_TAG_subrange_type":
|
|
symbol_stack[-1]["subtype"] = symbol
|
|
symbols_by_debug_address[address] = symbol
|
|
elif ignore:
|
|
continue
|
|
elif line[:4] == " ":
|
|
tag = parts[1].strip(":")
|
|
if tag == "DW_AT_name":
|
|
symbol["name"] = parts[-1]
|
|
elif tag == "DW_AT_type":
|
|
symbol["type"] = int(parts[-1].strip("<>"), 0)
|
|
if symbol["debug_type"] == "DW_TAG_subrange_type":
|
|
if not symbol_stack[-1]["subtype"]:
|
|
symbol_stack[-1]["subtype"] = symbol
|
|
elif symbol_stack[-1]["subtype"]["type"] == symbol["type"]:
|
|
second_subtype = True
|
|
else:
|
|
raise RuntimeError()
|
|
elif tag == "DW_AT_upper_bound":
|
|
# Skip arrays with length defined by other variables
|
|
if parts[-1][0] != "<":
|
|
upper_bound = int(parts[-1])
|
|
if symbol_stack[-1]["bound_count"] > 0:
|
|
symbol_stack[-1]["maxlen"] *= upper_bound + 1
|
|
else:
|
|
symbol_stack[-1]["maxlen"] = upper_bound + 1
|
|
symbol_stack[-1]["bound_count"] += 1
|
|
elif tag == "DW_AT_byte_size":
|
|
symbol["size"] = int(parts[-1])
|
|
elif tag == "DW_AT_inline":
|
|
symbol["inlined"] = True
|
|
elif tag == "DW_AT_low_pc":
|
|
addr = int(parts[-1], 0)
|
|
symbols_by_memory_address[addr] = symbol
|
|
elif tag == "DW_AT_location":
|
|
if parts[-2] == "(DW_OP_addr:":
|
|
addr = parse_hex(parts[-1].strip(")"))
|
|
if addr > 0:
|
|
symbol["start_address"] = addr
|
|
elif tag == "DW_AT_linkage_name":
|
|
symbol["linkage_name"] = parts[-1]
|
|
symbols_by_linkage_name[symbol["linkage_name"]] = symbol
|
|
elif tag == "DW_AT_data_member_location":
|
|
symbol_stack[-1]["struct"][int(parts[-1])] = symbol
|
|
elif tag == "DW_AT_GNU_call_site_value":
|
|
if parts[-2] == "(DW_OP_addr:":
|
|
symbol["call_site_value"] = parse_hex(parts[-1].strip(")"))
|
|
else:
|
|
symbol["other"].append(line)
|
|
#print(parts)
|
|
pass
|
|
else:
|
|
#print(line)
|
|
pass
|
|
|
|
MEMORY_NONE = 0
|
|
MEMORY_POINTER = 1
|
|
MEMORY_PY_OBJECT = 2
|
|
|
|
def get_size(t):
|
|
if "size" in t:
|
|
return t["size"]
|
|
return get_size(symbols_by_debug_address[t["type"]])
|
|
|
|
def get_pointer_map(t, depth=0):
|
|
if t["debug_type"] == "DW_TAG_pointer_type":
|
|
return {0: MEMORY_POINTER}
|
|
elif t["debug_type"] in ["DW_TAG_const_type", "DW_TAG_typedef", "DW_TAG_member", "DW_TAG_subrange_type", "DW_TAG_volatile_type"]:
|
|
if "name" in t and t["name"] == "mp_rom_obj_t":
|
|
return {0: MEMORY_PY_OBJECT}
|
|
return get_pointer_map(symbols_by_debug_address[t["type"]], depth+1)
|
|
elif t["debug_type"] in ["DW_TAG_base_type", "DW_TAG_enumeration_type"]:
|
|
return {}
|
|
elif t["debug_type"] == "DW_TAG_union_type":
|
|
# skip for now
|
|
return {}
|
|
elif "struct" in t:
|
|
combined_map = {}
|
|
for offset in t["struct"]:
|
|
member = t["struct"][offset]
|
|
submap = get_pointer_map(member)
|
|
for suboffset in submap:
|
|
combined_map[offset + suboffset] = submap[suboffset]
|
|
return combined_map
|
|
elif "subtype" in t:
|
|
subtype = symbols_by_debug_address[t["type"]]
|
|
pmap = get_pointer_map(subtype, depth+1)
|
|
size = get_size(subtype)
|
|
expanded_map = {}
|
|
for i in range(t["maxlen"]):
|
|
for offset in pmap:
|
|
expanded_map[size * i + offset] = pmap[offset]
|
|
return expanded_map
|
|
else:
|
|
print("no recurse", t)
|
|
pass
|
|
return {}
|
|
|
|
# Do a second pass to dereference the types
|
|
for symbol_address in symbols_by_memory_address:
|
|
symbol = symbols_by_memory_address[symbol_address]
|
|
if "type" in symbol:
|
|
if symbol["debug_type"] == "DW_TAG_variable":
|
|
symbol["pointer_map"] = get_pointer_map(symbols_by_debug_address[symbol["type"]])
|
|
type_string = []
|
|
t = symbol["type"]
|
|
offset = []
|
|
while t != None:
|
|
t_symbol = symbols_by_debug_address[t]
|
|
t = t_symbol.get("type", None)
|
|
if "name" in t_symbol:
|
|
type_string.append(t_symbol["name"])
|
|
elif t_symbol["debug_type"] == "DW_TAG_array_type":
|
|
type_string.append("[]")
|
|
elif t_symbol["debug_type"] == "DW_TAG_pointer_type":
|
|
type_string.append("*")
|
|
elif t_symbol["debug_type"] == "DW_TAG_const_type":
|
|
type_string.append("const")
|
|
elif t_symbol["debug_type"] == "DW_TAG_volatile_type":
|
|
type_string.append("volatile")
|
|
else:
|
|
#print(" ", t_symbol)
|
|
pass
|
|
type_string.reverse()
|
|
symbol["type_string"] = " ".join(type_string)
|
|
#print(symbol_name, symbol["debug_type"], symbol.get("type_string", ""))
|
|
|
|
# print()
|
|
# print()
|
|
# print(all_symbols["mp_builtin_module_table"])
|
|
# return
|
|
|
|
# Gather size and call info
|
|
text_dump = objdump("-Dz", "-j", ".text", elf_filename)
|
|
text_dump_lines = text_dump.stdout.decode("utf-8").split("\n")
|
|
section = None
|
|
symbol = None
|
|
symbol_type = None
|
|
for line in text_dump_lines[4:]:
|
|
if line.startswith("Disassembly of section"):
|
|
section = line.split()[-1].strip(":")
|
|
elif not line:
|
|
if symbol and "end_address" not in symbol:
|
|
symbol["end_address"] = last_address
|
|
symbol["size"] = last_address - symbol["start_address"]
|
|
symbol = None
|
|
continue
|
|
elif line[0].isnumeric():
|
|
symbol_address, symbol_name = line.split()
|
|
symbol_address = parse_hex(symbol_address)
|
|
symbol_name = symbol_name.strip("<>:")
|
|
if symbol_name in symbols_by_linkage_name:
|
|
linked_name = symbol_name
|
|
symbol = symbols_by_linkage_name[symbol_name]
|
|
if "name" in symbol:
|
|
non_linkage = symbol["name"]
|
|
if not non_linkage.startswith("__builtin"):
|
|
symbol_name = non_linkage
|
|
all_symbols[symbol_name] = symbol
|
|
if "name" not in symbol:
|
|
symbol["name"] = symbol_name
|
|
elif symbol_address in symbols_by_memory_address:
|
|
all_symbols[symbol_name] = symbols_by_memory_address[symbol_address]
|
|
if "name" not in all_symbols[symbol_name]:
|
|
all_symbols[symbol_name]["name"] = symbol_name
|
|
elif symbol_name not in all_symbols:
|
|
if symbol_name == "nlr_push_tail_var":
|
|
fake_type = all_symbols["mp_obj_get_type"]["type"]
|
|
symbol = {"debug_type": "DW_TAG_variable", "name": symbol_name, "type": fake_type}
|
|
else:
|
|
print(line)
|
|
print(symbol_name, symbol_address)
|
|
symbol = {"debug_type": "DW_TAG_subprogram", "name": symbol_name}
|
|
all_symbols[symbol_name] = symbol
|
|
#raise RuntimeError()
|
|
|
|
symbol = all_symbols[symbol_name]
|
|
symbol["start_address"] = symbol_address
|
|
symbols_by_memory_address[symbol_address] = symbol
|
|
symbol["section"] = section
|
|
|
|
if symbol["debug_type"] == "DW_TAG_subprogram":
|
|
symbol["outgoing_jumps"] = set()
|
|
symbol["incoming_jumps"] = set()
|
|
symbol_type = None
|
|
elif symbol["debug_type"] == "DW_TAG_variable":
|
|
symbol["outgoing_pointers"] = set()
|
|
symbol_type = symbols_by_debug_address[symbol["type"]]
|
|
all_symbols[symbol_name] = symbol
|
|
|
|
elif line[0] == " ":
|
|
parts = line.strip().split()
|
|
last_address = parse_hex(parts[0].strip(":"))
|
|
|
|
offset = last_address - symbol["start_address"]
|
|
if "pointer_map" in symbol:
|
|
if offset not in symbol["pointer_map"]:
|
|
#print(offset, symbol)
|
|
pass
|
|
else:
|
|
ref = parse_hex(parts[1])
|
|
pointer_style = symbol["pointer_map"][offset]
|
|
if pointer_style == MEMORY_POINTER:
|
|
symbol["outgoing_pointers"].add(ref & 0xfffffffe)
|
|
elif pointer_style == MEMORY_PY_OBJECT and ref & 0x3 == 0:
|
|
symbol["outgoing_pointers"].add(ref)
|
|
if len(parts[1]) == 8 and parts[1][0] == "0":
|
|
addr = parse_hex(parts[1])
|
|
if 0x2000 <= addr < 0x20000000:
|
|
if "outgoing_pointers" not in symbol:
|
|
symbol["outgoing_pointers"] = set()
|
|
symbol["outgoing_pointers"].add(addr)
|
|
elif "<" in line and symbol["debug_type"] == "DW_TAG_subprogram":
|
|
if line[-1] == ">":
|
|
jump_to = parts[-1].strip("<>").split("+")[0]
|
|
if "name" not in symbol:
|
|
print(jump_to)
|
|
print(symbol)
|
|
if jump_to != symbol["name"] and jump_to not in BAD_JUMPS:
|
|
symbol["outgoing_jumps"].add(jump_to)
|
|
#print(symbol_name, jump_to)
|
|
if jump_to == "_etext":
|
|
print(line)
|
|
elif "UNDEFINED" in line:
|
|
continue
|
|
elif parts[2] == "ldr":
|
|
continue
|
|
else:
|
|
print(line)
|
|
else:
|
|
#print(line)
|
|
pass
|
|
|
|
# print()
|
|
print(hex(min_call_site_param))
|
|
print(all_symbols["exception_table"])
|
|
# return
|
|
|
|
print("converting outgoing pointers to names")
|
|
|
|
# Convert outgoing pointers to names from addresses
|
|
for symbol_name in all_symbols:
|
|
symbol = all_symbols[symbol_name]
|
|
if "outgoing_pointers" not in symbol:
|
|
continue
|
|
converted = set()
|
|
for outgoing in symbol["outgoing_pointers"]:
|
|
if outgoing in symbols_by_memory_address:
|
|
outgoing = symbols_by_memory_address[outgoing]
|
|
#print(outgoing)
|
|
if outgoing["debug_type"] in ["DW_TAG_GNU_call_site", "DW_TAG_lexical_block"]:
|
|
continue
|
|
if outgoing["name"] == "audioio_wavefile_type":
|
|
print(outgoing)
|
|
converted.add(outgoing["name"])
|
|
symbol["outgoing_pointers"] = converted
|
|
|
|
print("linking back")
|
|
# Link back
|
|
for symbol_name in all_symbols:
|
|
symbol = all_symbols[symbol_name]
|
|
if "outgoing_jumps" in symbol:
|
|
for outgoing in symbol["outgoing_jumps"]:
|
|
if outgoing not in all_symbols:
|
|
#print(outgoing, symbol_name)
|
|
continue
|
|
#print(all_symbols[outgoing], symbol_name)
|
|
|
|
referenced_symbol = all_symbols[outgoing]
|
|
if "incoming_jumps" not in referenced_symbol:
|
|
#print(symbol_name, "->", outgoing)
|
|
referenced_symbol["incoming_jumps"] = set()
|
|
referenced_symbol["incoming_jumps"].add(symbol_name)
|
|
if "outgoing_pointers" in symbol:
|
|
for outgoing in symbol["outgoing_pointers"]:
|
|
if outgoing not in all_symbols:
|
|
#print(outgoing, symbol_name)
|
|
continue
|
|
#print(all_symbols[outgoing], symbol_name)
|
|
|
|
referenced_symbol = all_symbols[outgoing]
|
|
if "incoming_pointers" not in referenced_symbol:
|
|
#print(symbol_name, "->", outgoing)
|
|
referenced_symbol["incoming_pointers"] = set()
|
|
referenced_symbol["incoming_pointers"].add(symbol_name)
|
|
|
|
print(all_symbols["exception_table"])
|
|
|
|
# Chart it all
|
|
print("charting {} symbols".format(len(all_symbols)))
|
|
callgraph = pgv.AGraph(directed=True)
|
|
for i, symbol_name in enumerate(all_symbols):
|
|
symbol = all_symbols[symbol_name]
|
|
# print(i, symbol_name)
|
|
# if "outgoing_jumps" in symbol:
|
|
# print(" ", len(symbol["outgoing_jumps"]), "jumps")
|
|
# if "outgoing_pointers" in symbol:
|
|
# print(" ", len(symbol["outgoing_pointers"]), "ptrs")
|
|
# if i > 3000:
|
|
# break
|
|
if ("incoming_jumps" not in symbol or len(symbol["incoming_jumps"]) == 0) and ("incoming_pointers" not in symbol or len(symbol["incoming_pointers"]) == 0):
|
|
#print(symbol_name)
|
|
continue
|
|
if "start_address" not in symbol:
|
|
continue
|
|
callgraph.add_node(symbol_name)
|
|
if "outgoing_jumps" in symbol:
|
|
for outgoing in symbol["outgoing_jumps"]:
|
|
callgraph.add_edge(symbol_name, outgoing)
|
|
if "outgoing_pointers" in symbol:
|
|
for outgoing in symbol["outgoing_pointers"]:
|
|
callgraph.add_edge(symbol_name, outgoing, color="red")
|
|
#print(symbol_name, symbol)
|
|
|
|
# Style all of the nodes
|
|
print("styling")
|
|
for node in callgraph.iternodes():
|
|
if node.name not in all_symbols:
|
|
continue
|
|
symbol = all_symbols[node.name]
|
|
node.attr["shape"] = "box"
|
|
text_width_ish = len(node.name) * 0.1
|
|
if "size" not in symbol:
|
|
print(symbol)
|
|
size = symbol["size"] / 8
|
|
square_size = size ** 0.5
|
|
if text_width_ish > square_size:
|
|
w = text_width_ish
|
|
h = size / text_width_ish
|
|
else:
|
|
w = square_size
|
|
h = square_size
|
|
node.attr["width"] = w
|
|
node.attr["height"] = h
|
|
node.attr["label"] = node.name + "\r\n" + str(symbol["size"]) + " bytes"
|
|
node.attr["style"] = "filled"
|
|
|
|
incoming = 0
|
|
if "incoming_jumps" in symbol:
|
|
incoming += len(symbol["incoming_jumps"])
|
|
if "incoming_pointers" in symbol:
|
|
incoming += len(symbol["incoming_pointers"])
|
|
|
|
if node.name in SPECIAL_NODE_COLORS:
|
|
node.attr["color"] = SPECIAL_NODE_COLORS[node.name]
|
|
elif incoming == 1:
|
|
node.attr["color"] = "lightblue"
|
|
elif incoming > 25:
|
|
print("delete", node.name, "because it has {} incoming".format(incoming))
|
|
callgraph.delete_node(node.name)
|
|
elif incoming > 15:
|
|
node.attr["color"] = "red"
|
|
|
|
print("drawing")
|
|
callgraph.layout(prog="dot")
|
|
fn = "callgraph.svg"
|
|
print(fn)
|
|
callgraph.draw(fn)
|
|
|
|
if __name__ == "__main__":
|
|
do_all_the_things()
|