a64f2fdca0
Because mpy_ld.py doesn't know the target object representation, it emits instances of `MP_OBJ_NEW_QSTR(MP_QSTR_Foo)` as const string objects, rather than qstrs. However this doesn't work for map keys (e.g. for a locals dict) because the map has all_keys_are_qstrs flag is set (and also auto-complete requires the map keys to be qstrs). Instead, emit them as regular qstrs, and make a functioning MP_OBJ_NEW_QSTR function available (via `native_to_obj`, also used for e.g. making integers). Remove the code from mpy_ld.py to emit qstrs as constant strings, but leave behind the scaffold to emit constant objects in case we want to do use this in the future. Strictly this should be a .mpy sub-version bump, even though the function table isn't changing, it does lead to a change in behavior for a new .mpy running against old MicroPython. `mp_native_to_obj` will incorrectly return the qstr value directly as an `mp_obj_t`, leading to unexpected results. But given that it's broken at the moment, it seems unlikely that anyone is relying on this, so it's not work the other downsides of a sub-version bump (i.e. breaking pure-Python modules that use @native). The opposite case of running an old .mpy on new MicroPython is unchanged, and remains broken in exactly the same way. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
1070 lines
34 KiB
Python
Executable File
1070 lines
34 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
#
|
|
# This file is part of the MicroPython project, http://micropython.org/
|
|
#
|
|
# The MIT License (MIT)
|
|
#
|
|
# Copyright (c) 2019 Damien P. George
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
# of this software and associated documentation files (the "Software"), to deal
|
|
# in the Software without restriction, including without limitation the rights
|
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
# copies of the Software, and to permit persons to whom the Software is
|
|
# furnished to do so, subject to the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be included in
|
|
# all copies or substantial portions of the Software.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
# THE SOFTWARE.
|
|
|
|
"""
|
|
Link .o files to .mpy
|
|
"""
|
|
|
|
import sys, os, struct, re
|
|
from elftools.elf import elffile
|
|
|
|
sys.path.append(os.path.dirname(__file__) + "/../py")
|
|
import makeqstrdata as qstrutil
|
|
|
|
# MicroPython constants
|
|
MPY_VERSION = 6
|
|
MPY_SUB_VERSION = 1
|
|
MP_CODE_BYTECODE = 2
|
|
MP_CODE_NATIVE_VIPER = 4
|
|
MP_NATIVE_ARCH_X86 = 1
|
|
MP_NATIVE_ARCH_X64 = 2
|
|
MP_NATIVE_ARCH_ARMV6M = 4
|
|
MP_NATIVE_ARCH_ARMV7M = 5
|
|
MP_NATIVE_ARCH_ARMV7EMSP = 7
|
|
MP_NATIVE_ARCH_ARMV7EMDP = 8
|
|
MP_NATIVE_ARCH_XTENSA = 9
|
|
MP_NATIVE_ARCH_XTENSAWIN = 10
|
|
MP_PERSISTENT_OBJ_STR = 5
|
|
MP_SCOPE_FLAG_VIPERRELOC = 0x10
|
|
MP_SCOPE_FLAG_VIPERRODATA = 0x20
|
|
MP_SCOPE_FLAG_VIPERBSS = 0x40
|
|
MP_SMALL_INT_BITS = 31
|
|
|
|
# ELF constants
|
|
R_386_32 = 1
|
|
R_X86_64_64 = 1
|
|
R_XTENSA_32 = 1
|
|
R_386_PC32 = 2
|
|
R_X86_64_PC32 = 2
|
|
R_ARM_ABS32 = 2
|
|
R_386_GOT32 = 3
|
|
R_ARM_REL32 = 3
|
|
R_386_PLT32 = 4
|
|
R_X86_64_PLT32 = 4
|
|
R_XTENSA_PLT = 6
|
|
R_386_GOTOFF = 9
|
|
R_386_GOTPC = 10
|
|
R_ARM_THM_CALL = 10
|
|
R_XTENSA_DIFF32 = 19
|
|
R_XTENSA_SLOT0_OP = 20
|
|
R_ARM_BASE_PREL = 25 # aka R_ARM_GOTPC
|
|
R_ARM_GOT_BREL = 26 # aka R_ARM_GOT32
|
|
R_ARM_THM_JUMP24 = 30
|
|
R_X86_64_GOTPCREL = 9
|
|
R_X86_64_REX_GOTPCRELX = 42
|
|
R_386_GOT32X = 43
|
|
R_XTENSA_PDIFF32 = 59
|
|
|
|
################################################################################
|
|
# Architecture configuration
|
|
|
|
|
|
def asm_jump_x86(entry):
|
|
return struct.pack("<BI", 0xE9, entry - 5)
|
|
|
|
|
|
def asm_jump_thumb(entry):
|
|
# Only signed values that fit in 12 bits are supported
|
|
b_off = entry - 4
|
|
assert b_off >> 11 == 0 or b_off >> 11 == -1, b_off
|
|
return struct.pack("<H", 0xE000 | (b_off >> 1 & 0x07FF))
|
|
|
|
|
|
def asm_jump_thumb2(entry):
|
|
b_off = entry - 4
|
|
if b_off >> 11 == 0 or b_off >> 11 == -1:
|
|
# Signed value fits in 12 bits
|
|
b0 = 0xE000 | (b_off >> 1 & 0x07FF)
|
|
b1 = 0
|
|
else:
|
|
# Use large jump
|
|
b0 = 0xF000 | (b_off >> 12 & 0x07FF)
|
|
b1 = 0xB800 | (b_off >> 1 & 0x7FF)
|
|
return struct.pack("<HH", b0, b1)
|
|
|
|
|
|
def asm_jump_xtensa(entry):
|
|
jump_offset = entry - 4
|
|
jump_op = jump_offset << 6 | 6
|
|
return struct.pack("<BH", jump_op & 0xFF, jump_op >> 8)
|
|
|
|
|
|
class ArchData:
|
|
def __init__(self, name, mpy_feature, word_size, arch_got, asm_jump, *, separate_rodata=False):
|
|
self.name = name
|
|
self.mpy_feature = mpy_feature
|
|
self.qstr_entry_size = 2
|
|
self.word_size = word_size
|
|
self.arch_got = arch_got
|
|
self.asm_jump = asm_jump
|
|
self.separate_rodata = separate_rodata
|
|
|
|
|
|
ARCH_DATA = {
|
|
"x86": ArchData(
|
|
"EM_386",
|
|
MP_NATIVE_ARCH_X86 << 2,
|
|
4,
|
|
(R_386_PC32, R_386_GOT32, R_386_GOT32X),
|
|
asm_jump_x86,
|
|
),
|
|
"x64": ArchData(
|
|
"EM_X86_64",
|
|
MP_NATIVE_ARCH_X64 << 2,
|
|
8,
|
|
(R_X86_64_GOTPCREL, R_X86_64_REX_GOTPCRELX),
|
|
asm_jump_x86,
|
|
),
|
|
"armv6m": ArchData(
|
|
"EM_ARM",
|
|
MP_NATIVE_ARCH_ARMV6M << 2,
|
|
4,
|
|
(R_ARM_GOT_BREL,),
|
|
asm_jump_thumb,
|
|
),
|
|
"armv7m": ArchData(
|
|
"EM_ARM",
|
|
MP_NATIVE_ARCH_ARMV7M << 2,
|
|
4,
|
|
(R_ARM_GOT_BREL,),
|
|
asm_jump_thumb2,
|
|
),
|
|
"armv7emsp": ArchData(
|
|
"EM_ARM",
|
|
MP_NATIVE_ARCH_ARMV7EMSP << 2,
|
|
4,
|
|
(R_ARM_GOT_BREL,),
|
|
asm_jump_thumb2,
|
|
),
|
|
"armv7emdp": ArchData(
|
|
"EM_ARM",
|
|
MP_NATIVE_ARCH_ARMV7EMDP << 2,
|
|
4,
|
|
(R_ARM_GOT_BREL,),
|
|
asm_jump_thumb2,
|
|
),
|
|
"xtensa": ArchData(
|
|
"EM_XTENSA",
|
|
MP_NATIVE_ARCH_XTENSA << 2,
|
|
4,
|
|
(R_XTENSA_32, R_XTENSA_PLT),
|
|
asm_jump_xtensa,
|
|
),
|
|
"xtensawin": ArchData(
|
|
"EM_XTENSA",
|
|
MP_NATIVE_ARCH_XTENSAWIN << 2,
|
|
4,
|
|
(R_XTENSA_32, R_XTENSA_PLT),
|
|
asm_jump_xtensa,
|
|
separate_rodata=True,
|
|
),
|
|
}
|
|
|
|
################################################################################
|
|
# Helper functions
|
|
|
|
|
|
def align_to(value, align):
|
|
return (value + align - 1) & ~(align - 1)
|
|
|
|
|
|
def unpack_u24le(data, offset):
|
|
return data[offset] | data[offset + 1] << 8 | data[offset + 2] << 16
|
|
|
|
|
|
def pack_u24le(data, offset, value):
|
|
data[offset] = value & 0xFF
|
|
data[offset + 1] = value >> 8 & 0xFF
|
|
data[offset + 2] = value >> 16 & 0xFF
|
|
|
|
|
|
def xxd(text):
|
|
for i in range(0, len(text), 16):
|
|
print("{:08x}:".format(i), end="")
|
|
for j in range(4):
|
|
off = i + j * 4
|
|
if off < len(text):
|
|
d = int.from_bytes(text[off : off + 4], "little")
|
|
print(" {:08x}".format(d), end="")
|
|
print()
|
|
|
|
|
|
# Smaller numbers are enabled first
|
|
LOG_LEVEL_1 = 1
|
|
LOG_LEVEL_2 = 2
|
|
LOG_LEVEL_3 = 3
|
|
log_level = LOG_LEVEL_1
|
|
|
|
|
|
def log(level, msg):
|
|
if level <= log_level:
|
|
print(msg)
|
|
|
|
|
|
################################################################################
|
|
# Qstr extraction
|
|
|
|
|
|
def extract_qstrs(source_files):
|
|
def read_qstrs(f):
|
|
with open(f) as f:
|
|
vals = set()
|
|
for line in f:
|
|
for m in re.finditer(r"MP_QSTR_[A-Za-z0-9_]*", line):
|
|
vals.add(m.group())
|
|
return vals
|
|
|
|
static_qstrs = ["MP_QSTR_" + qstrutil.qstr_escape(q) for q in qstrutil.static_qstr_list]
|
|
|
|
qstr_vals = set()
|
|
for f in source_files:
|
|
vals = read_qstrs(f)
|
|
qstr_vals.update(vals)
|
|
qstr_vals.difference_update(static_qstrs)
|
|
|
|
return static_qstrs, qstr_vals
|
|
|
|
|
|
################################################################################
|
|
# Linker
|
|
|
|
|
|
class LinkError(Exception):
|
|
pass
|
|
|
|
|
|
class Section:
|
|
def __init__(self, name, data, alignment, filename=None):
|
|
self.filename = filename
|
|
self.name = name
|
|
self.data = data
|
|
self.alignment = alignment
|
|
self.addr = 0
|
|
self.reloc = []
|
|
|
|
@staticmethod
|
|
def from_elfsec(elfsec, filename):
|
|
assert elfsec.header.sh_addr == 0
|
|
return Section(elfsec.name, elfsec.data(), elfsec.data_alignment, filename)
|
|
|
|
|
|
class GOTEntry:
|
|
def __init__(self, name, sym, link_addr=0):
|
|
self.name = name
|
|
self.sym = sym
|
|
self.offset = None
|
|
self.link_addr = link_addr
|
|
|
|
def isexternal(self):
|
|
return self.sec_name.startswith(".external")
|
|
|
|
def istext(self):
|
|
return self.sec_name.startswith(".text")
|
|
|
|
def isrodata(self):
|
|
return self.sec_name.startswith((".rodata", ".data.rel.ro"))
|
|
|
|
def isbss(self):
|
|
return self.sec_name.startswith(".bss")
|
|
|
|
|
|
class LiteralEntry:
|
|
def __init__(self, value, offset):
|
|
self.value = value
|
|
self.offset = offset
|
|
|
|
|
|
class LinkEnv:
|
|
def __init__(self, arch):
|
|
self.arch = ARCH_DATA[arch]
|
|
self.sections = [] # list of sections in order of output
|
|
self.literal_sections = [] # list of literal sections (xtensa only)
|
|
self.known_syms = {} # dict of symbols that are defined
|
|
self.unresolved_syms = [] # list of unresolved symbols
|
|
self.mpy_relocs = [] # list of relocations needed in the output .mpy file
|
|
|
|
def check_arch(self, arch_name):
|
|
if arch_name != self.arch.name:
|
|
raise LinkError("incompatible arch")
|
|
|
|
def print_sections(self):
|
|
log(LOG_LEVEL_2, "sections:")
|
|
for sec in self.sections:
|
|
log(LOG_LEVEL_2, " {:08x} {} size={}".format(sec.addr, sec.name, len(sec.data)))
|
|
|
|
def find_addr(self, name):
|
|
if name in self.known_syms:
|
|
s = self.known_syms[name]
|
|
return s.section.addr + s["st_value"]
|
|
raise LinkError("unknown symbol: {}".format(name))
|
|
|
|
|
|
def build_got_generic(env):
|
|
env.got_entries = {}
|
|
for sec in env.sections:
|
|
for r in sec.reloc:
|
|
s = r.sym
|
|
if not (
|
|
s.entry["st_info"]["bind"] == "STB_GLOBAL"
|
|
and r["r_info_type"] in env.arch.arch_got
|
|
):
|
|
continue
|
|
s_type = s.entry["st_info"]["type"]
|
|
assert s_type in ("STT_NOTYPE", "STT_FUNC", "STT_OBJECT"), s_type
|
|
assert s.name
|
|
if s.name in env.got_entries:
|
|
continue
|
|
env.got_entries[s.name] = GOTEntry(s.name, s)
|
|
|
|
|
|
def build_got_xtensa(env):
|
|
env.got_entries = {}
|
|
env.lit_entries = {}
|
|
env.xt_literals = {}
|
|
|
|
# Extract the values from the literal table
|
|
for sec in env.literal_sections:
|
|
assert len(sec.data) % env.arch.word_size == 0
|
|
|
|
# Look through literal relocations to find any global pointers that should be GOT entries
|
|
for r in sec.reloc:
|
|
s = r.sym
|
|
s_type = s.entry["st_info"]["type"]
|
|
assert s_type in ("STT_NOTYPE", "STT_FUNC", "STT_OBJECT", "STT_SECTION"), s_type
|
|
assert r["r_info_type"] in env.arch.arch_got
|
|
assert r["r_offset"] % env.arch.word_size == 0
|
|
# This entry is a global pointer
|
|
existing = struct.unpack_from("<I", sec.data, r["r_offset"])[0]
|
|
if s_type == "STT_SECTION":
|
|
assert r["r_addend"] == 0
|
|
name = "{}+0x{:x}".format(s.section.name, existing)
|
|
else:
|
|
assert existing == 0
|
|
name = s.name
|
|
if r["r_addend"] != 0:
|
|
name = "{}+0x{:x}".format(name, r["r_addend"])
|
|
idx = "{}+0x{:x}".format(sec.filename, r["r_offset"])
|
|
env.xt_literals[idx] = name
|
|
if name in env.got_entries:
|
|
# Deduplicate GOT entries
|
|
continue
|
|
env.got_entries[name] = GOTEntry(name, s, existing)
|
|
|
|
# Go through all literal entries finding those that aren't global pointers so must be actual literals
|
|
for i in range(0, len(sec.data), env.arch.word_size):
|
|
idx = "{}+0x{:x}".format(sec.filename, i)
|
|
if idx not in env.xt_literals:
|
|
# This entry is an actual literal
|
|
value = struct.unpack_from("<I", sec.data, i)[0]
|
|
env.xt_literals[idx] = value
|
|
if value in env.lit_entries:
|
|
# Deduplicate literals
|
|
continue
|
|
env.lit_entries[value] = LiteralEntry(
|
|
value, len(env.lit_entries) * env.arch.word_size
|
|
)
|
|
|
|
|
|
def populate_got(env):
|
|
# Compute GOT destination addresses
|
|
for got_entry in env.got_entries.values():
|
|
sym = got_entry.sym
|
|
if hasattr(sym, "resolved"):
|
|
sym = sym.resolved
|
|
sec = sym.section
|
|
addr = sym["st_value"]
|
|
got_entry.sec_name = sec.name
|
|
got_entry.link_addr += sec.addr + addr
|
|
|
|
# Get sorted GOT, sorted by external, text, rodata, bss so relocations can be combined
|
|
got_list = sorted(
|
|
env.got_entries.values(),
|
|
key=lambda g: g.isexternal() + 2 * g.istext() + 3 * g.isrodata() + 4 * g.isbss(),
|
|
)
|
|
|
|
# Layout and populate the GOT
|
|
offset = 0
|
|
for got_entry in got_list:
|
|
got_entry.offset = offset
|
|
offset += env.arch.word_size
|
|
o = env.got_section.addr + got_entry.offset
|
|
env.full_text[o : o + env.arch.word_size] = got_entry.link_addr.to_bytes(
|
|
env.arch.word_size, "little"
|
|
)
|
|
|
|
# Create a relocation for each GOT entry
|
|
for got_entry in got_list:
|
|
if got_entry.name in ("mp_native_qstr_table", "mp_native_obj_table", "mp_fun_table"):
|
|
dest = got_entry.name
|
|
elif got_entry.name.startswith("mp_fun_table+0x"):
|
|
dest = int(got_entry.name.split("+")[1], 16) // env.arch.word_size
|
|
elif got_entry.sec_name == ".external.mp_fun_table":
|
|
dest = got_entry.sym.mp_fun_table_offset
|
|
elif got_entry.sec_name.startswith(".text"):
|
|
dest = ".text"
|
|
elif got_entry.sec_name.startswith(".rodata"):
|
|
dest = ".rodata"
|
|
elif got_entry.sec_name.startswith(".data.rel.ro"):
|
|
dest = ".data.rel.ro"
|
|
elif got_entry.sec_name.startswith(".bss"):
|
|
dest = ".bss"
|
|
else:
|
|
assert 0, (got_entry.name, got_entry.sec_name)
|
|
env.mpy_relocs.append((".text", env.got_section.addr + got_entry.offset, dest))
|
|
|
|
# Print out the final GOT
|
|
log(LOG_LEVEL_2, "GOT: {:08x}".format(env.got_section.addr))
|
|
for g in got_list:
|
|
log(
|
|
LOG_LEVEL_2,
|
|
" {:08x} {} -> {}+{:08x}".format(g.offset, g.name, g.sec_name, g.link_addr),
|
|
)
|
|
|
|
|
|
def populate_lit(env):
|
|
log(LOG_LEVEL_2, "LIT: {:08x}".format(env.lit_section.addr))
|
|
for lit_entry in env.lit_entries.values():
|
|
value = lit_entry.value
|
|
log(LOG_LEVEL_2, " {:08x} = {:08x}".format(lit_entry.offset, value))
|
|
o = env.lit_section.addr + lit_entry.offset
|
|
env.full_text[o : o + env.arch.word_size] = value.to_bytes(env.arch.word_size, "little")
|
|
|
|
|
|
def do_relocation_text(env, text_addr, r):
|
|
# Extract relevant info about symbol that's being relocated
|
|
s = r.sym
|
|
s_bind = s.entry["st_info"]["bind"]
|
|
s_type = s.entry["st_info"]["type"]
|
|
r_offset = r["r_offset"] + text_addr
|
|
r_info_type = r["r_info_type"]
|
|
try:
|
|
# only for RELA sections
|
|
r_addend = r["r_addend"]
|
|
except KeyError:
|
|
r_addend = 0
|
|
|
|
# Default relocation type and name for logging
|
|
reloc_type = "le32"
|
|
log_name = None
|
|
|
|
if (
|
|
env.arch.name == "EM_386"
|
|
and r_info_type in (R_386_PC32, R_386_PLT32)
|
|
or env.arch.name == "EM_X86_64"
|
|
and r_info_type in (R_X86_64_PC32, R_X86_64_PLT32)
|
|
or env.arch.name == "EM_ARM"
|
|
and r_info_type in (R_ARM_REL32, R_ARM_THM_CALL, R_ARM_THM_JUMP24)
|
|
or s_bind == "STB_LOCAL"
|
|
and env.arch.name == "EM_XTENSA"
|
|
and r_info_type == R_XTENSA_32 # not GOT
|
|
):
|
|
# Standard relocation to fixed location within text/rodata
|
|
if hasattr(s, "resolved"):
|
|
s = s.resolved
|
|
|
|
sec = s.section
|
|
|
|
if env.arch.separate_rodata and sec.name.startswith(".rodata"):
|
|
raise LinkError("fixed relocation to rodata with rodata referenced via GOT")
|
|
|
|
if sec.name.startswith(".bss"):
|
|
raise LinkError(
|
|
"{}: fixed relocation to bss (bss variables can't be static)".format(s.filename)
|
|
)
|
|
|
|
if sec.name.startswith(".external"):
|
|
raise LinkError(
|
|
"{}: fixed relocation to external symbol: {}".format(s.filename, s.name)
|
|
)
|
|
|
|
addr = sec.addr + s["st_value"]
|
|
reloc = addr - r_offset + r_addend
|
|
|
|
if r_info_type in (R_ARM_THM_CALL, R_ARM_THM_JUMP24):
|
|
# Both relocations have the same bit pattern to rewrite:
|
|
# R_ARM_THM_CALL: bl
|
|
# R_ARM_THM_JUMP24: b.w
|
|
reloc_type = "thumb_b"
|
|
|
|
elif (
|
|
env.arch.name == "EM_386"
|
|
and r_info_type == R_386_GOTPC
|
|
or env.arch.name == "EM_ARM"
|
|
and r_info_type == R_ARM_BASE_PREL
|
|
):
|
|
# Relocation to GOT address itself
|
|
assert s.name == "_GLOBAL_OFFSET_TABLE_"
|
|
addr = env.got_section.addr
|
|
reloc = addr - r_offset + r_addend
|
|
|
|
elif (
|
|
env.arch.name == "EM_386"
|
|
and r_info_type in (R_386_GOT32, R_386_GOT32X)
|
|
or env.arch.name == "EM_ARM"
|
|
and r_info_type == R_ARM_GOT_BREL
|
|
):
|
|
# Relcation pointing to GOT
|
|
reloc = addr = env.got_entries[s.name].offset
|
|
|
|
elif env.arch.name == "EM_X86_64" and r_info_type in (
|
|
R_X86_64_GOTPCREL,
|
|
R_X86_64_REX_GOTPCRELX,
|
|
):
|
|
# Relcation pointing to GOT
|
|
got_entry = env.got_entries[s.name]
|
|
addr = env.got_section.addr + got_entry.offset
|
|
reloc = addr - r_offset + r_addend
|
|
|
|
elif env.arch.name == "EM_386" and r_info_type == R_386_GOTOFF:
|
|
# Relocation relative to GOT
|
|
addr = s.section.addr + s["st_value"]
|
|
reloc = addr - env.got_section.addr + r_addend
|
|
|
|
elif env.arch.name == "EM_XTENSA" and r_info_type == R_XTENSA_SLOT0_OP:
|
|
# Relocation pointing to GOT, xtensa specific
|
|
sec = s.section
|
|
if sec.name.startswith(".text"):
|
|
# it looks like R_XTENSA_SLOT0_OP into .text is already correctly relocated
|
|
return
|
|
assert sec.name.startswith(".literal"), sec.name
|
|
lit_idx = "{}+0x{:x}".format(sec.filename, r_addend)
|
|
lit_ptr = env.xt_literals[lit_idx]
|
|
if isinstance(lit_ptr, str):
|
|
addr = env.got_section.addr + env.got_entries[lit_ptr].offset
|
|
log_name = "GOT {}".format(lit_ptr)
|
|
else:
|
|
addr = env.lit_section.addr + env.lit_entries[lit_ptr].offset
|
|
log_name = "LIT"
|
|
reloc = addr - r_offset
|
|
reloc_type = "xtensa_l32r"
|
|
|
|
elif env.arch.name == "EM_XTENSA" and r_info_type in (R_XTENSA_DIFF32, R_XTENSA_PDIFF32):
|
|
if s.section.name.startswith(".text"):
|
|
# it looks like R_XTENSA_[P]DIFF32 into .text is already correctly relocated
|
|
return
|
|
assert 0
|
|
|
|
else:
|
|
# Unknown/unsupported relocation
|
|
assert 0, r_info_type
|
|
|
|
# Write relocation
|
|
if reloc_type == "le32":
|
|
(existing,) = struct.unpack_from("<I", env.full_text, r_offset)
|
|
struct.pack_into("<I", env.full_text, r_offset, (existing + reloc) & 0xFFFFFFFF)
|
|
elif reloc_type == "thumb_b":
|
|
b_h, b_l = struct.unpack_from("<HH", env.full_text, r_offset)
|
|
existing = (b_h & 0x7FF) << 12 | (b_l & 0x7FF) << 1
|
|
if existing >= 0x400000: # 2's complement
|
|
existing -= 0x800000
|
|
new = existing + reloc
|
|
b_h = (b_h & 0xF800) | (new >> 12) & 0x7FF
|
|
b_l = (b_l & 0xF800) | (new >> 1) & 0x7FF
|
|
struct.pack_into("<HH", env.full_text, r_offset, b_h, b_l)
|
|
elif reloc_type == "xtensa_l32r":
|
|
l32r = unpack_u24le(env.full_text, r_offset)
|
|
assert l32r & 0xF == 1 # RI16 encoded l32r
|
|
l32r_imm16 = l32r >> 8
|
|
l32r_imm16 = (l32r_imm16 + reloc >> 2) & 0xFFFF
|
|
l32r = l32r & 0xFF | l32r_imm16 << 8
|
|
pack_u24le(env.full_text, r_offset, l32r)
|
|
else:
|
|
assert 0, reloc_type
|
|
|
|
# Log information about relocation
|
|
if log_name is None:
|
|
if s_type == "STT_SECTION":
|
|
log_name = s.section.name
|
|
else:
|
|
log_name = s.name
|
|
log(LOG_LEVEL_3, " {:08x} {} -> {:08x}".format(r_offset, log_name, addr))
|
|
|
|
|
|
def do_relocation_data(env, text_addr, r):
|
|
s = r.sym
|
|
s_type = s.entry["st_info"]["type"]
|
|
r_offset = r["r_offset"] + text_addr
|
|
r_info_type = r["r_info_type"]
|
|
try:
|
|
# only for RELA sections
|
|
r_addend = r["r_addend"]
|
|
except KeyError:
|
|
r_addend = 0
|
|
|
|
if (
|
|
env.arch.name == "EM_386"
|
|
and r_info_type == R_386_32
|
|
or env.arch.name == "EM_X86_64"
|
|
and r_info_type == R_X86_64_64
|
|
or env.arch.name == "EM_ARM"
|
|
and r_info_type == R_ARM_ABS32
|
|
or env.arch.name == "EM_XTENSA"
|
|
and r_info_type == R_XTENSA_32
|
|
):
|
|
# Relocation in data.rel.ro to internal/external symbol
|
|
if env.arch.word_size == 4:
|
|
struct_type = "<I"
|
|
elif env.arch.word_size == 8:
|
|
struct_type = "<Q"
|
|
sec = s.section
|
|
assert r_offset % env.arch.word_size == 0
|
|
addr = sec.addr + s["st_value"] + r_addend
|
|
if s_type == "STT_SECTION":
|
|
log_name = sec.name
|
|
else:
|
|
log_name = s.name
|
|
log(LOG_LEVEL_3, " {:08x} -> {} {:08x}".format(r_offset, log_name, addr))
|
|
if env.arch.separate_rodata:
|
|
data = env.full_rodata
|
|
else:
|
|
data = env.full_text
|
|
(existing,) = struct.unpack_from(struct_type, data, r_offset)
|
|
if sec.name.startswith((".text", ".rodata", ".data.rel.ro", ".bss")):
|
|
struct.pack_into(struct_type, data, r_offset, existing + addr)
|
|
kind = sec.name
|
|
elif sec.name == ".external.mp_fun_table":
|
|
assert addr == 0
|
|
kind = s.mp_fun_table_offset
|
|
else:
|
|
assert 0, sec.name
|
|
if env.arch.separate_rodata:
|
|
base = ".rodata"
|
|
else:
|
|
base = ".text"
|
|
env.mpy_relocs.append((base, r_offset, kind))
|
|
|
|
else:
|
|
# Unknown/unsupported relocation
|
|
assert 0, r_info_type
|
|
|
|
|
|
def load_object_file(env, felf):
|
|
with open(felf, "rb") as f:
|
|
elf = elffile.ELFFile(f)
|
|
env.check_arch(elf["e_machine"])
|
|
|
|
# Get symbol table
|
|
symtab = list(elf.get_section_by_name(".symtab").iter_symbols())
|
|
|
|
# Load needed sections from ELF file
|
|
sections_shndx = {} # maps elf shndx to Section object
|
|
for idx, s in enumerate(elf.iter_sections()):
|
|
if s.header.sh_type in ("SHT_PROGBITS", "SHT_NOBITS"):
|
|
if s.data_size == 0:
|
|
# Ignore empty sections
|
|
pass
|
|
elif s.name.startswith((".literal", ".text", ".rodata", ".data.rel.ro", ".bss")):
|
|
sec = Section.from_elfsec(s, felf)
|
|
sections_shndx[idx] = sec
|
|
if s.name.startswith(".literal"):
|
|
env.literal_sections.append(sec)
|
|
else:
|
|
env.sections.append(sec)
|
|
elif s.name.startswith(".data"):
|
|
raise LinkError("{}: {} non-empty".format(felf, s.name))
|
|
else:
|
|
# Ignore section
|
|
pass
|
|
elif s.header.sh_type in ("SHT_REL", "SHT_RELA"):
|
|
shndx = s.header.sh_info
|
|
if shndx in sections_shndx:
|
|
sec = sections_shndx[shndx]
|
|
sec.reloc_name = s.name
|
|
sec.reloc = list(s.iter_relocations())
|
|
for r in sec.reloc:
|
|
r.sym = symtab[r["r_info_sym"]]
|
|
|
|
# Link symbols to their sections, and update known and unresolved symbols
|
|
for sym in symtab:
|
|
sym.filename = felf
|
|
shndx = sym.entry["st_shndx"]
|
|
if shndx in sections_shndx:
|
|
# Symbol with associated section
|
|
sym.section = sections_shndx[shndx]
|
|
if sym["st_info"]["bind"] == "STB_GLOBAL":
|
|
# Defined global symbol
|
|
if sym.name in env.known_syms and not sym.name.startswith(
|
|
"__x86.get_pc_thunk."
|
|
):
|
|
raise LinkError("duplicate symbol: {}".format(sym.name))
|
|
env.known_syms[sym.name] = sym
|
|
elif sym.entry["st_shndx"] == "SHN_UNDEF" and sym["st_info"]["bind"] == "STB_GLOBAL":
|
|
# Undefined global symbol, needs resolving
|
|
env.unresolved_syms.append(sym)
|
|
|
|
|
|
def link_objects(env, native_qstr_vals_len):
|
|
# Build GOT information
|
|
if env.arch.name == "EM_XTENSA":
|
|
build_got_xtensa(env)
|
|
else:
|
|
build_got_generic(env)
|
|
|
|
# Creat GOT section
|
|
got_size = len(env.got_entries) * env.arch.word_size
|
|
env.got_section = Section("GOT", bytearray(got_size), env.arch.word_size)
|
|
if env.arch.name == "EM_XTENSA":
|
|
env.sections.insert(0, env.got_section)
|
|
else:
|
|
env.sections.append(env.got_section)
|
|
|
|
# Create optional literal section
|
|
if env.arch.name == "EM_XTENSA":
|
|
lit_size = len(env.lit_entries) * env.arch.word_size
|
|
env.lit_section = Section("LIT", bytearray(lit_size), env.arch.word_size)
|
|
env.sections.insert(1, env.lit_section)
|
|
|
|
# Create section to contain mp_native_qstr_table
|
|
env.qstr_table_section = Section(
|
|
".external.qstr_table",
|
|
bytearray(native_qstr_vals_len * env.arch.qstr_entry_size),
|
|
env.arch.qstr_entry_size,
|
|
)
|
|
|
|
# Create section to contain mp_native_obj_table
|
|
env.obj_table_section = Section(
|
|
".external.obj_table",
|
|
bytearray(0 * env.arch.word_size), # currently empty
|
|
env.arch.word_size,
|
|
)
|
|
|
|
# Resolve unknown symbols
|
|
mp_fun_table_sec = Section(".external.mp_fun_table", b"", 0)
|
|
fun_table = {
|
|
key: 67 + idx
|
|
for idx, key in enumerate(
|
|
[
|
|
"mp_type_type",
|
|
"mp_type_str",
|
|
"mp_type_list",
|
|
"mp_type_dict",
|
|
"mp_type_fun_builtin_0",
|
|
"mp_type_fun_builtin_1",
|
|
"mp_type_fun_builtin_2",
|
|
"mp_type_fun_builtin_3",
|
|
"mp_type_fun_builtin_var",
|
|
"mp_stream_read_obj",
|
|
"mp_stream_readinto_obj",
|
|
"mp_stream_unbuffered_readline_obj",
|
|
"mp_stream_write_obj",
|
|
]
|
|
)
|
|
}
|
|
for sym in env.unresolved_syms:
|
|
assert sym["st_value"] == 0
|
|
if sym.name == "_GLOBAL_OFFSET_TABLE_":
|
|
pass
|
|
elif sym.name == "mp_fun_table":
|
|
sym.section = Section(".external", b"", 0)
|
|
elif sym.name == "mp_native_qstr_table":
|
|
sym.section = env.qstr_table_section
|
|
elif sym.name == "mp_native_obj_table":
|
|
sym.section = env.obj_table_section
|
|
elif sym.name in env.known_syms:
|
|
sym.resolved = env.known_syms[sym.name]
|
|
else:
|
|
if sym.name in fun_table:
|
|
sym.section = mp_fun_table_sec
|
|
sym.mp_fun_table_offset = fun_table[sym.name]
|
|
else:
|
|
raise LinkError("{}: undefined symbol: {}".format(sym.filename, sym.name))
|
|
|
|
# Align sections, assign their addresses, and create full_text
|
|
env.full_text = bytearray(env.arch.asm_jump(8)) # dummy, to be filled in later
|
|
env.full_rodata = bytearray(0)
|
|
env.full_bss = bytearray(0)
|
|
for sec in env.sections:
|
|
if env.arch.separate_rodata and sec.name.startswith((".rodata", ".data.rel.ro")):
|
|
data = env.full_rodata
|
|
elif sec.name.startswith(".bss"):
|
|
data = env.full_bss
|
|
else:
|
|
data = env.full_text
|
|
sec.addr = align_to(len(data), sec.alignment)
|
|
data.extend(b"\x00" * (sec.addr - len(data)))
|
|
data.extend(sec.data)
|
|
|
|
env.print_sections()
|
|
|
|
populate_got(env)
|
|
if env.arch.name == "EM_XTENSA":
|
|
populate_lit(env)
|
|
|
|
# Fill in relocations
|
|
for sec in env.sections:
|
|
if not sec.reloc:
|
|
continue
|
|
log(
|
|
LOG_LEVEL_3,
|
|
"{}: {} relocations via {}:".format(sec.filename, sec.name, sec.reloc_name),
|
|
)
|
|
for r in sec.reloc:
|
|
if sec.name.startswith((".text", ".rodata")):
|
|
do_relocation_text(env, sec.addr, r)
|
|
elif sec.name.startswith(".data.rel.ro"):
|
|
do_relocation_data(env, sec.addr, r)
|
|
else:
|
|
assert 0, sec.name
|
|
|
|
|
|
################################################################################
|
|
# .mpy output
|
|
|
|
|
|
class MPYOutput:
|
|
def open(self, fname):
|
|
self.f = open(fname, "wb")
|
|
self.prev_base = -1
|
|
self.prev_offset = -1
|
|
|
|
def close(self):
|
|
self.f.close()
|
|
|
|
def write_bytes(self, buf):
|
|
self.f.write(buf)
|
|
|
|
def write_uint(self, val):
|
|
b = bytearray()
|
|
b.insert(0, val & 0x7F)
|
|
val >>= 7
|
|
while val:
|
|
b.insert(0, 0x80 | (val & 0x7F))
|
|
val >>= 7
|
|
self.write_bytes(b)
|
|
|
|
def write_qstr(self, s):
|
|
if s in qstrutil.static_qstr_list:
|
|
self.write_uint((qstrutil.static_qstr_list.index(s) + 1) << 1 | 1)
|
|
else:
|
|
s = bytes(s, "ascii")
|
|
self.write_uint(len(s) << 1)
|
|
self.write_bytes(s)
|
|
self.write_bytes(b"\x00")
|
|
|
|
def write_reloc(self, base, offset, dest, n):
|
|
need_offset = not (base == self.prev_base and offset == self.prev_offset + 1)
|
|
self.prev_offset = offset + n - 1
|
|
if dest <= 2:
|
|
dest = (dest << 1) | (n > 1)
|
|
else:
|
|
assert 6 <= dest <= 127
|
|
assert n == 1
|
|
dest = dest << 1 | need_offset
|
|
assert 0 <= dest <= 0xFE, dest
|
|
self.write_bytes(bytes([dest]))
|
|
if need_offset:
|
|
if base == ".text":
|
|
base = 0
|
|
elif base == ".rodata":
|
|
base = 1
|
|
self.write_uint(offset << 1 | base)
|
|
if n > 1:
|
|
self.write_uint(n)
|
|
|
|
|
|
def build_mpy(env, entry_offset, fmpy, native_qstr_vals):
|
|
# Write jump instruction to start of text
|
|
jump = env.arch.asm_jump(entry_offset)
|
|
env.full_text[: len(jump)] = jump
|
|
|
|
log(LOG_LEVEL_1, "arch: {}".format(env.arch.name))
|
|
log(LOG_LEVEL_1, "text size: {}".format(len(env.full_text)))
|
|
if len(env.full_rodata):
|
|
log(LOG_LEVEL_1, "rodata size: {}".format(len(env.full_rodata)))
|
|
log(LOG_LEVEL_1, "bss size: {}".format(len(env.full_bss)))
|
|
log(LOG_LEVEL_1, "GOT entries: {}".format(len(env.got_entries)))
|
|
|
|
# xxd(env.full_text)
|
|
|
|
out = MPYOutput()
|
|
out.open(fmpy)
|
|
|
|
# MPY: header
|
|
out.write_bytes(
|
|
bytearray(
|
|
[ord("M"), MPY_VERSION, env.arch.mpy_feature | MPY_SUB_VERSION, MP_SMALL_INT_BITS]
|
|
)
|
|
)
|
|
|
|
# MPY: n_qstr
|
|
out.write_uint(1 + len(native_qstr_vals))
|
|
|
|
# MPY: n_obj
|
|
out.write_uint(0)
|
|
|
|
# MPY: qstr table
|
|
out.write_qstr(fmpy) # filename
|
|
for q in native_qstr_vals:
|
|
out.write_qstr(q)
|
|
|
|
# MPY: object table
|
|
# <empty>
|
|
|
|
# MPY: kind/len
|
|
out.write_uint(len(env.full_text) << 3 | (MP_CODE_NATIVE_VIPER - MP_CODE_BYTECODE))
|
|
|
|
# MPY: machine code
|
|
out.write_bytes(env.full_text)
|
|
|
|
# MPY: scope_flags
|
|
scope_flags = MP_SCOPE_FLAG_VIPERRELOC
|
|
if len(env.full_rodata):
|
|
scope_flags |= MP_SCOPE_FLAG_VIPERRODATA
|
|
if len(env.full_bss):
|
|
scope_flags |= MP_SCOPE_FLAG_VIPERBSS
|
|
out.write_uint(scope_flags)
|
|
|
|
# MPY: bss and/or rodata
|
|
if len(env.full_rodata):
|
|
rodata_const_table_idx = 1
|
|
out.write_uint(len(env.full_rodata))
|
|
if len(env.full_bss):
|
|
bss_const_table_idx = 2
|
|
out.write_uint(len(env.full_bss))
|
|
if len(env.full_rodata):
|
|
out.write_bytes(env.full_rodata)
|
|
|
|
# MPY: relocation information
|
|
# See py/persistentcode.c:mp_native_relocate for meaning of the `kind` integer values.
|
|
prev_kind = None
|
|
prev_base = None
|
|
prev_offset = None
|
|
prev_n = None
|
|
for base, addr, kind in env.mpy_relocs:
|
|
if isinstance(kind, str) and kind.startswith(".text"):
|
|
kind = 0
|
|
elif isinstance(kind, str) and kind.startswith((".rodata", ".data.rel.ro")):
|
|
if env.arch.separate_rodata:
|
|
kind = rodata_const_table_idx
|
|
else:
|
|
kind = 0
|
|
elif isinstance(kind, str) and kind.startswith(".bss"):
|
|
kind = bss_const_table_idx
|
|
elif kind == "mp_native_qstr_table":
|
|
kind = 6
|
|
elif kind == "mp_native_obj_table":
|
|
kind = 7
|
|
elif kind == "mp_fun_table":
|
|
kind = 8
|
|
else:
|
|
kind = 9 + kind
|
|
assert addr % env.arch.word_size == 0, addr
|
|
offset = addr // env.arch.word_size
|
|
if kind == prev_kind and base == prev_base and offset == prev_offset + 1:
|
|
prev_n += 1
|
|
prev_offset += 1
|
|
else:
|
|
if prev_kind is not None:
|
|
out.write_reloc(prev_base, prev_offset - prev_n + 1, prev_kind, prev_n)
|
|
prev_kind = kind
|
|
prev_base = base
|
|
prev_offset = offset
|
|
prev_n = 1
|
|
if prev_kind is not None:
|
|
out.write_reloc(prev_base, prev_offset - prev_n + 1, prev_kind, prev_n)
|
|
|
|
# MPY: sentinel for end of relocations
|
|
out.write_bytes(b"\xff")
|
|
|
|
out.close()
|
|
|
|
|
|
################################################################################
|
|
# main
|
|
|
|
|
|
def do_preprocess(args):
|
|
if args.output is None:
|
|
assert args.files[0].endswith(".c")
|
|
args.output = args.files[0][:-1] + "config.h"
|
|
static_qstrs, qstr_vals = extract_qstrs(args.files)
|
|
with open(args.output, "w") as f:
|
|
print(
|
|
"#include <stdint.h>\n"
|
|
"typedef uintptr_t mp_uint_t;\n"
|
|
"typedef intptr_t mp_int_t;\n"
|
|
"typedef uintptr_t mp_off_t;",
|
|
file=f,
|
|
)
|
|
for i, q in enumerate(static_qstrs):
|
|
print("#define %s (%u)" % (q, i + 1), file=f)
|
|
for i, q in enumerate(sorted(qstr_vals)):
|
|
print("#define %s (mp_native_qstr_table[%d])" % (q, i + 1), file=f)
|
|
print("extern const uint16_t mp_native_qstr_table[];", file=f)
|
|
print("extern const mp_uint_t mp_native_obj_table[];", file=f)
|
|
|
|
|
|
def do_link(args):
|
|
if args.output is None:
|
|
assert args.files[0].endswith(".o")
|
|
args.output = args.files[0][:-1] + "mpy"
|
|
native_qstr_vals = []
|
|
if args.qstrs is not None:
|
|
with open(args.qstrs) as f:
|
|
for l in f:
|
|
m = re.match(r"#define MP_QSTR_([A-Za-z0-9_]*) \(mp_native_", l)
|
|
if m:
|
|
native_qstr_vals.append(m.group(1))
|
|
log(LOG_LEVEL_2, "qstr vals: " + ", ".join(native_qstr_vals))
|
|
env = LinkEnv(args.arch)
|
|
try:
|
|
for file in args.files:
|
|
load_object_file(env, file)
|
|
link_objects(env, len(native_qstr_vals))
|
|
build_mpy(env, env.find_addr("mpy_init"), args.output, native_qstr_vals)
|
|
except LinkError as er:
|
|
print("LinkError:", er.args[0])
|
|
sys.exit(1)
|
|
|
|
|
|
def main():
|
|
import argparse
|
|
|
|
cmd_parser = argparse.ArgumentParser(description="Run scripts on the pyboard.")
|
|
cmd_parser.add_argument(
|
|
"--verbose", "-v", action="count", default=1, help="increase verbosity"
|
|
)
|
|
cmd_parser.add_argument("--arch", default="x64", help="architecture")
|
|
cmd_parser.add_argument("--preprocess", action="store_true", help="preprocess source files")
|
|
cmd_parser.add_argument("--qstrs", default=None, help="file defining additional qstrs")
|
|
cmd_parser.add_argument(
|
|
"--output", "-o", default=None, help="output .mpy file (default to input with .o->.mpy)"
|
|
)
|
|
cmd_parser.add_argument("files", nargs="+", help="input files")
|
|
args = cmd_parser.parse_args()
|
|
|
|
global log_level
|
|
log_level = args.verbose
|
|
|
|
if args.preprocess:
|
|
do_preprocess(args)
|
|
else:
|
|
do_link(args)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|