From 5bb8a7a7c68781e6edbd3aebaf7eeeecb310bd86 Mon Sep 17 00:00:00 2001 From: Scott Shawcroft Date: Tue, 28 Feb 2023 15:07:35 -0800 Subject: [PATCH] Improve iMX RT performance * Enable dcache for OCRAM where the VM heap lives. * Add CIRCUITPY_SWO_TRACE for pushing program counters out over the SWO pin via the ITM module in the CPU. Exempt some functions from instrumentation to reduce traffic and allow inlining. * Place more functions in ITCM to handle errors using code in RAM-only and speed up CP. * Use SET and CLEAR registers for digitalio. The SDK does read, mask and write. * Switch to 2MiB reserved for CircuitPython code. Up from 1MiB. * Run USB interrupts during flash erase and write. * Allow storage writes from CP if the USB drive is disabled. * Get perf bench tests running on CircuitPython and increase timeouts so it works when instrumentation is active. --- ports/atmel-samd/Makefile | 2 +- ports/mimxrt10xx/Makefile | 15 +- ports/mimxrt10xx/background.c | 7 +- .../boards/imxrt1010_evk/mpconfigboard.h | 5 + ports/mimxrt10xx/boards/metro_m7_1011/board.c | 15 ++ .../boards/metro_m7_1011/flash_config.c | 4 +- .../common-hal/digitalio/DigitalInOut.c | 7 +- .../common-hal/microcontroller/__init__.c | 7 +- .../common-hal/neopixel_write/__init__.c | 8 +- ports/mimxrt10xx/linking/common.ld | 76 ++++++-- ports/mimxrt10xx/mpconfigport.mk | 2 + ports/mimxrt10xx/reset.c | 3 +- .../supervisor/flexspi_nor_flash_ops.c | 16 +- ports/mimxrt10xx/supervisor/internal_flash.c | 40 ++-- ports/mimxrt10xx/supervisor/internal_flash.h | 1 + ports/mimxrt10xx/supervisor/port.c | 172 ++++++++++++++---- ports/mimxrt10xx/supervisor/serial.c | 90 --------- ports/mimxrt10xx/supervisor/usb.c | 6 +- py/argcheck.c | 2 +- py/bc.c | 2 +- py/circuitpy_mpconfig.h | 12 ++ py/circuitpy_mpconfig.mk | 4 + py/gc.c | 8 +- py/mpconfig.h | 17 ++ py/obj.h | 74 ++++---- py/objdict.c | 4 +- py/objfun.c | 14 +- py/objproperty.c | 2 +- py/pystack.c | 2 +- py/pystack.h | 26 +-- py/qstr.c | 2 +- py/runtime.c | 61 
++++--- py/runtime.h | 12 +- py/stackctrl.c | 4 +- shared-bindings/digitalio/DigitalInOut.c | 2 +- shared-module/storage/__init__.c | 1 + supervisor/linker.h | 3 +- supervisor/shared/background_callback.c | 6 +- supervisor/shared/filesystem.c | 3 +- supervisor/shared/flash.c | 2 +- supervisor/shared/safe_mode.c | 5 +- supervisor/shared/status_bar.c | 4 +- supervisor/shared/translate/translate_impl.h | 4 +- supervisor/shared/usb/usb.c | 4 +- tests/perf_bench/benchrun.py | 2 +- tools/cortex-m-fault-gdb.py | 106 +++++++++++ tools/cpboard.py | 26 ++- tools/gen_display_resources.py | 10 +- tools/swo_function_trace.py | 143 +++++++++++++++ tools/swo_viewer.py | 67 +++++++ 50 files changed, 809 insertions(+), 301 deletions(-) delete mode 100644 ports/mimxrt10xx/supervisor/serial.c create mode 100644 tools/cortex-m-fault-gdb.py create mode 100644 tools/swo_function_trace.py create mode 100644 tools/swo_viewer.py diff --git a/ports/atmel-samd/Makefile b/ports/atmel-samd/Makefile index 0ac326d3cc..5a4e5111da 100644 --- a/ports/atmel-samd/Makefile +++ b/ports/atmel-samd/Makefile @@ -366,7 +366,7 @@ all: $(BUILD)/firmware.bin $(BUILD)/firmware.uf2 $(BUILD)/firmware.elf: $(OBJ) $(GENERATED_LD_FILE) $(STEPECHO) "LINK $@" $(Q)echo $(OBJ) > $(BUILD)/firmware.objs - $(Q)$(CC) -o $@ $(LDFLAGS) @$(BUILD)/firmware.objs -Wl,--start-group $(LIBS) -Wl,--end-group + $(Q)$(CC) -o $@ $(LDFLAGS) @$(BUILD)/firmware.objs -Wl,--print-memory-usage -Wl,--start-group $(LIBS) -Wl,--end-group $(Q)$(SIZE) $@ | $(PYTHON) $(TOP)/tools/build_memory_info.py $(GENERATED_LD_FILE) $(BUILD) $(BUILD)/firmware.bin: $(BUILD)/firmware.elf diff --git a/ports/mimxrt10xx/Makefile b/ports/mimxrt10xx/Makefile index b38f120ee2..c561b894d3 100644 --- a/ports/mimxrt10xx/Makefile +++ b/ports/mimxrt10xx/Makefile @@ -49,7 +49,12 @@ INC += \ CFLAGS += -ftree-vrp -DNDEBUG # TinyUSB defines -CFLAGS += -DCFG_TUSB_MCU=OPT_MCU_MIMXRT10XX -DCFG_TUD_MIDI_RX_BUFSIZE=512 -DCFG_TUD_CDC_RX_BUFSIZE=512 -DCFG_TUD_MIDI_TX_BUFSIZE=512 
-DCFG_TUD_CDC_TX_BUFSIZE=512 -DCFG_TUD_MSC_BUFSIZE=1024 +CFLAGS += -DCFG_TUSB_MCU=OPT_MCU_MIMXRT10XX -DCFG_TUD_CDC_RX_BUFSIZE=512 -DCFG_TUD_CDC_TX_BUFSIZE=512 +ifeq ($(CHIP_FAMILY), MIMXRT1011) +CFLAGS += -DCFG_TUD_MIDI_RX_BUFSIZE=64 -DCFG_TUD_MIDI_TX_BUFSIZE=64 -DCFG_TUD_MSC_BUFSIZE=512 +else +CFLAGS += -DCFG_TUD_MIDI_RX_BUFSIZE=512 -DCFG_TUD_MIDI_TX_BUFSIZE=512 -DCFG_TUD_MSC_BUFSIZE=1024 +endif #Debugging/Optimization # Never set -fno-inline because we use inline to move small functions into routines that must be @@ -76,11 +81,15 @@ CFLAGS += \ -g3 -Wno-unused-parameter \ -ffunction-sections -fdata-sections -fstack-usage -OPTIMIZATION_FLAGS ?= -O2 -fno-inline-functions +OPTIMIZATION_FLAGS ?= -O2 # option to override compiler optimization level, set in boards/$(BOARD)/mpconfigboard.mk CFLAGS += $(OPTIMIZATION_FLAGS) +ifeq ($(CIRCUITPY_SWO_TRACE), 1) + CFLAGS += -finstrument-functions -finstrument-functions-exclude-file-list=tinyusb -finstrument-functions-exclude-function-list='USB_OTG1_IRQHandler,usb_irq_handler,nlr_push,CLOCK_EnableClock,CLOCK_SetDiv,CLOCK_SetMux,__DMB,__ISB,__DSB,SCB_EnableICache,SCB_EnableDCache,ARM_MPU_Disable,ARM_MPU_Enable,SCB_DisableDCache,SCB_DisableICache,__enable_irq,__disable_irq,__set_MSP,port_get_raw_ticks,supervisor_ticks_ms64' +endif + LD_FILES = $(wildcard boards/$(BOARD)/*.ld) $(addprefix linking/, flash/$(FLASH).ld chip_family/$(CHIP_FAMILY).ld common.ld) LD_SCRIPT_FLAG := -Wl,-T, @@ -171,7 +180,7 @@ all: $(BUILD)/firmware.bin $(BUILD)/firmware.uf2 $(BUILD)/firmware.hex $(BUILD)/firmware.elf: $(OBJ) $(LD_FILES) $(STEPECHO) "LINK $@" - $(Q)$(CC) -o $@ $(LDFLAGS) $(filter-out %.ld, $^) -Wl,--start-group $(LIBS) -Wl,--end-group + $(Q)$(CC) -o $@ $(LDFLAGS) $(filter-out %.ld, $^) -Wl,--print-memory-usage -Wl,--start-group $(LIBS) -Wl,--end-group $(BUILD)/firmware.bin: $(BUILD)/firmware.elf $(STEPECHO) "Create $@" diff --git a/ports/mimxrt10xx/background.c b/ports/mimxrt10xx/background.c index 5815c222b4..529f791a39 100644 --- 
a/ports/mimxrt10xx/background.c +++ b/ports/mimxrt10xx/background.c @@ -27,7 +27,11 @@ #include "supervisor/port.h" -void port_background_task(void) { +#include "supervisor/linker.h" + +#include "fsl_common.h" + +void PLACE_IN_ITCM(port_background_task)(void) { } void port_background_tick(void) { @@ -38,5 +42,6 @@ void port_background_tick(void) { void port_start_background_task(void) { } + void port_finish_background_task(void) { } diff --git a/ports/mimxrt10xx/boards/imxrt1010_evk/mpconfigboard.h b/ports/mimxrt10xx/boards/imxrt1010_evk/mpconfigboard.h index 77d458d75b..192c265f88 100644 --- a/ports/mimxrt10xx/boards/imxrt1010_evk/mpconfigboard.h +++ b/ports/mimxrt10xx/boards/imxrt1010_evk/mpconfigboard.h @@ -16,3 +16,8 @@ #define DEFAULT_UART_BUS_RX (&pin_GPIO_09) #define DEFAULT_UART_BUS_TX (&pin_GPIO_10) + +#define CIRCUITPY_CONSOLE_UART_RX (&pin_GPIO_09) +#define CIRCUITPY_CONSOLE_UART_TX (&pin_GPIO_10) + +#define MICROPY_HW_LED_STATUS (&pin_GPIO_11) diff --git a/ports/mimxrt10xx/boards/metro_m7_1011/board.c b/ports/mimxrt10xx/boards/metro_m7_1011/board.c index 27cbd3eb96..62be2303a5 100644 --- a/ports/mimxrt10xx/boards/metro_m7_1011/board.c +++ b/ports/mimxrt10xx/boards/metro_m7_1011/board.c @@ -47,3 +47,18 @@ const mcu_pin_obj_t *mimxrt10xx_reset_forbidden_pins[] = { }; // Use the MP_WEAK supervisor/shared/board.c versions of routines not defined here. 
+ +bool mimxrt10xx_board_reset_pin_number(const mcu_pin_obj_t *pin) { + #if CIRCUITPY_SWO_TRACE + if (pin == &pin_GPIO_AD_09) { + IOMUXC_SetPinMux( /* Add these lines*/ + IOMUXC_GPIO_AD_09_ARM_TRACE_SWO, + 0U); + IOMUXC_SetPinConfig( /* Add these lines*/ + IOMUXC_GPIO_AD_09_ARM_TRACE_SWO, + 0x00F9U); + return true; + } + #endif + return false; +} diff --git a/ports/mimxrt10xx/boards/metro_m7_1011/flash_config.c b/ports/mimxrt10xx/boards/metro_m7_1011/flash_config.c index b2894d7669..3477de9e34 100644 --- a/ports/mimxrt10xx/boards/metro_m7_1011/flash_config.c +++ b/ports/mimxrt10xx/boards/metro_m7_1011/flash_config.c @@ -10,7 +10,7 @@ #include "fsl_flexspi_nor_boot.h" -__attribute__((section(".boot_hdr.ivt"))) +__attribute__((section(".boot_hdr.ivt"),used)) /************************************* * IVT Data *************************************/ @@ -25,7 +25,7 @@ const ivt image_vector_table = { IVT_RSVD /* Reserved = 0 */ }; -__attribute__((section(".boot_hdr.boot_data"))) +__attribute__((section(".boot_hdr.boot_data"),used)) /************************************* * Boot Data *************************************/ diff --git a/ports/mimxrt10xx/common-hal/digitalio/DigitalInOut.c b/ports/mimxrt10xx/common-hal/digitalio/DigitalInOut.c index 7639204bc6..d800de15de 100644 --- a/ports/mimxrt10xx/common-hal/digitalio/DigitalInOut.c +++ b/ports/mimxrt10xx/common-hal/digitalio/DigitalInOut.c @@ -118,7 +118,12 @@ digitalio_direction_t common_hal_digitalio_digitalinout_get_direction( void common_hal_digitalio_digitalinout_set_value( digitalio_digitalinout_obj_t *self, bool value) { - GPIO_PinWrite(self->pin->gpio, self->pin->number, value); + GPIO_Type *gpio = self->pin->gpio; + if (value) { + gpio->DR_SET = 1 << self->pin->number; + } else { + gpio->DR_CLEAR = 1 << self->pin->number; + } } bool common_hal_digitalio_digitalinout_get_value( diff --git a/ports/mimxrt10xx/common-hal/microcontroller/__init__.c b/ports/mimxrt10xx/common-hal/microcontroller/__init__.c index 
5388f404de..1b10b4d4ee 100644 --- a/ports/mimxrt10xx/common-hal/microcontroller/__init__.c +++ b/ports/mimxrt10xx/common-hal/microcontroller/__init__.c @@ -36,6 +36,7 @@ #include "shared-bindings/microcontroller/__init__.h" #include "shared-bindings/microcontroller/Pin.h" #include "shared-bindings/microcontroller/Processor.h" +#include "supervisor/linker.h" #include "supervisor/shared/safe_mode.h" #include "supervisor/shared/translate/translate.h" @@ -43,14 +44,14 @@ void common_hal_mcu_delay_us(uint32_t delay) { mp_hal_delay_us(delay); } -volatile uint32_t nesting_count = 0; -void common_hal_mcu_disable_interrupts(void) { +volatile uint32_t PLACE_IN_DTCM_BSS(nesting_count) = 0; +void PLACE_IN_ITCM(common_hal_mcu_disable_interrupts)(void) { __disable_irq(); __DMB(); nesting_count++; } -void common_hal_mcu_enable_interrupts(void) { +void PLACE_IN_ITCM(common_hal_mcu_enable_interrupts)(void) { if (nesting_count == 0) { // This is very very bad because it means there was mismatched disable/enables reset_into_safe_mode(SAFE_MODE_INTERRUPT_ERROR); diff --git a/ports/mimxrt10xx/common-hal/neopixel_write/__init__.c b/ports/mimxrt10xx/common-hal/neopixel_write/__init__.c index 88d0453065..933d8aa444 100644 --- a/ports/mimxrt10xx/common-hal/neopixel_write/__init__.c +++ b/ports/mimxrt10xx/common-hal/neopixel_write/__init__.c @@ -64,9 +64,7 @@ void PLACE_IN_ITCM(common_hal_neopixel_write)(const digitalio_digitalinout_obj_t const uint32_t pin = digitalinout->pin->number; __disable_irq(); - // Enable DWT in debug core. Useable when interrupts disabled, as opposed to Systick->VAL - CoreDebug->DEMCR |= CoreDebug_DEMCR_TRCENA_Msk; - DWT->CTRL |= DWT_CTRL_CYCCNTENA_Msk; + // Use DWT in debug core. Useable when interrupts disabled, as opposed to Systick->VAL DWT->CYCCNT = 0; for (;;) { @@ -88,12 +86,12 @@ void PLACE_IN_ITCM(common_hal_neopixel_write)(const digitalio_digitalinout_obj_t mask = 0x80; } } + // Enable interrupts again + __enable_irq(); // Update the next start. 
next_start_raw_ticks = port_get_raw_ticks(NULL) + 4; - // Enable interrupts again - __enable_irq(); } #pragma GCC pop_options diff --git a/ports/mimxrt10xx/linking/common.ld b/ports/mimxrt10xx/linking/common.ld index b6f1acc8d0..61a9a2299f 100644 --- a/ports/mimxrt10xx/linking/common.ld +++ b/ports/mimxrt10xx/linking/common.ld @@ -6,7 +6,7 @@ Boards can setup reserved flash with _ld_reserved_flash_size in board.ld. */ ENTRY(Reset_Handler) -code_size = 1M; +code_size = 2M; _ld_default_stack_size = 20K; /* Default reserved flash to nothing. */ @@ -22,9 +22,9 @@ MEMORY FLASH_IVT (rx) : ORIGIN = 0x60001000, LENGTH = 4K /* Place the ISRs 48k in to leave room for the bootloader when it is available. */ FLASH_FIRMWARE (rx) : ORIGIN = 0x6000C000, LENGTH = code_size - 48K - FLASH_FATFS (r) : ORIGIN = 0x60100000, LENGTH = _ld_flash_size - code_size - _ld_reserved_flash_size + FLASH_FATFS (r) : ORIGIN = 0x60000000 + code_size, LENGTH = _ld_flash_size - code_size - _ld_reserved_flash_size /* Teensy uses the last bit of flash for recovery. */ - RESERVED_FLASH : ORIGIN = 0x60100000 + _ld_flash_size - _ld_reserved_flash_size, LENGTH = _ld_reserved_flash_size + RESERVED_FLASH : ORIGIN = 0x60000000 + code_size + _ld_flash_size - _ld_reserved_flash_size, LENGTH = _ld_reserved_flash_size OCRAM (rwx) : ORIGIN = 0x20200000, LENGTH = ram_size - 64K DTCM (x) : ORIGIN = 0x20000000, LENGTH = 32K ITCM (x) : ORIGIN = 0x00000000, LENGTH = 32K @@ -55,21 +55,42 @@ SECTIONS .text : { . = ALIGN(4); - __VECTOR_TABLE = .; - __VECTOR_RAM = .; - _ld_isr_table = .; - - KEEP(*(.isr_vector)) /* Startup code */ *(EXCLUDE_FILE( *fsl_flexspi.o + *dcd_ci_hs.o + *tusb_fifo.o + *usbd.o + *string0.o + *py/nlr*.o + *py/obj.o + *py/gc.o + *py/map.o + *py/runtime.o + *py/objboundmeth.o + *py/objtype.o ) .text*) /* .text* sections (code) */ - *(.rodata*) /* .rodata* sections (constants, strings, etc.) */ + + /* Keep USB processing functions out of RAM because we don't know which will be used. 
+ We try to only keep USB interrupt related functions. */ + *dcd_ci_hs.o(.text.process_*_request .text.dcd_edpt* .text.dcd_init .text.dcd_set_address) + *usbd.o(.text.process_*_request .text.process_[gs]et* .text.tud_* .text.usbd_* .text.configuration_reset .text.invoke_*) + + /* Anything marked cold/unlikely should be in flash. */ + *(.text.unlikely.*) + + *(EXCLUDE_FILE( + *dcd_ci_hs.o + *py/objboundmeth.o + *py/objtype.o + ) .rodata*) /* .rodata* sections (constants, strings, etc.) */ . = ALIGN(4); } > FLASH_FIRMWARE .ARM.exidx : { + __exidx_start = .; *(.ARM.exidx*) + __exidx_end = .; *(.gnu.linkonce.armexidx.*) _etext = .; /* define a global symbol at end of code */ __etext = .; /* define a global symbol at end of code */ @@ -81,7 +102,6 @@ SECTIONS { . = ALIGN(4); *(.data*) /* .data* sections */ - *fsl_flexspi.o(.text*) . = ALIGN(4); } > OCRAM AT> FLASH_FIRMWARE _ld_ocram_data_destination = ADDR(.data); @@ -93,7 +113,7 @@ SECTIONS { . = ALIGN(4); - *(.bss*) + *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.bss*))) *(COMMON) . = ALIGN(4); @@ -103,22 +123,50 @@ SECTIONS _ld_heap_start = _ld_ocram_bss_start + _ld_ocram_bss_size; _ld_heap_end = ORIGIN(OCRAM) + LENGTH(OCRAM); - .itcm : + + .itcm : ALIGN(4) { . = ALIGN(4); *(.itcm.*) - + *fsl_flexspi.o(.text*) + *dcd_ci_hs.o(.text*) + *tusb_fifo.o(.text*) + *py/objboundmeth.o(.text*) + *py/objtype.o(.text*) + *py/obj.o(.text*) + *py/gc.o(.text*) + *py/map.o(.text*) + *py/nlr*.o(.text*) + *py/runtime.o(.text*) + *(.text.process_*_isr .text.dcd_event_* .text.osal_queue*) + *string0.o(.text*) . = ALIGN(4); } > ITCM AT> FLASH_FIRMWARE _ld_itcm_destination = ADDR(.itcm); _ld_itcm_flash_copy = LOADADDR(.itcm); _ld_itcm_size = SIZEOF(.itcm); + /* Align for 256 ISR entries */ + .isr_vector : ALIGN(4 * 256) + { + . = ALIGN(4); + KEEP(*(.isr_vector)) /* Startup code */ + . 
= ALIGN(4); + } > ITCM AT> FLASH_FIRMWARE + _ld_isr_destination = ADDR(.isr_vector); + _ld_isr_flash_copy = LOADADDR(.isr_vector); + _ld_isr_size = SIZEOF(.isr_vector); + /* Used by the bootloader to start user code. */ + __VECTOR_TABLE = LOADADDR(.isr_vector); + .dtcm_data : { . = ALIGN(4); *(.dtcm_data.*) + *dcd_ci_hs.o(.rodata*) + *py/objboundmeth.o(.rodata*) + *py/objtype.o(.rodata*) . = ALIGN(4); } > DTCM AT> FLASH_FIRMWARE @@ -139,7 +187,7 @@ SECTIONS _ld_dtcm_bss_start = ADDR(.dtcm_bss); _ld_dtcm_bss_size = SIZEOF(.dtcm_bss); - .stack : + .stack (NOLOAD) : { . = ALIGN(8); _ld_stack_bottom = .; diff --git a/ports/mimxrt10xx/mpconfigport.mk b/ports/mimxrt10xx/mpconfigport.mk index cee2d9a698..236d5b9966 100644 --- a/ports/mimxrt10xx/mpconfigport.mk +++ b/ports/mimxrt10xx/mpconfigport.mk @@ -6,6 +6,8 @@ USB_HIGHSPEED = 1 # Number of USB endpoint pairs. USB_NUM_ENDPOINT_PAIRS = 8 +# Align buffers on the cache boundary so we don't inadvertently load them early. +CIRCUITPY_TUSB_MEM_ALIGN = 32 INTERNAL_FLASH_FILESYSTEM = 1 diff --git a/ports/mimxrt10xx/reset.c b/ports/mimxrt10xx/reset.c index a3a4f667de..3d9a0b071b 100644 --- a/ports/mimxrt10xx/reset.c +++ b/ports/mimxrt10xx/reset.c @@ -26,10 +26,11 @@ #include "reset.h" #include "supervisor/filesystem.h" +#include "supervisor/linker.h" #include "fsl_common.h" -void reset(void) { +void PLACE_IN_ITCM(reset)(void) { filesystem_flush(); NVIC_SystemReset(); } diff --git a/ports/mimxrt10xx/supervisor/flexspi_nor_flash_ops.c b/ports/mimxrt10xx/supervisor/flexspi_nor_flash_ops.c index 950e9aa403..7563f386f9 100644 --- a/ports/mimxrt10xx/supervisor/flexspi_nor_flash_ops.c +++ b/ports/mimxrt10xx/supervisor/flexspi_nor_flash_ops.c @@ -13,6 +13,17 @@ #include "supervisor/internal_flash.h" #include "supervisor/linker.h" +STATIC uint8_t _busy_bit_shift; +STATIC bool _busy_bit_polarity; +STATIC bool _inited = false; + +void flexspi_nor_init(void) { + // Copy busy bit info into RAM so we can use if when flash isn't available. 
+ _busy_bit_shift = qspiflash_config.memConfig.busyOffset; + _busy_bit_polarity = qspiflash_config.memConfig.busyBitPolarity; + _inited = true; +} + STATIC status_t PLACE_IN_ITCM(flexspi_nor_write_enable)(FLEXSPI_Type * base, uint32_t baseAddr) { flexspi_transfer_t flashXfer; @@ -53,9 +64,8 @@ STATIC status_t PLACE_IN_ITCM(flexspi_nor_wait_bus_busy)(FLEXSPI_Type * base) if (status != kStatus_Success) { return status; } - size_t busyBit = readValue & (1U << qspiflash_config.memConfig.busyOffset); - isBusy = (qspiflash_config.memConfig.busyBitPolarity == 0 && busyBit != 0) || - (qspiflash_config.memConfig.busyBitPolarity == 1 && busyBit == 0); + bool busyBit = (readValue >> _busy_bit_shift) & 0x1; + isBusy = busyBit != _busy_bit_polarity; } while (isBusy); return status; diff --git a/ports/mimxrt10xx/supervisor/internal_flash.c b/ports/mimxrt10xx/supervisor/internal_flash.c index 72d57d1dd4..30a033a183 100644 --- a/ports/mimxrt10xx/supervisor/internal_flash.c +++ b/ports/mimxrt10xx/supervisor/internal_flash.c @@ -53,8 +53,15 @@ uint8_t _flash_cache[SECTOR_SIZE] __attribute__((aligned(4))); uint32_t _flash_page_addr = NO_CACHE; void PLACE_IN_ITCM(supervisor_flash_init)(void) { - // Update the LUT to make sure all entries are available. - FLEXSPI_UpdateLUT(FLEXSPI, 0, (const uint32_t *)&qspiflash_config.memConfig.lookupTable, 64); + // Update the LUT to make sure all entries are available. Copy the values to + // memory first so that we don't read from the flash as we update the LUT. + uint32_t lut_copy[64]; + memcpy(lut_copy, (const uint32_t *)&qspiflash_config.memConfig.lookupTable, 64 * sizeof(uint32_t)); + FLEXSPI_UpdateLUT(FLEXSPI, 0, lut_copy, 64); + // Make sure everything is flushed after updating the LUT. 
+ __DSB(); + __ISB(); + flexspi_nor_init(); } static inline uint32_t lba2addr(uint32_t block) { @@ -79,20 +86,21 @@ void PLACE_IN_ITCM(port_internal_flash_flush)(void) { if (memcmp(_flash_cache, (void *)_flash_page_addr, SECTOR_SIZE) != 0) { volatile uint32_t sector_addr = (_flash_page_addr - FlexSPI_AMBA_BASE); - __disable_irq(); + // Disable interrupts of priority 8+. They likely use code in flash + // itself. Higher priority interrupts (<8) should ensure all of their + // code is in RAM. + __set_BASEPRI(8 << (8 - __NVIC_PRIO_BITS)); status = flexspi_nor_flash_erase_sector(FLEXSPI, sector_addr); - __enable_irq(); + __set_BASEPRI(0U); if (status != kStatus_Success) { - printf("Page erase failure %ld!\r\n", status); return; } for (int i = 0; i < SECTOR_SIZE / FLASH_PAGE_SIZE; ++i) { - __disable_irq(); + __set_BASEPRI(8 << (8 - __NVIC_PRIO_BITS)); status = flexspi_nor_flash_page_program(FLEXSPI, sector_addr + i * FLASH_PAGE_SIZE, (void *)_flash_cache + i * FLASH_PAGE_SIZE); - __enable_irq(); + __set_BASEPRI(0U); if (status != kStatus_Success) { - printf("Page program failure %ld!\r\n", status); return; } } @@ -103,11 +111,17 @@ void PLACE_IN_ITCM(port_internal_flash_flush)(void) { } mp_uint_t supervisor_flash_read_blocks(uint8_t *dest, uint32_t block, uint32_t num_blocks) { - // Must write out anything in cache before trying to read. - supervisor_flash_flush(); + for (size_t i = 0; i < num_blocks; i++) { + uint32_t src = lba2addr(block + i); + uint32_t page_addr = src & ~(SECTOR_SIZE - 1); + // Copy from the cache if our page matches the cached one. 
+ if (page_addr == _flash_page_addr) { + src = ((uint32_t)&_flash_cache) + (src - page_addr); + } + + memcpy(dest + FILESYSTEM_BLOCK_SIZE * i, (uint8_t *)src, FILESYSTEM_BLOCK_SIZE); + } - uint32_t src = lba2addr(block); - memcpy(dest, (uint8_t *)src, FILESYSTEM_BLOCK_SIZE * num_blocks); return 0; // success } @@ -141,5 +155,5 @@ mp_uint_t supervisor_flash_write_blocks(const uint8_t *src, uint32_t lba, uint32 return 0; // success } -void supervisor_flash_release_cache(void) { +void PLACE_IN_ITCM(supervisor_flash_release_cache)(void) { } diff --git a/ports/mimxrt10xx/supervisor/internal_flash.h b/ports/mimxrt10xx/supervisor/internal_flash.h index 66d3f73db1..c38e2bc416 100644 --- a/ports/mimxrt10xx/supervisor/internal_flash.h +++ b/ports/mimxrt10xx/supervisor/internal_flash.h @@ -42,6 +42,7 @@ #define ROM_INDEX_PAGEPROGRAM 9 #define ROM_INDEX_READSTATUSREG 1 +extern void flexspi_nor_init(void); extern status_t flexspi_nor_flash_erase_sector(FLEXSPI_Type *base, uint32_t address); extern status_t flexspi_nor_flash_page_program(FLEXSPI_Type *base, uint32_t dstAddr, const uint32_t *src); extern status_t flexspi_nor_enable_quad_mode(FLEXSPI_Type *base); diff --git a/ports/mimxrt10xx/supervisor/port.c b/ports/mimxrt10xx/supervisor/port.c index 6997b4bae5..a27ab3ea58 100644 --- a/ports/mimxrt10xx/supervisor/port.c +++ b/ports/mimxrt10xx/supervisor/port.c @@ -42,13 +42,14 @@ #include "common-hal/busio/SPI.h" #include "shared-bindings/microcontroller/__init__.h" -#include "reset.h" - -#include "supervisor/background_callback.h" - #if CIRCUITPY_PEW #include "shared-module/_pew/PewPew.h" #endif + +#include "reset.h" + +#include "supervisor/background_callback.h" +#include "supervisor/linker.h" #include "supervisor/shared/tick.h" #include "clocks.h" @@ -97,16 +98,25 @@ extern uint32_t _ld_dtcm_data_flash_copy; extern uint32_t _ld_itcm_destination; extern uint32_t _ld_itcm_size; extern uint32_t _ld_itcm_flash_copy; +extern uint32_t _ld_isr_destination; +extern uint32_t 
_ld_isr_size; +extern uint32_t _ld_isr_flash_copy; extern void main(void); // This replaces the Reset_Handler in startup_*.S and SystemInit in system_*.c. +// Turn off optimize("no-tree-loop-distribute-patterns") so that this isn't replaced +// by calls to memcpy because we're copying it over now. void Reset_Handler(void); -__attribute__((used, naked)) void Reset_Handler(void) { +__attribute__((used, naked, no_instrument_function, optimize("no-tree-loop-distribute-patterns"))) void Reset_Handler(void) { __disable_irq(); - SCB->VTOR = (uint32_t)&__isr_vector; + // Set the VTOR to the flash copy since we haven't copied it into RAM. + SCB->VTOR = (uint32_t)&_ld_isr_flash_copy; __set_MSP((uint32_t)&_ld_stack_top); + // Turn off any residual ITM outputs. + ITM->TER = 0; + /* Disable I cache and D cache */ SCB_DisableICache(); SCB_DisableDCache(); @@ -128,6 +138,11 @@ __attribute__((used, naked)) void Reset_Handler(void) { current_gpr14 |= IOMUXC_GPR_GPR14_CM7_CFGITCMSZ(0x6); IOMUXC_GPR->GPR14 = current_gpr14; + // Enable FlexRAM interrupts on invalid access. + FLEXRAM->INT_STAT_EN = FLEXRAM_INT_STAT_EN_ITCM_ERR_STAT_EN(1) | + FLEXRAM_INT_STAT_EN_DTCM_ERR_STAT_EN(1) | + FLEXRAM_INT_STAT_EN_OCRAM_ERR_STAT_EN(1); + #if ((__FPU_PRESENT == 1) && (__FPU_USED == 1)) SCB->CPACR |= ((3UL << 10 * 2) | (3UL << 11 * 2)); /* set CP10, CP11 Full Access */ #endif /* ((__FPU_PRESENT == 1) && (__FPU_USED == 1)) */ @@ -157,6 +172,13 @@ __attribute__((used, naked)) void Reset_Handler(void) { (&_ld_itcm_destination)[i] = (&_ld_itcm_flash_copy)[i]; } + for (uint32_t i = 0; i < ((size_t)&_ld_isr_size) / 4; i++) { + (&_ld_isr_destination)[i] = (&_ld_isr_flash_copy)[i]; + } + + // Now that we've copied the ISR table over, use that VTOR. + SCB->VTOR = (uint32_t)&_ld_isr_destination; + // The first number in RBAR is the region number. When searching for a policy, the region with // the highest number wins. If none match, then the default policy set at enable applies. 
@@ -170,14 +192,19 @@ __attribute__((used, naked)) void Reset_Handler(void) { // FlexSPI2 is 0x70000000 - // This the first 1MB of flash is the bootloader and CircuitPython read-only data. - MPU->RBAR = ARM_MPU_RBAR(10, 0x60000000U); - MPU->RASR = ARM_MPU_RASR(EXECUTION, ARM_MPU_AP_FULL, NORMAL, NOT_SHAREABLE, CACHEABLE, BUFFERABLE, NO_SUBREGIONS, ARM_MPU_REGION_SIZE_1MB); + // This the first portion (1MB, 2MB or 4MB) of flash is the bootloader and CircuitPython read-only data. + MPU->RBAR = ARM_MPU_RBAR(10, FlexSPI_AMBA_BASE); + uint32_t region_size = ARM_MPU_REGION_SIZE_32B; + uint32_t code_size = ((uint32_t)&_ld_filesystem_start) - FlexSPI_AMBA_BASE; + while (code_size > (1u << (region_size + 1))) { + region_size += 1; + } + MPU->RASR = ARM_MPU_RASR(EXECUTION, ARM_MPU_AP_FULL, NORMAL, NOT_SHAREABLE, CACHEABLE, BUFFERABLE, NO_SUBREGIONS, region_size); // The remainder of flash is the fat filesystem which could have code on it too. Make sure that // we set the region to the minimal size so that bad data doesn't get speculatively fetched. // Thanks to Damien for the tip! - uint32_t region_size = ARM_MPU_REGION_SIZE_32B; + region_size = ARM_MPU_REGION_SIZE_32B; uint32_t filesystem_size = &_ld_filesystem_end - &_ld_filesystem_start; while (filesystem_size > (1u << (region_size + 1))) { region_size += 1; @@ -189,7 +216,7 @@ __attribute__((used, naked)) void Reset_Handler(void) { uint32_t subregion_size = (1u << (region_size + 1)) / 8; uint8_t subregion_mask = (0xff00 >> (remainder / subregion_size)) & 0xff; - MPU->RBAR = ARM_MPU_RBAR(11, 0x60100000U); + MPU->RBAR = ARM_MPU_RBAR(11, (size_t)&_ld_filesystem_start); MPU->RASR = ARM_MPU_RASR(EXECUTION, ARM_MPU_AP_FULL, NORMAL, NOT_SHAREABLE, CACHEABLE, BUFFERABLE, subregion_mask, region_size); // This the ITCM. Set it to read-only because we've loaded everything already and it's easy to @@ -205,9 +232,10 @@ __attribute__((used, naked)) void Reset_Handler(void) { // cost of 1/4 speed OCRAM accesses. 
It will leave more room for caching data from the flash // too which might be a net win. MPU->RBAR = ARM_MPU_RBAR(14, 0x20200000U); - MPU->RASR = ARM_MPU_RASR(EXECUTION, ARM_MPU_AP_FULL, NORMAL, SHAREABLE, CACHEABLE, BUFFERABLE, NO_SUBREGIONS, ARM_MPU_REGION_SIZE_512KB); + MPU->RASR = ARM_MPU_RASR(NO_EXECUTION, ARM_MPU_AP_FULL, NORMAL, NOT_SHAREABLE, CACHEABLE, BUFFERABLE, NO_SUBREGIONS, ARM_MPU_REGION_SIZE_512KB); // We steal 64k from FlexRAM for ITCM and DTCM so disable those memory regions here. + // We use 64k from FlexRAM for ITCM and DTCM so disable those memory regions here. MPU->RBAR = ARM_MPU_RBAR(15, 0x20280000U); MPU->RASR = ARM_MPU_RASR(EXECUTION, ARM_MPU_AP_FULL, NORMAL, NOT_SHAREABLE, CACHEABLE, BUFFERABLE, 0x80, ARM_MPU_REGION_SIZE_512KB); @@ -239,14 +267,97 @@ __attribute__((used, naked)) void Reset_Handler(void) { } __enable_irq(); + main(); } +void __attribute__((no_instrument_function,section(".itcm.profile_enter"),long_call)) __cyg_profile_func_enter(void *this_fn, + void *call_site) { + if ((ITM->TER & (1 << 3)) == 0) { + return; + } + uint32_t addr = (uint32_t)this_fn; + while (ITM->PORT[3U].u32 == 0UL) { + // addr |= 1; + } + ITM->PORT[3].u32 = addr; +} + +void __attribute__((no_instrument_function,section(".itcm.profile_exit"),long_call)) __cyg_profile_func_exit(void *this_fn, + void *call_site) { + if ((ITM->TER & (1 << 4)) == 0) { + return; + } + uint32_t addr = (uint32_t)this_fn; + while (ITM->PORT[4U].u32 == 0UL) { + // addr |= 1; + } + ITM->PORT[4].u32 = addr; +} + safe_mode_t port_init(void) { CLOCK_SetMode(kCLOCK_ModeRun); clocks_init(); + // Turn on the DWT so that neopixel_write can use CYCCNT for timing. + CoreDebug->DEMCR |= CoreDebug_DEMCR_TRCENA_Msk; + DWT->CTRL = 0x2 << DWT_CTRL_SYNCTAP_Pos | DWT_CTRL_CYCCNTENA_Msk; + + // Enable SWO if needed. + #if CIRCUITPY_SWO_TRACE + + // Turn on the 528 MHz clock to the TPIU. + CLOCK_EnableClock(kCLOCK_Trace); /* Make these edits*/ + /* Set TRACE_PODF. 
*/ + CLOCK_SetDiv(kCLOCK_TraceDiv, 0); /* Make these edits*/ + /* Set Trace clock source. */ + CLOCK_SetMux(kCLOCK_TraceMux, 0); /* Make these edits*/ + + ITM->TCR = ITM_TCR_TSENA_Msk | ITM_TCR_ITMENA_Msk | ITM_TCR_SYNCENA_Msk | ITM_TCR_DWTENA_Msk; + + // Run at 2.75 mbaud. CP2102N says it can do up to 3. + // Base clock is 528 mhz (not 500 like the core). + // TPI->ACPR = 191; + // Run at 1 mbaud so that USB isn't bottlenecked. + TPI->ACPR = 527; + TPI->SPPR = 0x2; // NRZ aka UART + TPI->FFCR = 0; + + IOMUXC_SetPinMux( /* Add these lines*/ + IOMUXC_GPIO_AD_09_ARM_TRACE_SWO, + 0U); + IOMUXC_SetPinConfig( /* Add these lines*/ + IOMUXC_GPIO_AD_09_ARM_TRACE_SWO, + 0x00F9U); + + // Enable ports 0-4: + // * 0 is serial output + // * + // * 3 is addresses of functions beginning. + // * 4 is addresses of functions ending. + ITM->TER |= 0x1f; + ITM->PORT[0].u8 = 'C'; + ITM->PORT[0].u8 = 'P'; + ITM->PORT[0].u8 = '\n'; + #endif + + // Set all peripheral interrupt priorities to the lowest priority by default. + for (uint16_t i = 0; i < NUMBER_OF_INT_VECTORS; i++) { + NVIC_SetPriority(i, (1UL << __NVIC_PRIO_BITS) - 1UL); + } + NVIC_SetPriority(USB_OTG1_IRQn, 1); + #ifdef USBPHY2 + NVIC_SetPriority(USB_OTG2_IRQn, 1); + #endif + + NVIC_SetPriority(FLEXRAM_IRQn, 0); + NVIC_EnableIRQ(FLEXRAM_IRQn); + + // Priorities 8+ will be disabled during flash operations. To run during + // flash operations, ensure all code is in RAM (not flash) and set the + // priority < 8. + #if CIRCUITPY_RTC rtc_init(); #endif @@ -305,7 +416,7 @@ void reset_to_bootloader(void) { reset(); } -void reset_cpu(void) { +void PLACE_IN_ITCM(reset_cpu)(void) { reset(); } @@ -332,7 +443,7 @@ uint32_t *port_heap_get_top(void) { } // Place the word into the low power section of the SNVS. 
-void port_set_saved_word(uint32_t value) { +void PLACE_IN_ITCM(port_set_saved_word)(uint32_t value) { SNVS->LPGPR[1] = value; } @@ -355,7 +466,7 @@ uint64_t port_get_raw_ticks(uint8_t *subticks) { void SNVS_HP_WRAPPER_IRQHandler(void); __attribute__((used)) -void SNVS_HP_WRAPPER_IRQHandler(void) { +void PLACE_IN_ITCM(SNVS_HP_WRAPPER_IRQHandler)(void) { if ((SNVS->HPSR & SNVS_HPSR_PI_MASK) != 0) { supervisor_tick(); SNVS->HPSR = SNVS_HPSR_PI_MASK; @@ -414,44 +525,43 @@ void port_idle_until_interrupt(void) { common_hal_mcu_enable_interrupts(); } -/** - * \brief Default interrupt handler for unused IRQs. - */ +// Catch faults where the memory access violates MPU settings. void MemManage_Handler(void); -__attribute__((used)) void MemManage_Handler(void) { +__attribute__((used)) void PLACE_IN_ITCM(MemManage_Handler)(void) { reset_into_safe_mode(SAFE_MODE_HARD_FAULT); while (true) { asm ("nop;"); } } -/** - * \brief Default interrupt handler for unused IRQs. - */ void BusFault_Handler(void); -__attribute__((used)) void BusFault_Handler(void) { +__attribute__((used)) void PLACE_IN_ITCM(BusFault_Handler)(void) { reset_into_safe_mode(SAFE_MODE_HARD_FAULT); while (true) { asm ("nop;"); } } -/** - * \brief Default interrupt handler for unused IRQs. - */ void UsageFault_Handler(void); -__attribute__((used)) void UsageFault_Handler(void) { +__attribute__((used)) void PLACE_IN_ITCM(UsageFault_Handler)(void) { reset_into_safe_mode(SAFE_MODE_HARD_FAULT); while (true) { asm ("nop;"); } } -/** - * \brief Default interrupt handler for unused IRQs. - */ +// Default fault handler. void HardFault_Handler(void); -__attribute__((used)) void HardFault_Handler(void) { +__attribute__((used)) void PLACE_IN_ITCM(HardFault_Handler)(void) { + reset_into_safe_mode(SAFE_MODE_HARD_FAULT); + while (true) { + asm ("nop;"); + } +} + +// Catch access errors to FlexRAM (if the MPU didn't catch it first.) 
+void FLEXRAM_IRQHandler(void); +__attribute__((used)) void PLACE_IN_ITCM(FLEXRAM_IRQHandler)(void) { reset_into_safe_mode(SAFE_MODE_HARD_FAULT); while (true) { asm ("nop;"); diff --git a/ports/mimxrt10xx/supervisor/serial.c b/ports/mimxrt10xx/supervisor/serial.c deleted file mode 100644 index cb557d36a8..0000000000 --- a/ports/mimxrt10xx/supervisor/serial.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * This file is part of the MicroPython project, http://micropython.org/ - * - * The MIT License (MIT) - * - * Copyright (c) 2017, 2018 Scott Shawcroft for Adafruit Industries - * Copyright (c) 2019 Lucian Copeland for Adafruit Industries - * Copyright (c) 2019 Artur Pacholec - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -#include "supervisor/serial.h" -#include "py/mphal.h" -#include - -#include "fsl_clock.h" -#include "fsl_lpuart.h" - -#if defined(CIRCUITPY_CONSOLE_UART) -// static LPUART_Type *uart_instance = LPUART1; // evk -static LPUART_Type *uart_instance = LPUART4; // feather 1011 -// static LPUART_Type *uart_instance = LPUART2; // feather 1062 -static uint32_t UartSrcFreq(void) { - uint32_t freq; - - /* To make it simple, we assume default PLL and divider settings, and the only - variable from application is use PLL3 source or OSC source */ - /* PLL3 div6 80M */ - if (CLOCK_GetMux(kCLOCK_UartMux) == 0) { - freq = (CLOCK_GetPllFreq(kCLOCK_PllUsb1) / 6U) / - (CLOCK_GetDiv(kCLOCK_UartDiv) + 1U); - } else { - freq = CLOCK_GetOscFreq() / (CLOCK_GetDiv(kCLOCK_UartDiv) + 1U); - } - - return freq; -} - -void port_serial_init(void) { - lpuart_config_t config; - - LPUART_GetDefaultConfig(&config); - config.baudRate_Bps = 115200; - config.enableTx = true; - config.enableRx = true; - - LPUART_Init(uart_instance, &config, UartSrcFreq()); -} - -bool port_serial_connected(void) { - return true; -} - -char port_serial_read(void) { - uint8_t data; - - LPUART_ReadBlocking(uart_instance, &data, sizeof(data)); - - return data; -} - -bool port_serial_bytes_available(void) { - return LPUART_GetStatusFlags(uart_instance) & kLPUART_RxDataRegFullFlag; -} - -void port_serial_write_substring(const char *text, uint32_t len) { - if (len == 0) { - return; - } - - LPUART_WriteBlocking(uart_instance, (uint8_t *)text, len); -} -#endif // CIRCUITPY_CONSOLE_UART diff --git a/ports/mimxrt10xx/supervisor/usb.c b/ports/mimxrt10xx/supervisor/usb.c index 2094a943b5..bc6bc5f0cd 100644 --- a/ports/mimxrt10xx/supervisor/usb.c +++ b/ports/mimxrt10xx/supervisor/usb.c @@ -27,6 +27,8 @@ #include "fsl_clock.h" #include "tusb.h" + +#include "supervisor/linker.h" #include "supervisor/usb.h" STATIC void init_usb_instance(mp_int_t instance) { @@ -78,13 +80,13 @@ STATIC void init_usb_instance(mp_int_t instance) { // 
Provide the prototypes for the interrupt handlers. The iMX RT SDK doesn't. // The SDK only links to them from assembly. void USB_OTG1_IRQHandler(void); - void USB_OTG1_IRQHandler(void) { + void PLACE_IN_ITCM(USB_OTG1_IRQHandler)(void) { usb_irq_handler(0); } #ifdef USBPHY2 void USB_OTG2_IRQHandler(void); - void USB_OTG2_IRQHandler(void) { + void PLACE_IN_ITCM(USB_OTG2_IRQHandler)(void) { usb_irq_handler(1); } #endif diff --git a/py/argcheck.c b/py/argcheck.c index 05c2567ca2..465a82c97e 100644 --- a/py/argcheck.c +++ b/py/argcheck.c @@ -31,7 +31,7 @@ #include "supervisor/shared/translate/translate.h" -void mp_arg_check_num_sig(size_t n_args, size_t n_kw, uint32_t sig) { +void PLACE_IN_ITCM(mp_arg_check_num_sig)(size_t n_args, size_t n_kw, uint32_t sig) { // TODO maybe take the function name as an argument so we can print nicer error messages // The reverse of MP_OBJ_FUN_MAKE_SIG diff --git a/py/bc.c b/py/bc.c index e1645dbff0..e7ad43389a 100644 --- a/py/bc.c +++ b/py/bc.c @@ -111,7 +111,7 @@ STATIC void dump_args(const mp_obj_t *a, size_t sz) { // - code_state->fun_bc should contain a pointer to the function object // - code_state->ip should contain the offset in bytes from the pointer // code_state->fun_bc->bytecode to the entry n_state (0 for bytecode, non-zero for native) -void mp_setup_code_state(mp_code_state_t *code_state, size_t n_args, size_t n_kw, const mp_obj_t *args) { +void PLACE_IN_ITCM(mp_setup_code_state)(mp_code_state_t * code_state, size_t n_args, size_t n_kw, const mp_obj_t *args) { // This function is pretty complicated. It's main aim is to be efficient in speed and RAM // usage for the common case of positional only args. 
diff --git a/py/circuitpy_mpconfig.h b/py/circuitpy_mpconfig.h index a514b6a160..fb85983675 100644 --- a/py/circuitpy_mpconfig.h +++ b/py/circuitpy_mpconfig.h @@ -583,6 +583,18 @@ void supervisor_run_background_tasks_if_tick(void); #define MICROPY_WRAP_MP_EXECUTE_BYTECODE PLACE_IN_ITCM #endif +#ifndef MICROPY_WRAP_MP_LOAD_GLOBAL +#define MICROPY_WRAP_MP_LOAD_GLOBAL PLACE_IN_ITCM +#endif + +#ifndef MICROPY_WRAP_MP_LOAD_NAME +#define MICROPY_WRAP_MP_LOAD_NAME PLACE_IN_ITCM +#endif + +#ifndef MICROPY_WRAP_MP_OBJ_GET_TYPE +#define MICROPY_WRAP_MP_OBJ_GET_TYPE PLACE_IN_ITCM +#endif + #ifndef CIRCUITPY_DIGITALIO_HAVE_INPUT_ONLY #define CIRCUITPY_DIGITALIO_HAVE_INPUT_ONLY (0) #endif diff --git a/py/circuitpy_mpconfig.mk b/py/circuitpy_mpconfig.mk index c849a12851..d002ab4d43 100644 --- a/py/circuitpy_mpconfig.mk +++ b/py/circuitpy_mpconfig.mk @@ -560,6 +560,10 @@ CFLAGS += -DCIRCUITPY_TUSB_MEM_ALIGN=$(CIRCUITPY_TUSB_MEM_ALIGN) CIRCUITPY_TUSB_ATTR_USBRAM ?= ".bss.usbram" CFLAGS += -DCIRCUITPY_TUSB_ATTR_USBRAM=$(CIRCUITPY_TUSB_ATTR_USBRAM) +# Output function trace information from the ARM ITM. +CIRCUITPY_SWO_TRACE ?= 0 +CFLAGS += -DCIRCUITPY_SWO_TRACE=$(CIRCUITPY_SWO_TRACE) + # Define an equivalent for MICROPY_LONGINT_IMPL, to pass to $(MPY-TOOL) in py/mkrules.mk # $(MPY-TOOL) needs to know what kind of longint to use (if any) to freeze long integers. # This should correspond to the MICROPY_LONGINT_IMPL definition in mpconfigport.h. diff --git a/py/gc.c b/py/gc.c index 0ced4c854d..1812507c61 100644 --- a/py/gc.c +++ b/py/gc.c @@ -231,7 +231,9 @@ bool gc_is_locked(void) { // children: mark the unmarked child blocks and put those newly marked // blocks on the stack. When all children have been checked, pop off the // topmost block on the stack and repeat with that one. -STATIC void gc_mark_subtree(size_t block) { +// We don't instrument these functions because they occur a lot during GC and +// fill up the output buffer quickly. 
+STATIC void MP_NO_INSTRUMENT PLACE_IN_ITCM(gc_mark_subtree)(size_t block) { // Start with the block passed in the argument. size_t sp = 0; for (;;) { @@ -350,7 +352,7 @@ STATIC void gc_sweep(void) { } // Mark can handle NULL pointers because it verifies the pointer is within the heap bounds. -STATIC void gc_mark(void *ptr) { +STATIC void MP_NO_INSTRUMENT PLACE_IN_ITCM(gc_mark)(void *ptr) { if (VERIFY_PTR(ptr)) { size_t block = BLOCK_FROM_PTR(ptr); if (ATB_GET_KIND(block) == AT_HEAD) { @@ -397,7 +399,7 @@ void gc_collect_ptr(void *ptr) { #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) __attribute__((no_sanitize_address)) #endif -static void *gc_get_ptr(void **ptrs, int i) { +static void *MP_NO_INSTRUMENT PLACE_IN_ITCM(gc_get_ptr)(void **ptrs, int i) { #if MICROPY_DEBUG_VALGRIND if (!VALGRIND_CHECK_MEM_IS_ADDRESSABLE(&ptrs[i], sizeof(*ptrs))) { return NULL; diff --git a/py/mpconfig.h b/py/mpconfig.h index 9d68f4ce9d..d3842afbe7 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -1827,6 +1827,17 @@ typedef double mp_float_t; #define MP_WEAK __attribute__((weak)) #endif +// Modifier for functions which should not be instrumented when tracing with +// -finstrument-functions +#ifndef MP_NO_INSTRUMENT +#define MP_NO_INSTRUMENT __attribute__((no_instrument_function)) +#endif + +// Modifier for functions which should ideally be inlined +#ifndef MP_INLINE +#define MP_INLINE inline MP_NO_INSTRUMENT +#endif + // Modifier for functions which should be never inlined #ifndef MP_NOINLINE #define MP_NOINLINE __attribute__((noinline)) @@ -1847,6 +1858,12 @@ typedef double mp_float_t; #define MP_UNLIKELY(x) __builtin_expect((x), 0) #endif +// Modifier for functions which aren't often used. Calls will also be considered +// unlikely. Section names are `.text.unlikely` for use in linker scripts.
+#ifndef MP_COLD +#define MP_COLD __attribute__((cold)) +#endif + // To annotate that code is unreachable #ifndef MP_UNREACHABLE #if defined(__GNUC__) diff --git a/py/obj.h b/py/obj.h index 86fbe5155f..92732b31dd 100644 --- a/py/obj.h +++ b/py/obj.h @@ -86,19 +86,19 @@ typedef struct _mp_obj_base_t mp_obj_base_t; #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_A -static inline bool mp_obj_is_small_int(mp_const_obj_t o) { +static MP_INLINE bool mp_obj_is_small_int(mp_const_obj_t o) { return (((mp_int_t)(o)) & 1) != 0; } #define MP_OBJ_SMALL_INT_VALUE(o) (((mp_int_t)(o)) >> 1) #define MP_OBJ_NEW_SMALL_INT(small_int) ((mp_obj_t)((((mp_uint_t)(small_int)) << 1) | 1)) -static inline bool mp_obj_is_qstr(mp_const_obj_t o) { +static MP_INLINE bool mp_obj_is_qstr(mp_const_obj_t o) { return (((mp_int_t)(o)) & 7) == 2; } #define MP_OBJ_QSTR_VALUE(o) (((mp_uint_t)(o)) >> 3) #define MP_OBJ_NEW_QSTR(qst) ((mp_obj_t)((((mp_uint_t)(qst)) << 3) | 2)) -static inline bool mp_obj_is_immediate_obj(mp_const_obj_t o) { +static MP_INLINE bool mp_obj_is_immediate_obj(mp_const_obj_t o) { return (((mp_int_t)(o)) & 7) == 6; } #define MP_OBJ_IMMEDIATE_OBJ_VALUE(o) (((mp_uint_t)(o)) >> 3) @@ -115,25 +115,25 @@ mp_float_t mp_obj_float_get(mp_obj_t self_in); mp_obj_t mp_obj_new_float(mp_float_t value); #endif -static inline bool mp_obj_is_obj(mp_const_obj_t o) { +static MP_INLINE bool mp_obj_is_obj(mp_const_obj_t o) { return (((mp_int_t)(o)) & 3) == 0; } #elif MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_B -static inline bool mp_obj_is_small_int(mp_const_obj_t o) { +static MP_INLINE bool mp_obj_is_small_int(mp_const_obj_t o) { return (((mp_int_t)(o)) & 3) == 1; } #define MP_OBJ_SMALL_INT_VALUE(o) (((mp_int_t)(o)) >> 2) #define MP_OBJ_NEW_SMALL_INT(small_int) ((mp_obj_t)((((mp_uint_t)(small_int)) << 2) | 1)) -static inline bool mp_obj_is_qstr(mp_const_obj_t o) { +static MP_INLINE bool mp_obj_is_qstr(mp_const_obj_t o) { return (((mp_int_t)(o)) & 7) == 3; } #define MP_OBJ_QSTR_VALUE(o) (((mp_uint_t)(o)) >> 3) 
#define MP_OBJ_NEW_QSTR(qst) ((mp_obj_t)((((mp_uint_t)(qst)) << 3) | 3)) -static inline bool mp_obj_is_immediate_obj(mp_const_obj_t o) { +static MP_INLINE bool mp_obj_is_immediate_obj(mp_const_obj_t o) { return (((mp_int_t)(o)) & 7) == 7; } #define MP_OBJ_IMMEDIATE_OBJ_VALUE(o) (((mp_uint_t)(o)) >> 3) @@ -150,13 +150,13 @@ mp_float_t mp_obj_float_get(mp_obj_t self_in); mp_obj_t mp_obj_new_float(mp_float_t value); #endif -static inline bool mp_obj_is_obj(mp_const_obj_t o) { +static MP_INLINE bool mp_obj_is_obj(mp_const_obj_t o) { return (((mp_int_t)(o)) & 1) == 0; } #elif MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C -static inline bool mp_obj_is_small_int(mp_const_obj_t o) { +static MP_INLINE bool mp_obj_is_small_int(mp_const_obj_t o) { return (((mp_int_t)(o)) & 1) != 0; } #define MP_OBJ_SMALL_INT_VALUE(o) (((mp_int_t)(o)) >> 1) @@ -166,17 +166,17 @@ static inline bool mp_obj_is_small_int(mp_const_obj_t o) { #define mp_const_float_e MP_ROM_PTR((mp_obj_t)(((0x402df854 & ~3) | 2) + 0x80800000)) #define mp_const_float_pi MP_ROM_PTR((mp_obj_t)(((0x40490fdb & ~3) | 2) + 0x80800000)) -static inline bool mp_obj_is_float(mp_const_obj_t o) { +static MP_INLINE bool mp_obj_is_float(mp_const_obj_t o) { return (((mp_uint_t)(o)) & 3) == 2 && (((mp_uint_t)(o)) & 0xff800007) != 0x00000006; } -static inline mp_float_t mp_obj_float_get(mp_const_obj_t o) { +static MP_INLINE mp_float_t mp_obj_float_get(mp_const_obj_t o) { union { mp_float_t f; mp_uint_t u; } num = {.u = ((mp_uint_t)o - 0x80800000) & ~3}; return num.f; } -static inline mp_obj_t mp_obj_new_float(mp_float_t f) { +static MP_INLINE mp_obj_t mp_obj_new_float(mp_float_t f) { union { mp_float_t f; mp_uint_t u; @@ -185,37 +185,37 @@ static inline mp_obj_t mp_obj_new_float(mp_float_t f) { } #endif -static inline bool mp_obj_is_qstr(mp_const_obj_t o) { +static MP_INLINE bool mp_obj_is_qstr(mp_const_obj_t o) { return (((mp_uint_t)(o)) & 0xff80000f) == 0x00000006; } #define MP_OBJ_QSTR_VALUE(o) (((mp_uint_t)(o)) >> 4) #define 
MP_OBJ_NEW_QSTR(qst) ((mp_obj_t)((((mp_uint_t)(qst)) << 4) | 0x00000006)) -static inline bool mp_obj_is_immediate_obj(mp_const_obj_t o) { +static MP_INLINE bool mp_obj_is_immediate_obj(mp_const_obj_t o) { return (((mp_uint_t)(o)) & 0xff80000f) == 0x0000000e; } #define MP_OBJ_IMMEDIATE_OBJ_VALUE(o) (((mp_uint_t)(o)) >> 4) #define MP_OBJ_NEW_IMMEDIATE_OBJ(val) ((mp_obj_t)(((val) << 4) | 0xe)) -static inline bool mp_obj_is_obj(mp_const_obj_t o) { +static MP_INLINE bool mp_obj_is_obj(mp_const_obj_t o) { return (((mp_int_t)(o)) & 3) == 0; } #elif MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D -static inline bool mp_obj_is_small_int(mp_const_obj_t o) { +static MP_INLINE bool mp_obj_is_small_int(mp_const_obj_t o) { return (((uint64_t)(o)) & 0xffff000000000000) == 0x0001000000000000; } #define MP_OBJ_SMALL_INT_VALUE(o) (((mp_int_t)((o) << 16)) >> 17) #define MP_OBJ_NEW_SMALL_INT(small_int) (((((uint64_t)(small_int)) & 0x7fffffffffff) << 1) | 0x0001000000000001) -static inline bool mp_obj_is_qstr(mp_const_obj_t o) { +static MP_INLINE bool mp_obj_is_qstr(mp_const_obj_t o) { return (((uint64_t)(o)) & 0xffff000000000000) == 0x0002000000000000; } #define MP_OBJ_QSTR_VALUE(o) ((((uint32_t)(o)) >> 1) & 0xffffffff) #define MP_OBJ_NEW_QSTR(qst) ((mp_obj_t)(((uint64_t)(((uint32_t)(qst)) << 1)) | 0x0002000000000001)) -static inline bool mp_obj_is_immediate_obj(mp_const_obj_t o) { +static MP_INLINE bool mp_obj_is_immediate_obj(mp_const_obj_t o) { return (((uint64_t)(o)) & 0xffff000000000000) == 0x0003000000000000; } #define MP_OBJ_IMMEDIATE_OBJ_VALUE(o) ((((uint32_t)(o)) >> 46) & 3) @@ -230,17 +230,17 @@ static inline bool mp_obj_is_immediate_obj(mp_const_obj_t o) { #define mp_const_float_e {((mp_obj_t)((uint64_t)0x4005bf0a8b145769 + 0x8004000000000000))} #define mp_const_float_pi {((mp_obj_t)((uint64_t)0x400921fb54442d18 + 0x8004000000000000))} -static inline bool mp_obj_is_float(mp_const_obj_t o) { +static MP_INLINE bool mp_obj_is_float(mp_const_obj_t o) { return ((uint64_t)(o) & 
0xfffc000000000000) != 0; } -static inline mp_float_t mp_obj_float_get(mp_const_obj_t o) { +static MP_INLINE mp_float_t mp_obj_float_get(mp_const_obj_t o) { union { mp_float_t f; uint64_t r; } num = {.r = o - 0x8004000000000000}; return num.f; } -static inline mp_obj_t mp_obj_new_float(mp_float_t f) { +static MP_INLINE mp_obj_t mp_obj_new_float(mp_float_t f) { union { mp_float_t f; uint64_t r; @@ -249,7 +249,7 @@ static inline mp_obj_t mp_obj_new_float(mp_float_t f) { } #endif -static inline bool mp_obj_is_obj(mp_const_obj_t o) { +static MP_INLINE bool mp_obj_is_obj(mp_const_obj_t o) { return (((uint64_t)(o)) & 0xffff000000000000) == 0x0000000000000000; } #define MP_OBJ_TO_PTR(o) ((void *)(uintptr_t)(o)) @@ -454,7 +454,7 @@ typedef enum _mp_map_lookup_kind_t { MP_MAP_LOOKUP_ADD_IF_NOT_FOUND_OR_REMOVE_IF_FOUND = 3, // only valid for mp_set_lookup } mp_map_lookup_kind_t; -static inline bool mp_map_slot_is_filled(const mp_map_t *map, size_t pos) { +static MP_INLINE bool mp_map_slot_is_filled(const mp_map_t *map, size_t pos) { assert(pos < map->alloc); return (map)->table[pos].key != MP_OBJ_NULL && (map)->table[pos].key != MP_OBJ_SENTINEL; } @@ -476,7 +476,7 @@ typedef struct _mp_set_t { mp_obj_t *table; } mp_set_t; -static inline bool mp_set_slot_is_filled(const mp_set_t *set, size_t pos) { +static MP_INLINE bool mp_set_slot_is_filled(const mp_set_t *set, size_t pos) { return (set)->table[pos] != MP_OBJ_NULL && (set)->table[pos] != MP_OBJ_SENTINEL; } @@ -821,7 +821,7 @@ extern const struct _mp_obj_exception_t mp_static_GeneratorExit_obj; #define mp_obj_is_tuple_compatible(o) (mp_type_get_getiter_slot(mp_obj_get_type(o)) == mp_obj_tuple_getiter) mp_obj_t mp_obj_new_type(qstr name, mp_obj_t bases_tuple, mp_obj_t locals_dict); -static inline mp_obj_t mp_obj_new_bool(mp_int_t x) { +static MP_INLINE mp_obj_t mp_obj_new_bool(mp_int_t x) { return x ? 
mp_const_true : mp_const_false; } mp_obj_t mp_obj_new_cell(mp_obj_t obj); @@ -893,7 +893,7 @@ mp_obj_t mp_obj_equal_not_equal(mp_binary_op_t op, mp_obj_t o1, mp_obj_t o2); bool mp_obj_equal(mp_obj_t o1, mp_obj_t o2); // returns true if o is bool, small int or long int -static inline bool mp_obj_is_integer(mp_const_obj_t o) { +static MP_INLINE bool mp_obj_is_integer(mp_const_obj_t o) { return mp_obj_is_int(o) || mp_obj_is_bool(o); } @@ -940,7 +940,7 @@ mp_obj_t mp_obj_exception_get_value(mp_obj_t self_in); mp_obj_t mp_obj_exception_make_new(const mp_obj_type_t *type_in, size_t n_args, size_t n_kw, const mp_obj_t *args); mp_obj_t mp_alloc_emergency_exception_buf(mp_obj_t size_in); void mp_init_emergency_exception_buf(void); -static inline mp_obj_t mp_obj_new_exception_arg1(const mp_obj_type_t *exc_type, mp_obj_t arg) { +static MP_INLINE mp_obj_t mp_obj_new_exception_arg1(const mp_obj_type_t *exc_type, mp_obj_t arg) { assert(exc_type->make_new == mp_obj_exception_make_new); return mp_obj_exception_make_new(exc_type, 1, 0, &arg); } @@ -957,42 +957,42 @@ void mp_str_print_quoted(const mp_print_t *print, const byte *str_data, size_t s #if MICROPY_PY_BUILTINS_FLOAT // float #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT -static inline float mp_obj_get_float_to_f(mp_obj_t o) { +static MP_INLINE float mp_obj_get_float_to_f(mp_obj_t o) { return mp_obj_get_float(o); } -static inline double mp_obj_get_float_to_d(mp_obj_t o) { +static MP_INLINE double mp_obj_get_float_to_d(mp_obj_t o) { return (double)mp_obj_get_float(o); } -static inline mp_obj_t mp_obj_new_float_from_f(float o) { +static MP_INLINE mp_obj_t mp_obj_new_float_from_f(float o) { return mp_obj_new_float(o); } -static inline mp_obj_t mp_obj_new_float_from_d(double o) { +static MP_INLINE mp_obj_t mp_obj_new_float_from_d(double o) { return mp_obj_new_float((mp_float_t)o); } #elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE -static inline float mp_obj_get_float_to_f(mp_obj_t o) { +static MP_INLINE float 
mp_obj_get_float_to_f(mp_obj_t o) { return (float)mp_obj_get_float(o); } -static inline double mp_obj_get_float_to_d(mp_obj_t o) { +static MP_INLINE double mp_obj_get_float_to_d(mp_obj_t o) { return mp_obj_get_float(o); } -static inline mp_obj_t mp_obj_new_float_from_f(float o) { +static MP_INLINE mp_obj_t mp_obj_new_float_from_f(float o) { return mp_obj_new_float((mp_float_t)o); } -static inline mp_obj_t mp_obj_new_float_from_d(double o) { +static MP_INLINE mp_obj_t mp_obj_new_float_from_d(double o) { return mp_obj_new_float(o); } #endif #if MICROPY_FLOAT_HIGH_QUALITY_HASH mp_int_t mp_float_hash(mp_float_t val); #else -static inline mp_int_t mp_float_hash(mp_float_t val) { +static MP_INLINE mp_int_t mp_float_hash(mp_float_t val) { return (mp_int_t)val; } #endif @@ -1031,7 +1031,7 @@ mp_obj_t mp_obj_dict_get(mp_obj_t self_in, mp_obj_t index); mp_obj_t mp_obj_dict_store(mp_obj_t self_in, mp_obj_t key, mp_obj_t value); mp_obj_t mp_obj_dict_delete(mp_obj_t self_in, mp_obj_t key); mp_obj_t mp_obj_dict_copy(mp_obj_t self_in); -static inline mp_map_t *mp_obj_dict_get_map(mp_obj_t dict) { +static MP_INLINE mp_map_t *mp_obj_dict_get_map(mp_obj_t dict) { return &((mp_obj_dict_t *)MP_OBJ_TO_PTR(dict))->map; } diff --git a/py/objdict.c b/py/objdict.c index 306205d12f..02aedacdd6 100644 --- a/py/objdict.c +++ b/py/objdict.c @@ -229,7 +229,7 @@ STATIC mp_obj_t dict_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) { /******************************************************************************/ /* dict methods */ -STATIC void mp_ensure_not_fixed(const mp_obj_dict_t *dict) { +STATIC void PLACE_IN_ITCM(mp_ensure_not_fixed)(const mp_obj_dict_t * dict) { if (dict->map.is_fixed) { mp_raise_TypeError(NULL); } @@ -643,7 +643,7 @@ size_t mp_obj_dict_len(mp_obj_t self_in) { return self->map.used; } -mp_obj_t mp_obj_dict_store(mp_obj_t self_in, mp_obj_t key, mp_obj_t value) { +mp_obj_t PLACE_IN_ITCM(mp_obj_dict_store)(mp_obj_t self_in, mp_obj_t key, mp_obj_t value) { 
mp_check_self(mp_obj_is_dict_or_ordereddict(self_in)); mp_obj_dict_t *self = MP_OBJ_TO_PTR(self_in); mp_ensure_not_fixed(self); diff --git a/py/objfun.c b/py/objfun.c index 55c3fbbb06..5a02869fcf 100644 --- a/py/objfun.c +++ b/py/objfun.c @@ -50,7 +50,7 @@ /******************************************************************************/ /* builtin functions */ -STATIC mp_obj_t fun_builtin_0_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const mp_obj_t *args) { +STATIC mp_obj_t PLACE_IN_ITCM(fun_builtin_0_call)(mp_obj_t self_in, size_t n_args, size_t n_kw, const mp_obj_t *args) { (void)args; assert(mp_obj_is_type(self_in, &mp_type_fun_builtin_0)); mp_obj_fun_builtin_fixed_t *self = MP_OBJ_TO_PTR(self_in); @@ -68,7 +68,7 @@ const mp_obj_type_t mp_type_fun_builtin_0 = { ), }; -STATIC mp_obj_t fun_builtin_1_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const mp_obj_t *args) { +STATIC mp_obj_t PLACE_IN_ITCM(fun_builtin_1_call)(mp_obj_t self_in, size_t n_args, size_t n_kw, const mp_obj_t *args) { assert(mp_obj_is_type(self_in, &mp_type_fun_builtin_1)); mp_obj_fun_builtin_fixed_t *self = MP_OBJ_TO_PTR(self_in); mp_arg_check_num(n_args, n_kw, 1, 1, false); @@ -85,7 +85,7 @@ const mp_obj_type_t mp_type_fun_builtin_1 = { ), }; -STATIC mp_obj_t fun_builtin_2_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const mp_obj_t *args) { +STATIC mp_obj_t PLACE_IN_ITCM(fun_builtin_2_call)(mp_obj_t self_in, size_t n_args, size_t n_kw, const mp_obj_t *args) { assert(mp_obj_is_type(self_in, &mp_type_fun_builtin_2)); mp_obj_fun_builtin_fixed_t *self = MP_OBJ_TO_PTR(self_in); mp_arg_check_num(n_args, n_kw, 2, 2, false); @@ -102,7 +102,7 @@ const mp_obj_type_t mp_type_fun_builtin_2 = { ), }; -STATIC mp_obj_t fun_builtin_3_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const mp_obj_t *args) { +STATIC mp_obj_t PLACE_IN_ITCM(fun_builtin_3_call)(mp_obj_t self_in, size_t n_args, size_t n_kw, const mp_obj_t *args) { assert(mp_obj_is_type(self_in, &mp_type_fun_builtin_3)); 
mp_obj_fun_builtin_fixed_t *self = MP_OBJ_TO_PTR(self_in); mp_arg_check_num(n_args, n_kw, 3, 3, false); @@ -119,7 +119,7 @@ const mp_obj_type_t mp_type_fun_builtin_3 = { ), }; -STATIC mp_obj_t fun_builtin_var_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const mp_obj_t *args) { +STATIC mp_obj_t PLACE_IN_ITCM(fun_builtin_var_call)(mp_obj_t self_in, size_t n_args, size_t n_kw, const mp_obj_t *args) { assert(mp_obj_is_type(self_in, &mp_type_fun_builtin_var)); mp_obj_fun_builtin_var_t *self = MP_OBJ_TO_PTR(self_in); @@ -418,7 +418,7 @@ mp_obj_t mp_obj_new_fun_bc(mp_obj_t def_args_in, mp_obj_t def_kw_args, const byt #if MICROPY_EMIT_NATIVE -STATIC mp_obj_t fun_native_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const mp_obj_t *args) { +STATIC mp_obj_t PLACE_IN_ITCM(fun_native_call)(mp_obj_t self_in, size_t n_args, size_t n_kw, const mp_obj_t *args) { MP_STACK_CHECK(); mp_obj_fun_bc_t *self = self_in; mp_call_fun_t fun = MICROPY_MAKE_POINTER_CALLABLE((void *)self->bytecode); @@ -505,7 +505,7 @@ STATIC mp_uint_t convert_obj_for_inline_asm(mp_obj_t obj) { } } -STATIC mp_obj_t fun_asm_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const mp_obj_t *args) { +STATIC mp_obj_t PLACE_IN_ITCM(fun_asm_call)(mp_obj_t self_in, size_t n_args, size_t n_kw, const mp_obj_t *args) { mp_obj_fun_asm_t *self = self_in; mp_arg_check_num(n_args, n_kw, self->n_args, self->n_args, false); diff --git a/py/objproperty.c b/py/objproperty.c index e8ae5094fa..f55efc8bdb 100644 --- a/py/objproperty.c +++ b/py/objproperty.c @@ -99,7 +99,7 @@ const mp_obj_type_t mp_type_property = { extern const mp_obj_property_t __property_getter_start, __property_getter_end, __property_getset_start, __property_getset_end; #endif -const mp_obj_t *mp_obj_property_get(mp_obj_t self_in, size_t *n_proxy) { +const mp_obj_t *PLACE_IN_ITCM(mp_obj_property_get)(mp_obj_t self_in, size_t *n_proxy) { mp_check_self(mp_obj_is_type(self_in, &mp_type_property)); mp_obj_property_t *self = MP_OBJ_TO_PTR(self_in); #if 
MICROPY_PY_OPTIMIZE_PROPERTY_FLASH_SIZE diff --git a/py/pystack.c b/py/pystack.c index 43dfd4ed6c..696b033377 100644 --- a/py/pystack.c +++ b/py/pystack.c @@ -36,7 +36,7 @@ void mp_pystack_init(void *start, void *end) { MP_STATE_THREAD(pystack_cur) = start; } -void *mp_pystack_alloc(size_t n_bytes) { +void *PLACE_IN_ITCM(mp_pystack_alloc)(size_t n_bytes) { n_bytes = (n_bytes + (MICROPY_PYSTACK_ALIGN - 1)) & ~(MICROPY_PYSTACK_ALIGN - 1); #if MP_PYSTACK_DEBUG n_bytes += MICROPY_PYSTACK_ALIGN; diff --git a/py/pystack.h b/py/pystack.h index ed51e0c7e3..169d58b6f7 100644 --- a/py/pystack.h +++ b/py/pystack.h @@ -41,7 +41,7 @@ void *mp_pystack_alloc(size_t n_bytes); // This function can free multiple continuous blocks at once: just pass the // pointer to the block that was allocated first and it and all subsequently // allocated blocks will be freed. -static inline void mp_pystack_free(void *ptr) { +static MP_INLINE void mp_pystack_free(void *ptr) { assert((uint8_t *)ptr >= MP_STATE_THREAD(pystack_start)); assert((uint8_t *)ptr <= MP_STATE_THREAD(pystack_cur)); #if MP_PYSTACK_DEBUG @@ -59,16 +59,16 @@ static inline void mp_pystack_free(void *ptr) { MP_STATE_THREAD(pystack_cur) = (uint8_t *)ptr; } -static inline void mp_pystack_realloc(void *ptr, size_t n_bytes) { +static MP_INLINE void mp_pystack_realloc(void *ptr, size_t n_bytes) { mp_pystack_free(ptr); mp_pystack_alloc(n_bytes); } -static inline size_t mp_pystack_usage(void) { +static MP_INLINE size_t mp_pystack_usage(void) { return MP_STATE_THREAD(pystack_cur) - MP_STATE_THREAD(pystack_start); } -static inline size_t mp_pystack_limit(void) { +static MP_INLINE size_t mp_pystack_limit(void) { return MP_STATE_THREAD(pystack_end) - MP_STATE_THREAD(pystack_start); } @@ -78,43 +78,43 @@ static inline size_t mp_pystack_limit(void) { #define mp_local_alloc(n_bytes) alloca(n_bytes) -static inline void mp_local_free(void *ptr) { +static MP_INLINE void mp_local_free(void *ptr) { (void)ptr; } -static inline void 
*mp_nonlocal_alloc(size_t n_bytes) { +static MP_INLINE void *mp_nonlocal_alloc(size_t n_bytes) { return m_new(uint8_t, n_bytes); } -static inline void *mp_nonlocal_realloc(void *ptr, size_t old_n_bytes, size_t new_n_bytes) { +static MP_INLINE void *mp_nonlocal_realloc(void *ptr, size_t old_n_bytes, size_t new_n_bytes) { return m_renew(uint8_t, ptr, old_n_bytes, new_n_bytes); } -static inline void mp_nonlocal_free(void *ptr, size_t n_bytes) { +static MP_INLINE void mp_nonlocal_free(void *ptr, size_t n_bytes) { m_del(uint8_t, ptr, n_bytes); } #else -static inline void *mp_local_alloc(size_t n_bytes) { +static MP_INLINE void *mp_local_alloc(size_t n_bytes) { return mp_pystack_alloc(n_bytes); } -static inline void mp_local_free(void *ptr) { +static MP_INLINE void mp_local_free(void *ptr) { mp_pystack_free(ptr); } -static inline void *mp_nonlocal_alloc(size_t n_bytes) { +static MP_INLINE void *mp_nonlocal_alloc(size_t n_bytes) { return mp_pystack_alloc(n_bytes); } -static inline void *mp_nonlocal_realloc(void *ptr, size_t old_n_bytes, size_t new_n_bytes) { +static MP_INLINE void *mp_nonlocal_realloc(void *ptr, size_t old_n_bytes, size_t new_n_bytes) { (void)old_n_bytes; mp_pystack_realloc(ptr, new_n_bytes); return ptr; } -static inline void mp_nonlocal_free(void *ptr, size_t n_bytes) { +static MP_INLINE void mp_nonlocal_free(void *ptr, size_t n_bytes) { (void)n_bytes; mp_pystack_free(ptr); } diff --git a/py/qstr.c b/py/qstr.c index 083e12d6f0..96e2a79192 100644 --- a/py/qstr.c +++ b/py/qstr.c @@ -137,7 +137,7 @@ void qstr_init(void) { #endif } -STATIC const char *find_qstr(qstr q, qstr_attr_t *attr) { +STATIC const char *PLACE_IN_ITCM(find_qstr)(qstr q, qstr_attr_t *attr) { // search pool for this qstr // total_prev_len==0 in the final pool, so the loop will always terminate const qstr_pool_t *pool = MP_STATE_VM(last_pool); diff --git a/py/runtime.c b/py/runtime.c index 9227594d83..91d9eb1c6d 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -1588,7 +1588,7 @@ mp_obj_t 
mp_parse_compile_execute(mp_lexer_t *lex, mp_parse_input_kind_t parse_i #endif // MICROPY_ENABLE_COMPILER -NORETURN void m_malloc_fail(size_t num_bytes) { +NORETURN MP_COLD void m_malloc_fail(size_t num_bytes) { DEBUG_printf("memory allocation failed, allocating %u bytes\n", (uint)num_bytes); #if MICROPY_ENABLE_GC if (gc_is_locked()) { @@ -1601,25 +1601,25 @@ NORETURN void m_malloc_fail(size_t num_bytes) { #if MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_NONE -NORETURN void mp_raise_type(const mp_obj_type_t *exc_type) { +NORETURN MP_COLD void mp_raise_type(const mp_obj_type_t *exc_type) { nlr_raise(mp_obj_new_exception(exc_type)); } -NORETURN void mp_raise_ValueError_no_msg(void) { +NORETURN MP_COLD void mp_raise_ValueError_no_msg(void) { mp_raise_type(&mp_type_ValueError); } -NORETURN void mp_raise_TypeError_no_msg(void) { +NORETURN MP_COLD void mp_raise_TypeError_no_msg(void) { mp_raise_type(&mp_type_TypeError); } -NORETURN void mp_raise_NotImplementedError_no_msg(void) { +NORETURN MP_COLD void mp_raise_NotImplementedError_no_msg(void) { mp_raise_type(&mp_type_NotImplementedError); } #else -NORETURN void mp_raise_msg(const mp_obj_type_t *exc_type, const compressed_string_t *msg) { +NORETURN MP_COLD void mp_raise_msg(const mp_obj_type_t *exc_type, const compressed_string_t *msg) { if (msg == NULL) { nlr_raise(mp_obj_new_exception(exc_type)); } else { @@ -1627,19 +1627,19 @@ NORETURN void mp_raise_msg(const mp_obj_type_t *exc_type, const compressed_strin } } -NORETURN void mp_raise_msg_vlist(const mp_obj_type_t *exc_type, const compressed_string_t *fmt, va_list argptr) { +NORETURN MP_COLD void mp_raise_msg_vlist(const mp_obj_type_t *exc_type, const compressed_string_t *fmt, va_list argptr) { mp_obj_t exception = mp_obj_new_exception_msg_vlist(exc_type, fmt, argptr); nlr_raise(exception); } -NORETURN void mp_raise_msg_varg(const mp_obj_type_t *exc_type, const compressed_string_t *fmt, ...) 
{ +NORETURN MP_COLD void mp_raise_msg_varg(const mp_obj_type_t *exc_type, const compressed_string_t *fmt, ...) { va_list argptr; va_start(argptr,fmt); mp_raise_msg_vlist(exc_type, fmt, argptr); va_end(argptr); } -NORETURN void mp_raise_msg_str(const mp_obj_type_t *exc_type, const char *msg) { +NORETURN MP_COLD void mp_raise_msg_str(const mp_obj_type_t *exc_type, const char *msg) { if (msg == NULL) { nlr_raise(mp_obj_new_exception(exc_type)); } else { @@ -1647,56 +1647,56 @@ NORETURN void mp_raise_msg_str(const mp_obj_type_t *exc_type, const char *msg) { } } -NORETURN void mp_raise_AttributeError(const compressed_string_t *msg) { +NORETURN MP_COLD void mp_raise_AttributeError(const compressed_string_t *msg) { mp_raise_msg(&mp_type_AttributeError, msg); } -NORETURN void mp_raise_RuntimeError(const compressed_string_t *msg) { +NORETURN MP_COLD void mp_raise_RuntimeError(const compressed_string_t *msg) { mp_raise_msg(&mp_type_RuntimeError, msg); } -NORETURN void mp_raise_ImportError(const compressed_string_t *msg) { +NORETURN MP_COLD void mp_raise_ImportError(const compressed_string_t *msg) { mp_raise_msg(&mp_type_ImportError, msg); } -NORETURN void mp_raise_IndexError(const compressed_string_t *msg) { +NORETURN MP_COLD void mp_raise_IndexError(const compressed_string_t *msg) { mp_raise_msg(&mp_type_IndexError, msg); } -NORETURN void mp_raise_IndexError_varg(const compressed_string_t *fmt, ...) { +NORETURN MP_COLD void mp_raise_IndexError_varg(const compressed_string_t *fmt, ...) { va_list argptr; va_start(argptr,fmt); mp_raise_msg_vlist(&mp_type_IndexError, fmt, argptr); va_end(argptr); } -NORETURN void mp_raise_ValueError(const compressed_string_t *msg) { +NORETURN MP_COLD void mp_raise_ValueError(const compressed_string_t *msg) { mp_raise_msg(&mp_type_ValueError, msg); } -NORETURN void mp_raise_ValueError_varg(const compressed_string_t *fmt, ...) { +NORETURN MP_COLD void mp_raise_ValueError_varg(const compressed_string_t *fmt, ...) 
{ va_list argptr; va_start(argptr,fmt); mp_raise_msg_vlist(&mp_type_ValueError, fmt, argptr); va_end(argptr); } -NORETURN void mp_raise_TypeError(const compressed_string_t *msg) { +NORETURN MP_COLD void mp_raise_TypeError(const compressed_string_t *msg) { mp_raise_msg(&mp_type_TypeError, msg); } -NORETURN void mp_raise_TypeError_varg(const compressed_string_t *fmt, ...) { +NORETURN MP_COLD void mp_raise_TypeError_varg(const compressed_string_t *fmt, ...) { va_list argptr; va_start(argptr,fmt); mp_raise_msg_vlist(&mp_type_TypeError, fmt, argptr); va_end(argptr); } -NORETURN void mp_raise_OSError_msg(const compressed_string_t *msg) { +NORETURN MP_COLD void mp_raise_OSError_msg(const compressed_string_t *msg) { mp_raise_msg(&mp_type_OSError, msg); } -NORETURN void mp_raise_OSError_errno_str(int errno_, mp_obj_t str) { +NORETURN MP_COLD void mp_raise_OSError_errno_str(int errno_, mp_obj_t str) { mp_obj_t args[2] = { MP_OBJ_NEW_SMALL_INT(errno_), str, @@ -1704,26 +1704,26 @@ NORETURN void mp_raise_OSError_errno_str(int errno_, mp_obj_t str) { nlr_raise(mp_obj_new_exception_args(&mp_type_OSError, 2, args)); } -NORETURN void mp_raise_OSError_msg_varg(const compressed_string_t *fmt, ...) { +NORETURN MP_COLD void mp_raise_OSError_msg_varg(const compressed_string_t *fmt, ...) 
{ va_list argptr; va_start(argptr,fmt); mp_raise_msg_vlist(&mp_type_OSError, fmt, argptr); va_end(argptr); } -NORETURN void mp_raise_ConnectionError(const compressed_string_t *msg) { +NORETURN MP_COLD void mp_raise_ConnectionError(const compressed_string_t *msg) { mp_raise_msg(&mp_type_ConnectionError, msg); } -NORETURN void mp_raise_BrokenPipeError(void) { +NORETURN MP_COLD void mp_raise_BrokenPipeError(void) { mp_raise_type_arg(&mp_type_BrokenPipeError, MP_OBJ_NEW_SMALL_INT(MP_EPIPE)); } -NORETURN void mp_raise_NotImplementedError(const compressed_string_t *msg) { +NORETURN MP_COLD void mp_raise_NotImplementedError(const compressed_string_t *msg) { mp_raise_msg(&mp_type_NotImplementedError, msg); } -NORETURN void mp_raise_NotImplementedError_varg(const compressed_string_t *fmt, ...) { +NORETURN MP_COLD void mp_raise_NotImplementedError_varg(const compressed_string_t *fmt, ...) { va_list argptr; va_start(argptr,fmt); mp_raise_msg_vlist(&mp_type_NotImplementedError, fmt, argptr); @@ -1731,17 +1731,18 @@ NORETURN void mp_raise_NotImplementedError_varg(const compressed_string_t *fmt, } -NORETURN void mp_raise_OverflowError_varg(const compressed_string_t *fmt, ...) { +NORETURN MP_COLD void mp_raise_OverflowError_varg(const compressed_string_t *fmt, ...) { va_list argptr; va_start(argptr,fmt); mp_raise_msg_vlist(&mp_type_OverflowError, fmt, argptr); va_end(argptr); } -NORETURN void mp_raise_type_arg(const mp_obj_type_t *exc_type, mp_obj_t arg) { +NORETURN MP_COLD void mp_raise_type_arg(const mp_obj_type_t *exc_type, mp_obj_t arg) { nlr_raise(mp_obj_new_exception_arg1(exc_type, arg)); } +// Leave this as not COLD because it is used by iterators in normal execution. 
NORETURN void mp_raise_StopIteration(mp_obj_t arg) { if (arg == MP_OBJ_NULL) { mp_raise_type(&mp_type_StopIteration); @@ -1750,18 +1751,18 @@ NORETURN void mp_raise_StopIteration(mp_obj_t arg) { } } -NORETURN void mp_raise_OSError(int errno_) { +NORETURN MP_COLD void mp_raise_OSError(int errno_) { mp_raise_type_arg(&mp_type_OSError, MP_OBJ_NEW_SMALL_INT(errno_)); } #endif #if MICROPY_STACK_CHECK || MICROPY_ENABLE_PYSTACK -NORETURN void mp_raise_recursion_depth(void) { +NORETURN MP_COLD void mp_raise_recursion_depth(void) { mp_raise_RuntimeError(MP_ERROR_TEXT("maximum recursion depth exceeded")); } #endif -NORETURN void mp_raise_ZeroDivisionError(void) { +NORETURN MP_COLD void mp_raise_ZeroDivisionError(void) { mp_raise_msg(&mp_type_ZeroDivisionError, MP_ERROR_TEXT("division by zero")); } diff --git a/py/runtime.h b/py/runtime.h index 1a7a5d1698..d154772508 100644 --- a/py/runtime.h +++ b/py/runtime.h @@ -86,7 +86,7 @@ bool mp_sched_schedule(mp_obj_t function, mp_obj_t arg); int mp_print_mp_int(const mp_print_t *print, mp_obj_t x, int base, int base_char, int flags, char fill, int width, int prec); void mp_arg_check_num_sig(size_t n_args, size_t n_kw, uint32_t sig); -static inline void mp_arg_check_num(size_t n_args, size_t n_kw, size_t n_args_min, size_t n_args_max, bool takes_kw) { +static MP_INLINE void mp_arg_check_num(size_t n_args, size_t n_kw, size_t n_args_min, size_t n_args_max, bool takes_kw) { mp_arg_check_num_sig(n_args, n_kw, MP_OBJ_FUN_MAKE_SIG(n_args_min, n_args_max, takes_kw)); } void mp_arg_parse_all(size_t n_pos, const mp_obj_t *pos, mp_map_t *kws, size_t n_allowed, const mp_arg_t *allowed, mp_arg_val_t *out_vals); @@ -115,16 +115,16 @@ mp_obj_t mp_arg_validate_type_or_none(mp_obj_t obj, const mp_obj_type_t *type, q mp_int_t mp_arg_validate_type_int(mp_obj_t obj, qstr arg_name); mp_obj_t mp_arg_validate_type_string(mp_obj_t obj, qstr arg_name); -static inline mp_obj_dict_t *PLACE_IN_ITCM(mp_locals_get)(void) { +static MP_INLINE mp_obj_dict_t 
*mp_locals_get(void) { return MP_STATE_THREAD(dict_locals); } -static inline void PLACE_IN_ITCM(mp_locals_set)(mp_obj_dict_t * d) { +static MP_INLINE void mp_locals_set(mp_obj_dict_t *d) { MP_STATE_THREAD(dict_locals) = d; } -static inline mp_obj_dict_t *PLACE_IN_ITCM(mp_globals_get)(void) { +static MP_INLINE mp_obj_dict_t *mp_globals_get(void) { return MP_STATE_THREAD(dict_globals); } -static inline void PLACE_IN_ITCM(mp_globals_set)(mp_obj_dict_t * d) { +static MP_INLINE void mp_globals_set(mp_obj_dict_t *d) { MP_STATE_THREAD(dict_globals) = d; } @@ -181,7 +181,7 @@ mp_obj_t mp_iternext_allow_raise(mp_obj_t o); // may return MP_OBJ_STOP_ITERATIO mp_obj_t mp_iternext(mp_obj_t o); // will always return MP_OBJ_STOP_ITERATION instead of raising StopIteration(...) mp_vm_return_kind_t mp_resume(mp_obj_t self_in, mp_obj_t send_value, mp_obj_t throw_value, mp_obj_t *ret_val); -static inline mp_obj_t mp_make_stop_iteration(mp_obj_t o) { +static MP_INLINE mp_obj_t mp_make_stop_iteration(mp_obj_t o) { MP_STATE_THREAD(stop_iteration_arg) = o; return MP_OBJ_STOP_ITERATION; } diff --git a/py/stackctrl.c b/py/stackctrl.c index d699d6da61..546987f04d 100644 --- a/py/stackctrl.c +++ b/py/stackctrl.c @@ -38,7 +38,7 @@ void mp_stack_set_top(void *top) { MP_STATE_THREAD(stack_top) = top; } -mp_uint_t mp_stack_usage(void) { +mp_uint_t PLACE_IN_ITCM(mp_stack_usage)(void) { // Assumes descending stack // Force routine to not be inlined. Better guarantee than MP_NOINLINE for -flto. 
__asm volatile (""); @@ -52,7 +52,7 @@ void mp_stack_set_limit(mp_uint_t limit) { MP_STATE_THREAD(stack_limit) = limit; } -void mp_stack_check(void) { +void PLACE_IN_ITCM(mp_stack_check)(void) { if (mp_stack_usage() >= MP_STATE_THREAD(stack_limit)) { mp_raise_recursion_depth(); } diff --git a/shared-bindings/digitalio/DigitalInOut.c b/shared-bindings/digitalio/DigitalInOut.c index de4d66f9d8..c0195115fa 100644 --- a/shared-bindings/digitalio/DigitalInOut.c +++ b/shared-bindings/digitalio/DigitalInOut.c @@ -125,7 +125,7 @@ STATIC mp_obj_t digitalio_digitalinout_obj___exit__(size_t n_args, const mp_obj_ } STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(digitalio_digitalinout_obj___exit___obj, 4, 4, digitalio_digitalinout_obj___exit__); -STATIC void check_for_deinit(digitalio_digitalinout_obj_t *self) { +STATIC inline void check_for_deinit(digitalio_digitalinout_obj_t *self) { if (common_hal_digitalio_digitalinout_deinited(self)) { raise_deinited_error(); } diff --git a/shared-module/storage/__init__.c b/shared-module/storage/__init__.c index 7b6eec7e1b..060972d0c3 100644 --- a/shared-module/storage/__init__.c +++ b/shared-module/storage/__init__.c @@ -127,6 +127,7 @@ static bool usb_drive_set_enabled(bool enabled) { if (tud_connected()) { return false; } + filesystem_set_internal_writable_by_usb(enabled); storage_usb_is_enabled = enabled; return true; } diff --git a/supervisor/linker.h b/supervisor/linker.h index 58068c1a4b..c6ee8b4218 100644 --- a/supervisor/linker.h +++ b/supervisor/linker.h @@ -32,7 +32,8 @@ #if defined(IMXRT10XX) || defined(FOMU) || defined(STM32H7) || defined(RASPBERRYPI) #define PLACE_IN_DTCM_DATA(name) name __attribute__((section(".dtcm_data." #name))) #define PLACE_IN_DTCM_BSS(name) name __attribute__((section(".dtcm_bss." #name))) -#define PLACE_IN_ITCM(name) __attribute__((section(".itcm." #name))) name +// Don't inline ITCM functions because that may pull them out of ITCM into other sections. 
+#define PLACE_IN_ITCM(name) __attribute__((section(".itcm." #name),noinline)) name #else #define PLACE_IN_DTCM_DATA(name) name #define PLACE_IN_DTCM_BSS(name) name diff --git a/supervisor/shared/background_callback.c b/supervisor/shared/background_callback.c index db6d62f8e7..80f70b528a 100644 --- a/supervisor/shared/background_callback.c +++ b/supervisor/shared/background_callback.c @@ -42,7 +42,7 @@ STATIC volatile background_callback_t *volatile callback_head, *volatile callbac MP_WEAK void port_wake_main_task(void) { } -void background_callback_add_core(background_callback_t *cb) { +void PLACE_IN_ITCM(background_callback_add_core)(background_callback_t * cb) { CALLBACK_CRITICAL_BEGIN; if (cb->prev || callback_head == cb) { CALLBACK_CRITICAL_END; @@ -62,13 +62,13 @@ void background_callback_add_core(background_callback_t *cb) { port_wake_main_task(); } -void background_callback_add(background_callback_t *cb, background_callback_fun fun, void *data) { +void PLACE_IN_ITCM(background_callback_add)(background_callback_t * cb, background_callback_fun fun, void *data) { cb->fun = fun; cb->data = data; background_callback_add_core(cb); } -bool PLACE_IN_ITCM(background_callback_pending)(void) { +bool inline background_callback_pending(void) { return callback_head != NULL; } diff --git a/supervisor/shared/filesystem.c b/supervisor/shared/filesystem.c index 1eab59c384..345f55f2f1 100644 --- a/supervisor/shared/filesystem.c +++ b/supervisor/shared/filesystem.c @@ -33,6 +33,7 @@ #include "py/mpstate.h" #include "supervisor/flash.h" +#include "supervisor/linker.h" static mp_vfs_mount_t _mp_vfs; static fs_user_mount_t _internal_vfs; @@ -165,7 +166,7 @@ bool filesystem_init(bool create_allowed, bool force_create) { return true; } -void filesystem_flush(void) { +void PLACE_IN_ITCM(filesystem_flush)(void) { // Reset interval before next flush. 
filesystem_flush_interval_ms = CIRCUITPY_FILESYSTEM_FLUSH_INTERVAL_MS; supervisor_flash_flush(); diff --git a/supervisor/shared/flash.c b/supervisor/shared/flash.c index dfd7cf2050..53815c9836 100644 --- a/supervisor/shared/flash.c +++ b/supervisor/shared/flash.c @@ -132,7 +132,7 @@ static mp_uint_t flash_write_blocks(const uint8_t *src, uint32_t block_num, uint } } -void supervisor_flash_flush(void) { +void PLACE_IN_ITCM(supervisor_flash_flush)(void) { #if INTERNAL_FLASH_FILESYSTEM port_internal_flash_flush(); #else diff --git a/supervisor/shared/safe_mode.c b/supervisor/shared/safe_mode.c index 5112b17ebf..d3a31de71a 100644 --- a/supervisor/shared/safe_mode.c +++ b/supervisor/shared/safe_mode.c @@ -34,6 +34,7 @@ #include "shared-bindings/microcontroller/Processor.h" #include "shared-bindings/microcontroller/ResetReason.h" +#include "supervisor/linker.h" #include "supervisor/serial.h" #include "supervisor/shared/rgb_led_colors.h" #include "supervisor/shared/status_leds.h" @@ -121,12 +122,12 @@ safe_mode_t wait_for_safe_mode_reset(void) { return SAFE_MODE_NONE; } -void safe_mode_on_next_reset(safe_mode_t reason) { +void PLACE_IN_ITCM(safe_mode_on_next_reset)(safe_mode_t reason) { port_set_saved_word(SAFE_MODE_DATA_GUARD | (reason << 8)); } // Don't inline this so it's easy to break on it from GDB. -void __attribute__((noinline,)) reset_into_safe_mode(safe_mode_t reason) { +void __attribute__((noinline,)) PLACE_IN_ITCM(reset_into_safe_mode)(safe_mode_t reason) { if (_safe_mode > SAFE_MODE_BROWNOUT && reason > SAFE_MODE_BROWNOUT) { while (true) { // This very bad because it means running in safe mode didn't save us. 
Only ignore brownout diff --git a/supervisor/shared/status_bar.c b/supervisor/shared/status_bar.c index 4f5fa06464..16d23fe541 100644 --- a/supervisor/shared/status_bar.c +++ b/supervisor/shared/status_bar.c @@ -78,8 +78,8 @@ void supervisor_status_bar_update(void) { !shared_module_supervisor_status_bar_get_display(&shared_module_supervisor_status_bar_obj); // Suppress writes to console and/or display if status bar is not enabled for either or both. - bool prev_console_disable; - bool prev_display_disable; + bool prev_console_disable = false; + bool prev_display_disable = false; if (disable_console_writes) { prev_console_disable = serial_console_write_disable(true); diff --git a/supervisor/shared/translate/translate_impl.h b/supervisor/shared/translate/translate_impl.h index 1c144197cb..13da8c656b 100644 --- a/supervisor/shared/translate/translate_impl.h +++ b/supervisor/shared/translate/translate_impl.h @@ -48,7 +48,9 @@ inline #if !CIRCUITPY_LTO || CIRCUITPY_DEBUG < 1 __attribute__((always_inline)) #endif -const compressed_string_t *translate(const char *original) { +// Prevent instrumenting this because that disables the inlining we rely on for code size +// optimization.
+__attribute__((no_instrument_function)) const compressed_string_t *translate(const char *original) { #ifndef NO_QSTR #define QDEF(id, hash, len, str) #define TRANSLATION(english_id, number) if (strcmp(original, english_id) == 0) { return &translation##number; } else diff --git a/supervisor/shared/usb/usb.c b/supervisor/shared/usb/usb.c index 5ac8454312..1d600e1158 100644 --- a/supervisor/shared/usb/usb.c +++ b/supervisor/shared/usb/usb.c @@ -220,11 +220,11 @@ static void usb_background_do(void *unused) { usb_background(); } -void usb_background_schedule(void) { +void PLACE_IN_ITCM(usb_background_schedule)(void) { background_callback_add(&usb_callback, usb_background_do, NULL); } -void usb_irq_handler(int instance) { +void PLACE_IN_ITCM(usb_irq_handler)(int instance) { #if CFG_TUSB_MCU != OPT_MCU_RP2040 // For rp2040, IRQ handler is already installed and invoked automatically if (instance == CIRCUITPY_USB_DEVICE_INSTANCE) { diff --git a/tests/perf_bench/benchrun.py b/tests/perf_bench/benchrun.py index 90c303dd29..0092ecaa33 100644 --- a/tests/perf_bench/benchrun.py +++ b/tests/perf_bench/benchrun.py @@ -4,7 +4,7 @@ def bm_run(N, M): except ImportError: import time - ticks_us = lambda: int(time.perf_counter() * 1000000) + ticks_us = lambda: int(time.monotonic_ns() / 1000) ticks_diff = lambda a, b: a - b # Pick sensible parameters given N, M diff --git a/tools/cortex-m-fault-gdb.py b/tools/cortex-m-fault-gdb.py new file mode 100644 index 0000000000..31b76aa1d4 --- /dev/null +++ b/tools/cortex-m-fault-gdb.py @@ -0,0 +1,106 @@ +"""Source this file into gdb `source ../../tools/cortex-m-fault-gdb.py` then run + `cortex-m-fault` to print basic info about the fault registers.""" + +SCS = 0xE000E000 +SCB = SCS + 0x0D00 +CPUID = SCB + 0x000 # (R/ ) CPUID Base Register */ +ICSR = SCB + 0x004 # (R/W) Interrupt Control and State Register */ +VTOR = SCB + 0x008 # (R/W) Vector Table Offset Register */ +AIRCR = SCB + 0x00C # (R/W) Application Interrupt and Reset Control Register 
*/ +SCR = SCB + 0x010 # (R/W) System Control Register */ +CCR = SCB + 0x014 # (R/W) Configuration Control Register */ +SHCSR = SCB + 0x024 # (R/W) System Handler Control and State Register */ +CFSR = SCB + 0x028 # (R/W) Configurable Fault Status Register */ +HFSR = SCB + 0x02C # (R/W) HardFault Status Register */ +DFSR = SCB + 0x030 # (R/W) Debug Fault Status Register */ +MMFAR = SCB + 0x034 # (R/W) MemManage Fault Address Register */ +BFAR = SCB + 0x038 # (R/W) BusFault Address Register */ +AFSR = SCB + 0x03C # (R/W) Auxiliary Fault Status Register */ + +PARTS = {0xC27: "Cortex M7"} + +EXCEPTIONS = { + 0: "Thread mode", + 2: "Non Maskable Interrupt", + 3: "Hard Fault", + 4: "MemManage Fault", + 5: "Bus Fault", + 6: "Usage Fault", + 11: "SVCAll", + 14: "PendSV", + 15: "SysTick", +} + + +class CortexMFault(gdb.Command): + def __init__(self): + super(CortexMFault, self).__init__("cortex-m-fault", gdb.COMMAND_USER) + + def _read(self, address): + i = gdb.selected_inferior() + return i.read_memory(address, 4).cast("I")[0] + + def invoke(self, arg, from_tty): + cpuid = self._read(CPUID) + implementer = cpuid >> 24 + if implementer != 0x41: + raise RuntimeError() + variant = (cpuid >> 20) & 0xF + constant = (cpuid >> 16) & 0xF + if constant != 0xF: + raise RuntimeError() + revision = cpuid & 0xF + part_no = (cpuid >> 4) & 0xFFF + print(PARTS[part_no]) + icsr = self._read(ICSR) + if (icsr & (1 << 11)) != 0: + print("No preempted exceptions") + else: + print("Another exception was preempted") + vectactive = icsr & 0x1FF + if vectactive != 0: + if vectactive in EXCEPTIONS: + print(EXCEPTIONS[vectactive]) + else: + print(vectactive - 16) + + vtor = self._read(VTOR) + # print(hex(self._read(SHCSR))) + cfsr = self._read(CFSR) + ufsr = cfsr >> 16 + bfsr = (cfsr >> 8) & 0xFF + mmfsr = cfsr & 0xFF + print("ufsr", hex(ufsr), "bfsr", hex(bfsr), "mmfsr", hex(mmfsr)) + if (bfsr & (1 << 7)) != 0: + print("Bad address", hex(self._read(BFAR))) + if (bfsr & (1 << 3)) != 0: + 
print("Unstacking from exception error") + if (bfsr & (1 << 2)) != 0: + print("Imprecise data bus error") + if (bfsr & (1 << 1)) != 0: + print("Precise data bus error") + if (bfsr & (1 << 0)) != 0: + print("Instruction bus error") + + if (mmfsr & (1 << 7)) != 0: + print("Bad address", hex(self._read(MMFAR))) + if (mmfsr & (1 << 3)) != 0: + print("Unstacking from exception error") + if (mmfsr & (1 << 1)) != 0: + print("Data access violation") + if (mmfsr & (1 << 0)) != 0: + print("Instruction access violation") + + if (ufsr & (1 << 8)) != 0: + print("Unaligned access") + if (ufsr & (1 << 0)) != 0: + print("Undefined instruction") + hfsr = self._read(HFSR) + if (hfsr & (1 << 30)) != 0: + print("Forced hard fault") + if (hfsr & (1 << 1)) != 0: + print("Bus fault when reading vector table") + print("VTOR", hex(vtor)) + + +CortexMFault() diff --git a/tools/cpboard.py b/tools/cpboard.py index 1f399d1dfc..658cf512a2 100644 --- a/tools/cpboard.py +++ b/tools/cpboard.py @@ -9,6 +9,7 @@ # SPDX-License-Identifier: MIT import os +import pathlib import re import serial import sys @@ -93,16 +94,18 @@ class REPL: for i in range(0, len(data), chunk_size): chunk = data[i : min(i + chunk_size, len(data))] self.session += chunk - self.serial.write(chunk) + c = self.serial.write(chunk) + if c < len(chunk): + raise RuntimeError() time.sleep(0.01) def reset(self): # Use read() since serial.reset_input_buffer() fails with termios.error now and then self.read() self.session = b"" - self.write(b"\r" + REPL.CHAR_CTRL_C + REPL.CHAR_CTRL_C) # interrupt any running program + self.write(REPL.CHAR_CTRL_C + REPL.CHAR_CTRL_C) # interrupt any running program self.write(b"\r" + REPL.CHAR_CTRL_B) # enter or reset friendly repl - data = self.read_until(b">>> ") + self.read_until(b">>> ", timeout=60) def execute(self, code, timeout=10, wait_for_response=True): self.read() # Throw away @@ -347,7 +350,7 @@ class CPboard: return cls(dev, baudrate=baudrate, wait=wait, timeout=timeout) def __init__(self, 
device, baudrate=115200, wait=0, timeout=10): - self.device = device + self.device = str(pathlib.Path(device).resolve()) self.usb_dev = None try: # Is it a usb.core.Device? @@ -357,7 +360,7 @@ class CPboard: else: serials = [serial for serial in os.listdir("/dev/serial/by-path") if portstr in serial] if len(serials) != 1: - raise RuntimeError("Can't find excatly one matching usb serial device") + raise RuntimeError("Can't find exactly one matching usb serial device") self.device = os.path.realpath("/dev/serial/by-path/" + serials[0]) self.usb_dev = device @@ -370,6 +373,10 @@ class CPboard: self.bootloader = False self.repl = REPL(self) + # Disable autoreload so that file copies won't mess us up. + with self: + self.exec("import supervisor;supervisor.runtime.autoreload = False") + def __enter__(self): self.open() return self @@ -507,7 +514,7 @@ class CPboard: part = [part for part in disks if "part1" in part] if not part: - raise RuntimeError("Disk not found for: " + self.device) + return None return Disk(part[0]) @@ -557,9 +564,16 @@ class Pyboard: def enter_raw_repl(self): self.board.open() + def exit_raw_repl(self): + self.close() + def execfile(self, filename): return self.board.execfile(filename) + def exec_(self, command, data_consumer=None): + output = self.board.exec(command, timeout=20000) + return output + def eval_namedtuple(board, command): from collections import namedtuple diff --git a/tools/gen_display_resources.py b/tools/gen_display_resources.py index 7165db84cc..350988bab0 100644 --- a/tools/gen_display_resources.py +++ b/tools/gen_display_resources.py @@ -121,7 +121,7 @@ if tile_y == 16: blinka_size = 16 c_file.write( """\ -uint32_t blinka_bitmap_data[32] = { +const uint32_t blinka_bitmap_data[32] = { 0x00000011, 0x11000000, 0x00000111, 0x53100000, 0x00000111, 0x56110000, @@ -145,7 +145,7 @@ else: blinka_size = 12 c_file.write( """\ -uint32_t blinka_bitmap_data[28] = { +const uint32_t blinka_bitmap_data[28] = { 0x00000111, 0x00000000, 0x00001153, 
0x10000000, 0x00001156, 0x11000000, @@ -164,11 +164,11 @@ uint32_t blinka_bitmap_data[28] = { c_file.write( """\ -displayio_bitmap_t blinka_bitmap = {{ +const displayio_bitmap_t blinka_bitmap = {{ .base = {{.type = &displayio_bitmap_type }}, .width = {0}, .height = {0}, - .data = blinka_bitmap_data, + .data = (uint32_t*) blinka_bitmap_data, .stride = 2, .bits_per_value = 4, .x_shift = 3, @@ -211,7 +211,7 @@ displayio_palette_t blinka_palette = {{ displayio_tilegrid_t supervisor_blinka_sprite = {{ .base = {{.type = &displayio_tilegrid_type }}, - .bitmap = &blinka_bitmap, + .bitmap = (displayio_bitmap_t*) &blinka_bitmap, .pixel_shader = &blinka_palette, .x = 0, .y = 0, diff --git a/tools/swo_function_trace.py b/tools/swo_function_trace.py new file mode 100644 index 0000000000..bdc1b0f840 --- /dev/null +++ b/tools/swo_function_trace.py @@ -0,0 +1,143 @@ +"""This prints out Chrome Trace Formatted json that can be viewed in Perfetto or Spall. +https://ui.perfetto.dev/ +https://gravitymoth.com/spall/spall.html + +Format: +https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview# + +Connect a USB to Serial converter to the SWO pin and then provide the serial +device to this script. It should be 1MBaud SWO signal. CTRL-C when you've captured enough data and +then it'll process and output. 
+ +pip install pysigrok-libsigrokdecode +python tools/swo_function_trace.py /dev/ttyACM0 build-metro_m7_1011/firmware.elf > trace.json +""" + +import serial +import sys +import sigrokdecode +import time +import json + +from elftools.elf.elffile import ELFFile + +f = open(sys.argv[-1], "rb") +ef = ELFFile(f) + +symtab = ef.get_section_by_name(".symtab") +symbols = {} +for s in symtab.iter_symbols(): + addr = s.entry["st_value"] + symbols[addr] = s.name +f.close() + +# sys.exit(0) + +decoder = sigrokdecode.get_decoder("arm_itm")() + +decoder.reset() +decoder.options = {"objdump": "", "elffile": ""} +decoder.start() + +dwt_timestamp = 0 +last_dwt_timestamp = 0 +streak = 0 +print("[") + +stack = [] + + +def emit(ts, addr, channel): + s = None + if addr in symbols: + s = symbols[addr] + else: + s = hex(addr) + if addr < 0x6000_0000: + s = "R:" + s + else: + s = "F:" + s + if channel[0] == "3": + stack.append(addr) + else: + if not stack or stack[-1] != addr: + return + stack.pop() + event = { + "name": s, + "ph": "B" if channel[0] == "3" else "E", + "ts": ts, + "pid": 0, + "tid": 0, + } + print(json.dumps(event), ",") + + +def decoder_cb(ss, es, data): + global streak + global last_dwt_timestamp + # print(ss, es, data) + ptype = data[0] + ts = (dwt_timestamp + (streak * 32)) / 500 + if ptype == 0: + event = {"name": data[1][0], "ph": "i", "ts": ts, "pid": 0, "tid": 0, "s": "g"} + print(json.dumps(event), ",") + if data[1][0] == "Overflow": + while stack: + emit(ts, stack[-1], "4:") + + if ptype in (0, 1): + return + if ptype == 2 and (data[1][0].startswith("3:") or data[1][0].startswith("4:")): + channel, addr = data[1][0].split() + addr = int(addr[2:], 16) + # if addr & 0x1 != 0: + # addr -= 1 + # print(dwt_timestamp + streak, channel, symbols[addr], hex(addr)) + emit(ts, addr, channel) + else: + # print(dwt_timestamp + streak, data) + pass + if dwt_timestamp == last_dwt_timestamp: + streak += 1 + else: + streak = 0 + + if last_dwt_timestamp > dwt_timestamp: + raise 
RuntimeError() + last_dwt_timestamp = dwt_timestamp + + +decoder.add_callback(sigrokdecode.OUTPUT_ANN, None, decoder_cb) + +s = serial.Serial(sys.argv[-2], 1000000) + + +buffers = [] +while True: + try: + start_ts = time.monotonic_ns() + b = s.read(s.in_waiting) + if b: + end_ts = time.monotonic_ns() + buffers.append((start_ts, end_ts, b)) + # print(len(b)) + # if len(buffers) > 10: + # break + except KeyboardInterrupt: + break + +time_per_bit = 1_000_000_000 / 1000000 + +min_gap = 100000000 +total_bytes = 0 +for start_ts, end_ts, buf in buffers: + # print(total_bytes, start_ts, end_ts, buf) + ts_per_byte = (end_ts - start_ts) / len(buf) + for i, b in enumerate(buf): + # print(total_bytes, hex(b)) + total_bytes += 1 + decoder.decode( + start_ts + ts_per_byte * i, start_ts + ts_per_byte * (i + 1), ("DATA", None, (b,)) + ) + dwt_timestamp = decoder.dwt_timestamp diff --git a/tools/swo_viewer.py b/tools/swo_viewer.py new file mode 100644 index 0000000000..327c450023 --- /dev/null +++ b/tools/swo_viewer.py @@ -0,0 +1,67 @@ +"""This prints out all parsed ITM packets. + +Connect a USB to Serial converter to the SWO pin and then provide the serial +device to this script. It should be 1MBaud SWO signal. CTRL-C when you've +captured enough data and then it'll process and output. 
+ +pip install pysigrok-libsigrokdecode +python tools/swo_viewer.py /dev/ttyACM0 +""" + +import serial +import sys +import sigrokdecode +import time +import json + +decoder = sigrokdecode.get_decoder("arm_itm")() + +decoder.reset() +decoder.options = {"objdump": "", "elffile": ""} +decoder.start() + +dwt_timestamp = 0 +last_dwt_timestamp = 0 +streak = 0 + +stack = [] + + +def decoder_cb(ss, es, data): + global streak + global last_dwt_timestamp + print(dwt_timestamp, ss, es, data) + + +decoder.add_callback(sigrokdecode.OUTPUT_ANN, None, decoder_cb) + +s = serial.Serial(sys.argv[1], 1000000)  # the serial device is the first CLI argument + +buffers = [] +while True: + try: + start_ts = time.monotonic_ns() + b = s.read(s.in_waiting) + if b: + end_ts = time.monotonic_ns() + buffers.append((start_ts, end_ts, b)) + # print(len(b)) + # if len(buffers) > 10: + # break + except KeyboardInterrupt: + break + +time_per_bit = 1_000_000_000 / 1000000 + +min_gap = 100000000 +total_bytes = 0 +for start_ts, end_ts, buf in buffers: + # print(total_bytes, start_ts, end_ts, buf) + ts_per_byte = (end_ts - start_ts) / len(buf) + for i, b in enumerate(buf): + # print(total_bytes, hex(b)) + total_bytes += 1 + decoder.decode( + start_ts + ts_per_byte * i, start_ts + ts_per_byte * (i + 1), ("DATA", None, (b,)) + ) + dwt_timestamp = decoder.dwt_timestamp