diff --git a/src/cmake/rp2_common.cmake b/src/cmake/rp2_common.cmake index 273839f70..0753ef2cf 100644 --- a/src/cmake/rp2_common.cmake +++ b/src/cmake/rp2_common.cmake @@ -3,7 +3,7 @@ include(cmake/on_device.cmake) # PICO_CMAKE_CONFIG: PICO_NO_FLASH, Option to default all binaries to not use flash i.e. run from SRAM, type=bool, default=0, group=build, docref=cmake-binary-type-config -option(PICO_NO_FLASH "Default binaries to not not use flash") +option(PICO_NO_FLASH "Default binaries to not use flash") # PICO_CMAKE_CONFIG: PICO_COPY_TO_RAM, Option to default all binaries to copy code from flash to SRAM before running, type=bool, default=0, group=build, docref=cmake-binary-type-config option(PICO_COPY_TO_RAM "Default binaries to copy code to RAM when booting from flash") diff --git a/src/rp2040/boot_stage2/BUILD.bazel b/src/rp2040/boot_stage2/BUILD.bazel index 56ed5f3c3..3a4f318ef 100644 --- a/src/rp2040/boot_stage2/BUILD.bazel +++ b/src/rp2040/boot_stage2/BUILD.bazel @@ -35,6 +35,7 @@ BOOT2_CHOICE_DEFINE_MAP["compile_time_choice"] = [] cc_library( name = "config", hdrs = [ + "asminclude/embedded_xip_cache_enable_block.inc.S", "asminclude/boot2_helpers/exit_from_boot2.S", "asminclude/boot2_helpers/read_flash_sreg.S", "asminclude/boot2_helpers/wait_ssi_ready.S", diff --git a/src/rp2040/boot_stage2/asminclude/embedded_xip_cache_enable_block.inc.S b/src/rp2040/boot_stage2/asminclude/embedded_xip_cache_enable_block.inc.S new file mode 100644 index 000000000..b73dc3f02 --- /dev/null +++ b/src/rp2040/boot_stage2/asminclude/embedded_xip_cache_enable_block.inc.S @@ -0,0 +1,16 @@ +#include "hardware/regs/addressmap.h" +#include "hardware/regs/xip.h" + +#if defined(PICO_USE_XIP_CACHE_AS_RAM) && PICO_USE_XIP_CACHE_AS_RAM + // Disable the XIP cache making its 16k of XIP RAM available for storage +_b2_disable_xip_cache: + ldr r0, =(REG_ALIAS_CLR_BITS + XIP_CTRL_BASE + XIP_CTRL_OFFSET) + movs r1, #XIP_CTRL_EN_BITS + str r1, [r0] +#else + // Enable the XIP cache (hardware 
default) +_b2_enable_xip_cache: + ldr r0, =(REG_ALIAS_SET_BITS + XIP_CTRL_BASE + XIP_CTRL_OFFSET) + movs r1, #XIP_CTRL_EN_BITS + str r1, [r0] +#endif diff --git a/src/rp2040/boot_stage2/boot2_at25sf128a.S b/src/rp2040/boot_stage2/boot2_at25sf128a.S index 72f751ed9..689c43850 100644 --- a/src/rp2040/boot_stage2/boot2_at25sf128a.S +++ b/src/rp2040/boot_stage2/boot2_at25sf128a.S @@ -266,6 +266,8 @@ configure_ssi: // Bus accesses to the XIP window will now be transparently serviced by the // external flash on cache miss. We are ready to run code from flash. +#include "embedded_xip_cache_enable_block.inc.S" + // Pull in standard exit routine #include "boot2_helpers/exit_from_boot2.S" diff --git a/src/rp2040/boot_stage2/boot2_generic_03h.S b/src/rp2040/boot_stage2/boot2_generic_03h.S index effef930b..d60310a97 100644 --- a/src/rp2040/boot_stage2/boot2_generic_03h.S +++ b/src/rp2040/boot_stage2/boot2_generic_03h.S @@ -96,6 +96,8 @@ regular_func _stage2_boot // translated by the SSI into 03h read commands to the external flash (if cache is missed), // and the data will be returned to the bus. +#include "embedded_xip_cache_enable_block.inc.S" + // Pull in standard exit routine #include "boot2_helpers/exit_from_boot2.S" diff --git a/src/rp2040/boot_stage2/boot2_is25lp080.S b/src/rp2040/boot_stage2/boot2_is25lp080.S index fda0f992f..eeefafa9b 100644 --- a/src/rp2040/boot_stage2/boot2_is25lp080.S +++ b/src/rp2040/boot_stage2/boot2_is25lp080.S @@ -248,6 +248,8 @@ configure_ssi: // We are now in XIP mode, with all transactions using Dual I/O and only // needing to send 24-bit addresses (plus mode bits) for each read transaction. 
+#include "embedded_xip_cache_enable_block.inc.S" + // Pull in standard exit routine #include "boot2_helpers/exit_from_boot2.S" diff --git a/src/rp2040/boot_stage2/boot2_w25q080.S b/src/rp2040/boot_stage2/boot2_w25q080.S index c35fb81fa..c773030af 100644 --- a/src/rp2040/boot_stage2/boot2_w25q080.S +++ b/src/rp2040/boot_stage2/boot2_w25q080.S @@ -268,6 +268,8 @@ configure_ssi: // Bus accesses to the XIP window will now be transparently serviced by the // external flash on cache miss. We are ready to run code from flash. +#include "embedded_xip_cache_enable_block.inc.S" + // Pull in standard exit routine #include "boot2_helpers/exit_from_boot2.S" diff --git a/src/rp2040/boot_stage2/boot2_w25x10cl.S b/src/rp2040/boot_stage2/boot2_w25x10cl.S index b0e6a10fc..bf86c8b54 100644 --- a/src/rp2040/boot_stage2/boot2_w25x10cl.S +++ b/src/rp2040/boot_stage2/boot2_w25x10cl.S @@ -187,6 +187,8 @@ regular_func _stage2_boot // We are now in XIP mode, with all transactions using Dual I/O and only // needing to send 24-bit addresses (plus mode bits) for each read transaction. 
+#include "embedded_xip_cache_enable_block.inc.S" + // Pull in standard exit routine #include "boot2_helpers/exit_from_boot2.S" diff --git a/src/rp2_common/pico_crt0/BUILD.bazel b/src/rp2_common/pico_crt0/BUILD.bazel index e9d5fa838..3c556cb18 100644 --- a/src/rp2_common/pico_crt0/BUILD.bazel +++ b/src/rp2_common/pico_crt0/BUILD.bazel @@ -28,6 +28,14 @@ alias( }), ) +alias( + name = "copy_to_ram_using_xip_ram_linker_script", + actual = select({ + "//bazel/constraint:rp2040": "//src/rp2_common/pico_crt0/rp2040:copy_to_ram_using_xip_ram_linker_script", + "//conditions:default": "//bazel:incompatible_cc_lib", + }), +) + alias( name = "no_flash_linker_script", actual = select({ @@ -37,6 +45,14 @@ alias( }), ) +alias( + name = "no_flash_using_xip_ram_linker_script", + actual = select({ + "//bazel/constraint:rp2040": "//src/rp2_common/pico_crt0/rp2040:no_flash_using_xip_ram_linker_script", + "//conditions:default": "//bazel:incompatible_cc_lib", + }), +) + cc_library( name = "no_warn_rwx_flag", linkopts = select({ diff --git a/src/rp2_common/pico_crt0/crt0.S b/src/rp2_common/pico_crt0/crt0.S index ea3b99a5a..4d21aa8e5 100644 --- a/src/rp2_common/pico_crt0/crt0.S +++ b/src/rp2_common/pico_crt0/crt0.S @@ -11,6 +11,7 @@ #include "hardware/regs/addressmap.h" #include "hardware/regs/sio.h" +#include "hardware/regs/xip.h" #include "pico/binary_info/defs.h" #include "boot/picobin.h" #include "pico/bootrom.h" @@ -457,6 +458,22 @@ hold_non_core0_in_bootrom: b _enter_vtable_in_r0 1: +#if PICO_RP2040 && PICO_NO_FLASH + #if PICO_USE_XIP_CACHE_AS_RAM + // Disable the XIP cache making its 16k of XIP RAM available for storage +_disable_xip_cache: + ldr r0, =(REG_ALIAS_CLR_BITS + XIP_CTRL_BASE + XIP_CTRL_OFFSET) + movs r1, #XIP_CTRL_EN_BITS + str r1, [r0] + #else + // Enable the XIP cache (hardware default) +_enable_xip_cache: + ldr r0, =(REG_ALIAS_SET_BITS + XIP_CTRL_BASE + XIP_CTRL_OFFSET) + movs r1, #XIP_CTRL_EN_BITS + str r1, [r0] + #endif +#endif + #if !PICO_RP2040 && 
PICO_EMBED_XIP_SETUP && !PICO_NO_FLASH // Execute boot2 on the core 0 stack (it also gets copied into BOOTRAM due // to inclusion in the data copy table below). Note the reference @@ -552,6 +569,12 @@ data_cpy_table: .word __data_start__ .word __data_end__ +#if PICO_RP2040 && PICO_USE_XIP_CACHE_AS_RAM +.word __xip_ram_text_source__ +.word __xip_ram_text_start__ +.word __xip_ram_text_end__ +#endif + .word __scratch_x_source__ .word __scratch_x_start__ .word __scratch_x_end__ diff --git a/src/rp2_common/pico_crt0/rp2040/BUILD.bazel b/src/rp2_common/pico_crt0/rp2040/BUILD.bazel index dcf727cc5..d65800530 100644 --- a/src/rp2_common/pico_crt0/rp2040/BUILD.bazel +++ b/src/rp2_common/pico_crt0/rp2040/BUILD.bazel @@ -4,8 +4,10 @@ exports_files( [ "memmap_blocked_ram.ld", "memmap_copy_to_ram.ld", + "memmap_copy_to_ram_using_xip_ram.ld", "memmap_default.ld", "memmap_no_flash.ld", + "memmap_no_flash_using_xip_ram.ld", ] ) @@ -45,7 +47,7 @@ cc_library( # PICO_BUILD_DEFINE: PICO_COPY_TO_RAM, whether this is a 'copy_to_ram' build, type=bool, default=0, but dependent on CMake options, group=pico_standard_link cc_library( name = "copy_to_ram_linker_script", - defines = ["PICO_COPY_TO_RAM=1"], + defines = ["PICO_COPY_TO_RAM=1", "PICO_USE_XIP_CACHE_AS_RAM=0"], linkopts = ["-T$(location memmap_copy_to_ram.ld)"], target_compatible_with = ["//bazel/constraint:rp2040"], deps = [ @@ -55,10 +57,23 @@ cc_library( ], ) +# PICO_BUILD_DEFINE: PICO_COPY_TO_RAM && PICO_USE_XIP_CACHE_AS_RAM, whether this is a 'copy_to_ram_using_xip_ram' build, type=bool, default=0, but dependent on CMake options, group=pico_standard_link +cc_library( + name = "copy_to_ram_using_xip_ram_linker_script", + defines = ["PICO_COPY_TO_RAM=1", "PICO_USE_XIP_CACHE_AS_RAM=1"], + linkopts = ["-T$(location memmap_copy_to_ram_using_xip_ram.ld)"], + target_compatible_with = ["//bazel/constraint:rp2040"], + deps = [ + "memmap_copy_to_ram_using_xip_ram.ld", + "//src/rp2_common/pico_crt0:no_warn_rwx_flag", + 
"//src/rp2_common/pico_standard_link:default_flash_region", + ], +) + # PICO_BUILD_DEFINE: PICO_NO_FLASH, whether this is a 'no_flash' build, type=bool, default=0, but dependent on CMake options, group=pico_standard_link cc_library( name = "no_flash_linker_script", - defines = ["PICO_NO_FLASH=1"], + defines = ["PICO_NO_FLASH=1", "PICO_USE_XIP_CACHE_AS_RAM=0"], linkopts = ["-T$(location memmap_no_flash.ld)"], target_compatible_with = ["//bazel/constraint:rp2040"], deps = [ @@ -66,3 +81,15 @@ cc_library( "//src/rp2_common/pico_crt0:no_warn_rwx_flag", ], ) + +# PICO_BUILD_DEFINE: PICO_NO_FLASH && PICO_USE_XIP_CACHE_AS_RAM, whether this is a 'no_flash_using_xip_ram' build, type=bool, default=0, but dependent on CMake options, group=pico_standard_link +cc_library( + name = "no_flash_using_xip_ram_linker_script", + defines = ["PICO_NO_FLASH=1", "PICO_USE_XIP_CACHE_AS_RAM=1"], + linkopts = ["-T$(location memmap_no_flash_using_xip_ram.ld)"], + target_compatible_with = ["//bazel/constraint:rp2040"], + deps = [ + "memmap_no_flash_using_xip_ram.ld", + "//src/rp2_common/pico_crt0:no_warn_rwx_flag", + ], +) diff --git a/src/rp2_common/pico_crt0/rp2040/memmap_copy_to_ram.ld b/src/rp2_common/pico_crt0/rp2040/memmap_copy_to_ram.ld index 842ebfd3c..4071d1234 100644 --- a/src/rp2_common/pico_crt0/rp2040/memmap_copy_to_ram.ld +++ b/src/rp2_common/pico_crt0/rp2040/memmap_copy_to_ram.ld @@ -66,7 +66,8 @@ SECTIONS KEEP (*(.embedded_block)) __embedded_block_end = .; KEEP (*(.reset)) - } + . = ALIGN(4); + } > FLASH .rodata : { /* segments not marked as .flashdata are instead pulled into .data (in RAM) to avoid accidental flash accesses */ diff --git a/src/rp2_common/pico_crt0/rp2040/memmap_copy_to_ram_using_xip_ram.ld b/src/rp2_common/pico_crt0/rp2040/memmap_copy_to_ram_using_xip_ram.ld new file mode 100644 index 000000000..54c51241e --- /dev/null +++ b/src/rp2_common/pico_crt0/rp2040/memmap_copy_to_ram_using_xip_ram.ld @@ -0,0 +1,301 @@ +/* Based on GCC ARM embedded samples. 
+ Defines the following symbols for use by code: + __exidx_start + __exidx_end + __etext + __data_start__ + __preinit_array_start + __preinit_array_end + __init_array_start + __init_array_end + __fini_array_start + __fini_array_end + __data_end__ + __bss_start__ + __bss_end__ + __end__ + end + __HeapLimit + __StackLimit + __StackTop + __stack (== StackTop) +*/ + +MEMORY +{ + INCLUDE "pico_flash_region.ld" + XIP_RAM(rwx) : ORIGIN = 0x15000000, LENGTH = 16k + RAM(rwx) : ORIGIN = 0x20000000, LENGTH = 256k + SCRATCH_X(rwx) : ORIGIN = 0x20040000, LENGTH = 4k + SCRATCH_Y(rwx) : ORIGIN = 0x20041000, LENGTH = 4k +} + +ENTRY(_entry_point) + +SECTIONS +{ + /* Second stage bootloader is prepended to the image. It must be 256 bytes big + and checksummed. It is usually built by the boot_stage2 target + in the Raspberry Pi Pico SDK + */ + + .flash_begin : { + __flash_binary_start = .; + } > FLASH + + .boot2 : { + __boot2_start__ = .; + KEEP (*(.boot2)) + __boot2_end__ = .; + } > FLASH + + ASSERT(__boot2_end__ - __boot2_start__ == 256, + "ERROR: Pico second stage bootloader must be 256 bytes in size") + + /* The second stage will always enter the image at the start of .text. + The debugger will use the ELF entry point, which is the _entry_point + symbol if present, otherwise defaults to start of .text. + This can be used to transfer control back to the bootrom on debugger + launches only, to perform proper flash setup. + */ + + .flashtext : { + __logical_binary_start = .; + KEEP (*(.vectors)) + KEEP (*(.binary_info_header)) + __binary_info_header_end = .; + KEEP (*(.embedded_block)) + __embedded_block_end = .; + KEEP (*(.reset)) + . = ALIGN(4); + } > FLASH + + .rodata : { + /* segments not marked as .flashdata are instead pulled into .data (in RAM) to avoid accidental flash accesses */ + *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.flashdata*))) + . 
= ALIGN(4); + } > FLASH + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > FLASH + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > FLASH + __exidx_end = .; + + /* Machine inspectable binary information */ + . = ALIGN(4); + __binary_info_start = .; + .binary_info : + { + KEEP(*(.binary_info.keep.*)) + *(.binary_info.*) + } > FLASH + __binary_info_end = .; + . = ALIGN(4); + + .xip_text : { + __xip_ram_text_start__ = .; + . = ALIGN(4); + + *(.time_critical.text*) + + . = ALIGN(4); + __xip_ram_text_end__ = .; + } > XIP_RAM AT> FLASH + __xip_ram_text_source__ = LOADADDR(.xip_text); + . = ALIGN(4); + + /* Vector table goes first in RAM, to avoid large alignment hole */ + .ram_vector_table (NOLOAD): { + *(.ram_vector_table) + } > RAM + + .uninitialized_data (NOLOAD): { + . = ALIGN(4); + *(.uninitialized_data*) + } > RAM + + .text : { + __ram_text_start__ = .; + *(.init) + *(.text*) + *(.fini) + /* Pull all c'tors into .text */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + /* Followed by destructors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.eh_frame*) + . = ALIGN(4); + __ram_text_end__ = .; + } > RAM AT> FLASH + __ram_text_source__ = LOADADDR(.text); + . = ALIGN(4); + + .data : { + __data_start__ = .; + *(vtable) + + *(.time_critical*) + + . = ALIGN(4); + *(.rodata*) + . = ALIGN(4); + + *(.data*) + + . = ALIGN(4); + *(.after_data.*) + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__mutex_array_start = .); + KEEP(*(SORT(.mutex_array.*))) + KEEP(*(.mutex_array)) + PROVIDE_HIDDEN (__mutex_array_end = .); + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(SORT(.preinit_array.*))) + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . 
= ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + *(SORT(.fini_array.*)) + *(.fini_array) + PROVIDE_HIDDEN (__fini_array_end = .); + + *(.jcr) + . = ALIGN(4); + } > RAM AT> FLASH + + .tdata : { + . = ALIGN(4); + *(.tdata .tdata.* .gnu.linkonce.td.*) + /* All data end */ + __tdata_end = .; + } > RAM AT> FLASH + PROVIDE(__data_end__ = .); + + /* __etext is (for backwards compatibility) the name of the .data init source pointer (...) */ + __etext = LOADADDR(.data); + + .tbss (NOLOAD) : { + . = ALIGN(4); + __bss_start__ = .; + __tls_base = .; + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + + __tls_end = .; + } > RAM + + .bss : { + . = ALIGN(4); + __tbss_end = .; + + *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.bss*))) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM + + .heap (NOLOAD): + { + __end__ = .; + end = __end__; + KEEP(*(.heap*)) + } > RAM + /* historically on GCC sbrk was growing past __HeapLimit to __StackLimit, however + to be more compatible, we now set __HeapLimit explicitly to where the end of the heap is */ + __HeapLimit = ORIGIN(RAM) + LENGTH(RAM); + + /* Start and end symbols must be word-aligned */ + .scratch_x : { + __scratch_x_start__ = .; + *(.scratch_x.*) + . = ALIGN(4); + __scratch_x_end__ = .; + } > SCRATCH_X AT > FLASH + __scratch_x_source__ = LOADADDR(.scratch_x); + + .scratch_y : { + __scratch_y_start__ = .; + *(.scratch_y.*) + . = ALIGN(4); + __scratch_y_end__ = .; + } > SCRATCH_Y AT > FLASH + __scratch_y_source__ = LOADADDR(.scratch_y); + + /* .stack*_dummy section doesn't contain any symbols. 
It is only + * used for linker to calculate size of stack sections, and assign + * values to stack symbols later + * + * stack1 section may be empty/missing if platform_launch_core1 is not used */ + + /* by default we put core 0 stack at the end of scratch Y, so that if core 1 + * stack is not used then all of SCRATCH_X is free. + */ + .stack1_dummy (NOLOAD): + { + *(.stack1*) + } > SCRATCH_X + .stack_dummy (NOLOAD): + { + KEEP(*(.stack*)) + } > SCRATCH_Y + + .flash_end : { + KEEP(*(.embedded_end_block*)) + PROVIDE(__flash_binary_end = .); + } > FLASH + + /* stack limit is poorly named, but historically is maximum heap ptr */ + __StackLimit = ORIGIN(RAM) + LENGTH(RAM); + __StackOneTop = ORIGIN(SCRATCH_X) + LENGTH(SCRATCH_X); + __StackTop = ORIGIN(SCRATCH_Y) + LENGTH(SCRATCH_Y); + __StackOneBottom = __StackOneTop - SIZEOF(.stack1_dummy); + __StackBottom = __StackTop - SIZEOF(.stack_dummy); + PROVIDE(__stack = __StackTop); + + /* picolibc and LLVM */ + PROVIDE (__heap_start = __end__); + PROVIDE (__heap_end = __HeapLimit); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) ); + PROVIDE( __tls_size_align = (__tls_size + __tls_align - 1) & ~(__tls_align - 1)); + PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) ); + + /* llvm-libc */ + PROVIDE (_end = __end__); + PROVIDE (__llvm_libc_heap_limit = __HeapLimit); + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed") + + ASSERT( __binary_info_header_end - __logical_binary_start <= 256, "Binary info must be in first 256 bytes of the binary") + /* todo assert on extra code */ +} + diff --git a/src/rp2_common/pico_crt0/rp2040/memmap_no_flash_using_xip_ram.ld b/src/rp2_common/pico_crt0/rp2040/memmap_no_flash_using_xip_ram.ld new file mode 100644 index 000000000..ab9a0be3e --- /dev/null +++ b/src/rp2_common/pico_crt0/rp2040/memmap_no_flash_using_xip_ram.ld @@ -0,0 +1,262 @@ +/* Based on GCC ARM embedded samples. 
+ Defines the following symbols for use by code: + __exidx_start + __exidx_end + __etext + __data_start__ + __preinit_array_start + __preinit_array_end + __init_array_start + __init_array_end + __fini_array_start + __fini_array_end + __data_end__ + __bss_start__ + __bss_end__ + __end__ + end + __HeapLimit + __StackLimit + __StackTop + __stack (== StackTop) +*/ + +MEMORY +{ + XIP_RAM(rwx) : ORIGIN = 0x15000000, LENGTH = 16k + RAM(rwx) : ORIGIN = 0x20000000, LENGTH = 256k + SCRATCH_X(rwx) : ORIGIN = 0x20040000, LENGTH = 4k + SCRATCH_Y(rwx) : ORIGIN = 0x20041000, LENGTH = 4k +} + +ENTRY(_entry_point) + +SECTIONS +{ + /* Note in NO_FLASH builds the entry point for both the bootrom, and debugger + entry (ELF entry point), are *first* in the image, and the vector table + follows immediately afterward. This is because the bootrom enters RAM + binaries directly at their lowest address (preferring main RAM over XIP + cache-as-SRAM if both are used). + */ + + .xip_text : { + __xip_ram_text_start__ = .; + . = ALIGN(4); + + *(.time_critical.text*) + + . = ALIGN(4); + __xip_ram_text_end__ = .; + } > XIP_RAM + __xip_ram_text_source__ = LOADADDR(.xip_text); + . = ALIGN(4); + + .text : { + __logical_binary_start = .; + __reset_start = .; + KEEP (*(.reset)) + __reset_end = .; + KEEP (*(.binary_info_header)) + __binary_info_header_end = .; + KEEP (*(.embedded_block)) + __embedded_block_end = .; + . = ALIGN(256); + KEEP (*(.vectors)) + *(.time_critical*) + *(.text*) + . = ALIGN(4); + *(.init) + *(.fini) + /* Pull all c'tors into .text */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + /* Followed by destructors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.eh_frame*) + } > RAM + + .rodata : { + . = ALIGN(4); + *(.rodata*) + . = ALIGN(4); + *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.flashdata*))) + . 
= ALIGN(4); + } > RAM + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > RAM + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > RAM + __exidx_end = .; + + /* Machine inspectable binary information */ + . = ALIGN(4); + __binary_info_start = .; + .binary_info : + { + KEEP(*(.binary_info.keep.*)) + *(.binary_info.*) + } > RAM + __binary_info_end = .; + . = ALIGN(4); + + .data : { + __data_start__ = .; + *(vtable) + *(.data*) + + . = ALIGN(4); + *(.after_data.*) + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__mutex_array_start = .); + KEEP(*(SORT(.mutex_array.*))) + KEEP(*(.mutex_array)) + PROVIDE_HIDDEN (__mutex_array_end = .); + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(SORT(.preinit_array.*))) + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + *(SORT(.fini_array.*)) + *(.fini_array) + PROVIDE_HIDDEN (__fini_array_end = .); + + *(.jcr) + . = ALIGN(4); + } > RAM + + .tdata : { + . = ALIGN(4); + *(.tdata .tdata.* .gnu.linkonce.td.*) + /* All data end */ + __tdata_end = .; + } > RAM + PROVIDE(__data_end__ = .); + + .uninitialized_data (NOLOAD): { + . = ALIGN(4); + *(.uninitialized_data*) + } > RAM + /* __etext is (for backwards compatibility) the name of the .data init source pointer (...) */ + __etext = LOADADDR(.data); + + .tbss (NOLOAD) : { + . = ALIGN(4); + __bss_start__ = .; + __tls_base = .; + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + + __tls_end = .; + } > RAM + + .bss (NOLOAD) : { + . = ALIGN(4); + __tbss_end = .; + + *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.bss*))) + *(COMMON) + . 
= ALIGN(4); + __bss_end__ = .; + } > RAM + + .heap (NOLOAD): + { + __end__ = .; + end = __end__; + KEEP(*(.heap*)) + } > RAM + /* historically on GCC sbrk was growing past __HeapLimit to __StackLimit, however + to be more compatible, we now set __HeapLimit explicitly to where the end of the heap is */ + __HeapLimit = ORIGIN(RAM) + LENGTH(RAM); + + /* Start and end symbols must be word-aligned */ + .scratch_x : { + __scratch_x_start__ = .; + *(.scratch_x.*) + . = ALIGN(4); + __scratch_x_end__ = .; + } > SCRATCH_X + __scratch_x_source__ = LOADADDR(.scratch_x); + + .scratch_y : { + __scratch_y_start__ = .; + *(.scratch_y.*) + . = ALIGN(4); + __scratch_y_end__ = .; + } > SCRATCH_Y + __scratch_y_source__ = LOADADDR(.scratch_y); + + /* .stack*_dummy section doesn't contain any symbols. It is only + * used by the linker to calculate the size of stack sections, and assign + * values to stack symbols later + * + * stack1 section may be empty/missing if platform_launch_core1 is not used */ + + /* by default we put core 0 stack at the end of scratch Y, so that if core 1 + * stack is not used then all of SCRATCH_X is free. 
+ */ + .stack1_dummy (NOLOAD): + { + *(.stack1*) + } > SCRATCH_X + .stack_dummy (NOLOAD): + { + KEEP(*(.stack*)) + } > SCRATCH_Y + + /* stack limit is poorly named, but historically is maximum heap ptr */ + __StackLimit = ORIGIN(RAM) + LENGTH(RAM); + __StackOneTop = ORIGIN(SCRATCH_X) + LENGTH(SCRATCH_X); + __StackTop = ORIGIN(SCRATCH_Y) + LENGTH(SCRATCH_Y); + __StackOneBottom = __StackOneTop - SIZEOF(.stack1_dummy); + __StackBottom = __StackTop - SIZEOF(.stack_dummy); + PROVIDE(__stack = __StackTop); + + /* picolibc and LLVM */ + PROVIDE (__heap_start = __end__); + PROVIDE (__heap_end = __HeapLimit); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) ); + PROVIDE( __tls_size_align = (__tls_size + __tls_align - 1) & ~(__tls_align - 1)); + PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) ); + + /* llvm-libc */ + PROVIDE (_end = __end__); + PROVIDE (__llvm_libc_heap_limit = __HeapLimit); + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed") + + ASSERT( __binary_info_header_end - __logical_binary_start <= 256, "Binary info must be in first 256 bytes of the binary") + /* todo assert on extra code */ +} + diff --git a/src/rp2_common/pico_platform_sections/CMakeLists.txt b/src/rp2_common/pico_platform_sections/CMakeLists.txt index f0c36bd06..287cb5f0b 100644 --- a/src/rp2_common/pico_platform_sections/CMakeLists.txt +++ b/src/rp2_common/pico_platform_sections/CMakeLists.txt @@ -3,3 +3,61 @@ if (NOT TARGET pico_platform_sections) target_include_directories(pico_platform_sections_headers SYSTEM INTERFACE ${CMAKE_CURRENT_LIST_DIR}/include) endif() + +# pico_sections_time_critical(TARGET [SOURCES]) +# \brief\ Prefix target's object file sections with ".time_critical" +# +# This function will apply "objcopy --prefix-alloc-sections .time_critical" to all the object files of +# TARGET that match either an optionally specified list of source files or the target's "TARGET_OBJECTS" list. 
+# +# Examples: +# pico_sections_time_critical(MyTarget) +# +# pico_sections_time_critical(MyTarget +# some_time_critical_code.c +# other_time_critical_code.c +# ) +# +# \param\ TARGET The build target +# \param\ SOURCES Optional, source files of the object files to be modified. If not specified, uses the build +# target's "TARGET_OBJECTS" list. +function(pico_sections_time_critical TARGET) + add_custom_command( + TARGET ${TARGET} + PRE_LINK + COMMAND ${CMAKE_COMMAND} -E echo "execute_process($,REPLACE,/\./,/>,INCLUDE,$,$,${ARGN}>,REPLACE,^$/,>,EXCLUDE,^/|\.h$>,PREPEND,/$.dir/>,APPEND,.o$>,REPLACE,\\\.,\\\\.>,|>>,PREPEND,COMMAND ${CMAKE_OBJCOPY} --prefix-alloc-sections .time_critical >)" > ${TARGET}_sections_time_critical.cmake + COMMAND ${CMAKE_COMMAND} -P ${TARGET}_sections_time_critical.cmake + COMMAND ${CMAKE_COMMAND} -E echo "$,All,Selected> \"$\" object file alloc-section names have been updated for \"time_critical\" linker placement" + VERBATIM + COMMAND_EXPAND_LISTS + ) +endfunction() + +# pico_sections_not_in_flash(TARGET [SOURCES]) +# \brief\ Prefix target's object file sections with ".time_critical_ram" +# +# This function will apply "objcopy --prefix-alloc-sections .time_critical_ram" to all the object files of +# TARGET that match either an optionally specified list of source files or the target's "TARGET_OBJECTS" list. +# +# Examples: +# pico_sections_not_in_flash(MyTarget) +# +# pico_sections_not_in_flash(MyTarget +# some_code.c +# other_code.c +# ) +# +# \param\ TARGET The build target +# \param\ SOURCES Optional, source files of the object files to be modified. If not specified, uses the build +# target's "TARGET_OBJECTS" list. 
+function(pico_sections_not_in_flash TARGET) + add_custom_command( + TARGET ${TARGET} + PRE_LINK + COMMAND ${CMAKE_COMMAND} -E echo "execute_process($,REPLACE,/\./,/>,INCLUDE,$,$,${ARGN}>,REPLACE,^$/,>,EXCLUDE,^/|\.h$>,PREPEND,/$.dir/>,APPEND,.o$>,REPLACE,\\\.,\\\\.>,|>>,PREPEND,COMMAND ${CMAKE_OBJCOPY} --prefix-alloc-sections .time_critical_ram >)" > ${TARGET}_sections_not_in_flash.cmake + COMMAND ${CMAKE_COMMAND} -P ${TARGET}_sections_not_in_flash.cmake + COMMAND ${CMAKE_COMMAND} -E echo "$,All,Selected> \"$\" object file section names have been updated for \"not_in_flash\" linker placement" + VERBATIM + COMMAND_EXPAND_LISTS + ) +endfunction() diff --git a/src/rp2_common/pico_platform_sections/include/pico/platform/sections.h b/src/rp2_common/pico_platform_sections/include/pico/platform/sections.h index e85700295..2820ea1b5 100644 --- a/src/rp2_common/pico_platform_sections/include/pico/platform/sections.h +++ b/src/rp2_common/pico_platform_sections/include/pico/platform/sections.h @@ -136,21 +136,21 @@ * \ingroup pico_platform * * Decorates a function name, such that the function will execute from RAM (assuming it is not inlined - * into a flash function by the compiler) to avoid possible flash latency. Currently this macro is identical - * in implementation to `__not_in_flash_func`, however the semantics are distinct and a `__time_critical_func` - * may in the future be treated more specially to reduce the overhead when calling such function from a flash - * function. + * into a flash function by the compiler) to avoid possible flash latency. The semantics of + * `__time_critical_func` are distinct from `__not_in_flash_func` in that its function may be run from + * its own dedicated block of instruction RAM when available, like when PICO_USE_XIP_CACHE_AS_RAM=1 is set + * for a PICO_COPY_TO_RAM=1 RP2040 build. 
* * For example a function called my_func taking an int parameter: * * void __time_critical_func(my_func)(int some_arg) { * - * The function is placed in the `.time_critical.` linker section + * The function is placed in the `.time_critical.text.` linker section * * \see __not_in_flash_func */ #ifndef __time_critical_func -#define __time_critical_func(func_name) __not_in_flash_func(func_name) +#define __time_critical_func(func_name) __attribute__((section(".time_critical.text." __STRING(func_name)))) func_name #endif /*! \brief Indicate a function should not be stored in flash and should not be inlined @@ -164,13 +164,37 @@ * void __no_inline_not_in_flash_func(my_func)(int some_arg) { * * The function is placed in the `.time_critical.` linker section + * + * \see __not_in_flash_func */ #ifndef __no_inline_not_in_flash_func #define __no_inline_not_in_flash_func(func_name) __noinline __not_in_flash_func(func_name) #endif +/*! \brief Indicate a function is time/latency critical, should not be stored in flash, and should not be inlined + * \ingroup pico_platform + * + * Decorates a function name, such that the function will execute from RAM, explicitly marking it as + * noinline to prevent it being inlined into a flash function by the compiler + * + * For example a function called my_func taking an int parameter: + * + * void __no_inline_time_critical_func(my_func)(int some_arg) { + * + * The function is placed in the `.time_critical.text.` linker section + * + * \see __time_critical_func +*/ +#ifndef __no_inline_time_critical_func +#define __no_inline_time_critical_func(func_name) __noinline __time_critical_func(func_name) +#endif + #else +#ifndef TIME_CRITICAL_SECTION_NAME +#define TIME_CRITICAL_SECTION_NAME(x) .time_critical.text.##x +#endif + #ifndef RAM_SECTION_NAME #define RAM_SECTION_NAME(x) .time_critical.##x #endif diff --git a/src/rp2_common/pico_standard_link/CMakeLists.txt b/src/rp2_common/pico_standard_link/CMakeLists.txt index c16968bba..31ac2d2e6 100644 
--- a/src/rp2_common/pico_standard_link/CMakeLists.txt +++ b/src/rp2_common/pico_standard_link/CMakeLists.txt @@ -76,30 +76,75 @@ if (NOT TARGET pico_standard_link) # \param\ TYPE The binary type to set function(pico_set_binary_type TARGET TYPE) set_target_properties(${TARGET} PROPERTIES PICO_TARGET_BINARY_TYPE ${TYPE}) + if (TARGET bs2_default) # mirror the binary type onto the default boot stage 2; guard on the target actually modified + set_target_properties(bs2_default PROPERTIES PICO_TARGET_BINARY_TYPE ${TYPE}) + endif() endfunction() # slightly messy as we support both the preferred PICO_DEFAULT_BINARY_TYPE and the individual variables if (NOT PICO_DEFAULT_BINARY_TYPE) - if (PICO_NO_FLASH) - set(PICO_DEFAULT_BINARY_TYPE no_flash) - elseif (PICO_USE_BLOCKED_RAM) - set(PICO_DEFAULT_BINARY_TYPE blocked_ram) - elseif (PICO_COPY_TO_RAM) - set(PICO_DEFAULT_BINARY_TYPE copy_to_ram) + if (PICO_USE_XIP_CACHE_AS_RAM) + if (PICO_RP2040) + if (PICO_NO_FLASH) + set(PICO_DEFAULT_BINARY_TYPE no_flash_using_xip_ram) + elseif (PICO_COPY_TO_RAM) + set(PICO_DEFAULT_BINARY_TYPE copy_to_ram_using_xip_ram) + else() + message(FATAL_ERROR "PICO_USE_XIP_CACHE_AS_RAM requires setting either PICO_NO_FLASH or PICO_COPY_TO_RAM") + endif() + else() + message(FATAL_ERROR "Use of PICO_USE_XIP_CACHE_AS_RAM requires RP2040 hardware") + endif() else() - set(PICO_DEFAULT_BINARY_TYPE default) + if (PICO_NO_FLASH) + set(PICO_DEFAULT_BINARY_TYPE no_flash) + elseif (PICO_USE_BLOCKED_RAM) + set(PICO_DEFAULT_BINARY_TYPE blocked_ram) + elseif (PICO_COPY_TO_RAM) + set(PICO_DEFAULT_BINARY_TYPE copy_to_ram) + else() + set(PICO_DEFAULT_BINARY_TYPE default) + endif() endif() else() # we must set the individual variables here, as they are used in generator expressions, # but also for our checks below - if (PICO_DEFAULT_BINARY_TYPE STREQUAL no_flash) - set(PICO_NO_FLASH 1) - endif() - if (PICO_DEFAULT_BINARY_TYPE STREQUAL blocked_ram) - set(PICO_USE_BLOCKED_RAM 1) - endif() - if 
(PICO_DEFAULT_BINARY_TYPE STREQUAL copy_to_ram) - set(PICO_COPY_TO_RAM 1) + if (PICO_USE_XIP_CACHE_AS_RAM) + if 
(PICO_RP2040) + if (PICO_DEFAULT_BINARY_TYPE STREQUAL no_flash_using_xip_ram) + set(PICO_NO_FLASH 1) + elseif (PICO_DEFAULT_BINARY_TYPE STREQUAL copy_to_ram_using_xip_ram) + set(PICO_COPY_TO_RAM 1) + else() + message(FATAL_ERROR "PICO_USE_XIP_CACHE_AS_RAM requires setting PICO_DEFAULT_BINARY_TYPE to either no_flash_using_xip_ram or copy_to_ram_using_xip_ram") + endif() + else() + message(FATAL_ERROR "Use of PICO_USE_XIP_CACHE_AS_RAM requires RP2040 hardware") + endif() + else() + if (PICO_DEFAULT_BINARY_TYPE STREQUAL no_flash) + set(PICO_NO_FLASH 1) + elseif (PICO_DEFAULT_BINARY_TYPE STREQUAL no_flash_using_xip_ram) + if (PICO_RP2040) + set(PICO_NO_FLASH 1) + set(PICO_USE_XIP_CACHE_AS_RAM 1) +# set(PICO_USE_XIP_CACHE_AS_RAM ON CACHE BOOL "Default binaries to run time critical code from XIP RAM" FORCE) + else() + message(FATAL_ERROR "Use of PICO_DEFAULT_BINARY_TYPE no_flash_using_xip_ram requires RP2040 hardware") + endif() + elseif (PICO_DEFAULT_BINARY_TYPE STREQUAL blocked_ram) + set(PICO_USE_BLOCKED_RAM 1) + elseif (PICO_DEFAULT_BINARY_TYPE STREQUAL copy_to_ram) + set(PICO_COPY_TO_RAM 1) + elseif (PICO_DEFAULT_BINARY_TYPE STREQUAL copy_to_ram_using_xip_ram) + if (PICO_RP2040) + set(PICO_COPY_TO_RAM 1) + set(PICO_USE_XIP_CACHE_AS_RAM 1) +# set(PICO_USE_XIP_CACHE_AS_RAM ON CACHE BOOL "Default binaries to run time critical code from XIP RAM" FORCE) + else() + message(FATAL_ERROR "Use of PICO_DEFAULT_BINARY_TYPE copy_to_ram_using_xip_ram requires RP2040 hardware") + endif() + endif() endif() endif() if ((PICO_NO_FLASH AND PICO_USE_BLOCKED_RAM) OR @@ -138,13 +183,23 @@ if (NOT TARGET pico_standard_link) # PICO_NO_FLASH will be set based on PICO_TARGET_BUILD_TYPE target property being equal to no_flash if set, otherwise to the value of the PICO_NO_FLASH cmake variable unless PICO_TARGET_TYPE is set to something else # PICO_BUILD_DEFINE: PICO_NO_FLASH, whether this is a 'no_flash' build, type=bool, default=0, but dependent on CMake options, group=pico_standard_link - 
target_compile_definitions(pico_standard_link INTERFACE PICO_NO_FLASH=$,no_flash>,1,$,$>>>) + target_compile_definitions(pico_standard_link INTERFACE PICO_NO_FLASH=$,no_flash>,$,no_flash_using_xip_ram>>,1,$,$>>>) # PICO_USE_BLOCKED_RAM will be set based on PICO_TARGET_BUILD_TYPE target property being equal to use_blocked_ram if set, otherwise to the value of the PICO_USE_BLOCKED_RAM cmake variable unless PICO_TARGET_TYPE is set to something else # PICO_BUILD_DEFINE: PICO_USE_BLOCKED_RAM, whether this is a 'blocked_ram' build, type=bool, default=0, but dependent on CMake options, group=pico_standard_link target_compile_definitions(pico_standard_link INTERFACE PICO_USE_BLOCKED_RAM=$,use_blocked_ram>,1,$,$>>>) # PICO_COPY_TO_RAM will be set based on PICO_TARGET_BUILD_TYPE target property being equal to copy_to_ram if set, otherwise to the value of the PICO_COPY_TO_RAM cmake variable unless PICO_TARGET_TYPE is set to something else # PICO_BUILD_DEFINE: PICO_COPY_TO_RAM, whether this is a 'copy_to_ram' build, type=bool, default=0, but dependent on CMake options, group=pico_standard_link - target_compile_definitions(pico_standard_link INTERFACE PICO_COPY_TO_RAM=$,copy_to_ram>,1,$,$>>>) + target_compile_definitions(pico_standard_link INTERFACE PICO_COPY_TO_RAM=$,copy_to_ram>,$,copy_to_ram_using_xip_ram>>,1,$,$>>>) + # PICO_USE_XIP_CACHE_AS_RAM will be set based on the value of the PICO_USE_XIP_CACHE_AS_RAM cmake variable + # PICO_BUILD_DEFINE: PICO_USE_XIP_CACHE_AS_RAM, whether this is a '*_using_xip_ram' build, type=bool, default=0, but dependent on CMake options, group=pico_standard_link + set(_PICO_USE_XIP_CACHE_AS_RAM_EXPRESSION "$,no_flash_using_xip_ram>,$,copy_to_ram_using_xip_ram>>,1,$,$>>>") + if (TARGET boot_stage2_headers) + target_compile_definitions(boot_stage2_headers INTERFACE PICO_USE_XIP_CACHE_AS_RAM=${_PICO_USE_XIP_CACHE_AS_RAM_EXPRESSION}) + else() + target_compile_definitions(pico_standard_link INTERFACE 
PICO_USE_XIP_CACHE_AS_RAM=${_PICO_USE_XIP_CACHE_AS_RAM_EXPRESSION}) + endif() + unset(_PICO_USE_XIP_CACHE_AS_RAM_EXPRESSION) + target_compile_definitions(pico_standard_link INTERFACE PICO_CMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}") if (PICO_DEOPTIMIZED_DEBUG AND "${CMAKE_BUILD_TYPE}" STREQUAL "Debug") @@ -153,10 +208,10 @@ if (NOT TARGET pico_standard_link) # this (arguably wrong) code is restored for 1.5.1 as setting -nostartfiles on many C++ binaries causes link errors. see issue #1368 # -nostartfiles will be added if PICO_NO_FLASH would be defined to 1 - target_link_options(pico_standard_link INTERFACE $<$,no_flash>,1,$,$>>>:-nostartfiles>) + target_link_options(pico_standard_link INTERFACE $<$,no_flash>,$,no_flash_using_xip_ram>>,1,$,$>>>:-nostartfiles>) # boot_stage2 will be linked if PICO_NO_FLASH would be defined to 0; note if boot_stage2 headers not present, then boot_stage2 is omitted from build anyway if (TARGET boot_stage2_headers) - target_link_libraries(pico_standard_link INTERFACE $<$,no_flash>,1,$,$>>>>:$>,$,bs2_default>_library>) + target_link_libraries(pico_standard_link INTERFACE $<$,no_flash>,$,no_flash_using_xip_ram>>,1,$,$>>>>:$>,$,bs2_default>_library>) endif() # PICO_CMAKE_CONFIG: PICO_USE_DEFAULT_MAX_PAGE_SIZE, Don't shrink linker max page to 4096, type=bool, default=0, advanced=true, group=pico_standard_link