2025-09-11 3:46 AM - edited 2025-09-11 5:16 AM
Can you please help me out with this one?
NemaGFX works fine on the STM32U5G9J-DK2 with an STM32U5G9ZJ MCU (Cortex-M33), without using TouchGFX or RTOS, just bare metal.
Now I'm trying to get it working on the STM32H7S78-DK with an STM32H7S7L8 Bootflash MCU (Cortex-M7).
The software runs on external flash (XSPI2 0x70000000) and the frame buffer is in external RAM (XSPI1 0x90000000).
I've configured the LTDC, and when the MCU writes to the frame buffer, it's displayed correctly on the screen.
As with the STM32U5G9J-DK2 kit, I ran MX_GPU2D_Init(), nema_sys_init(), and nema_init().
Then I tried to draw a rectangle, but it fails: nema_cl_submit hangs as soon as I add a command after nema_cl_bind.
This all works fine on the STM32U5G9J-DK2 kit, just not on the STM32H7S78-DK.
The 'test' code:
/* Bare-metal NemaGFX smoke test: initialise the library, describe the frame
 * buffer, record one stroked rectangle into a command list and submit it. */

/* Initialisation calls, in the same order the post reports using on the
 * STM32U5G9J-DK2. */
MX_GPU2D_Init();
nema_sys_init();
nema_init();
/* Describe the frame buffer: dimensions, bytes per line and pixel format. */
NemaGFX.fbo.w = DISPLAY_SIZE_W;
NemaGFX.fbo.h = DISPLAY_SIZE_H;
NemaGFX.fbo.stride = (DISPLAY_SIZE_W * DISPLAY_BYTES_PER_PIXEL);
NemaGFX.fbo.format = NEMA_BGR24;
/* A buffer object is first created from pool 0, then its physical address and
 * size are overwritten so that it refers to FrameBuffer[0] instead.
 * NOTE(review): only base_phys and size are overridden; any other field set by
 * nema_buffer_create_pool keeps its pool value, and the pool allocation itself
 * is no longer referenced - confirm this is intended. */
NemaGFX.fbo.bo = nema_buffer_create_pool(0, NemaGFX.fbo.stride * NemaGFX.fbo.h);
NemaGFX.fbo.bo.base_phys = (uintptr_t)&FrameBuffer[0];
NemaGFX.fbo.bo.size = NemaGFX.fbo.stride * NemaGFX.fbo.h;
/* Initialise the vector-graphics layer for the frame buffer resolution. */
nema_vg_init(NemaGFX.fbo.w, NemaGFX.fbo.h);
/* Create an 8192-byte command list and bind it as a circular list. */
NemaGFX.cl = nema_cl_create_sized(8192);
nema_cl_bind_circular(&NemaGFX.cl);
/* Destination and source textures are both bound to the same frame buffer. */
nema_bind_dst_tex(NemaGFX.fbo.bo.base_phys, NemaGFX.fbo.w, NemaGFX.fbo.h, NemaGFX.fbo.format, NemaGFX.fbo.stride);
nema_bind_src_tex(NemaGFX.fbo.bo.base_phys, NemaGFX.fbo.w, NemaGFX.fbo.h, NemaGFX.fbo.format, NemaGFX.fbo.stride, NEMA_FILTER_PS);
nema_set_blend_fill(NEMA_BL_SIMPLE);
/* Clip rectangle spans the whole display. */
nema_set_clip(0, 0, DISPLAY_SIZE_W, DISPLAY_SIZE_H);
/* Create a paint object and clear it before configuring it below. */
NemaGFX.paint = nema_vg_paint_create();
nema_vg_paint_clear(NemaGFX.paint);
/* Start from the identity matrix; the translate/rotate/scale calls are given
 * 0, 0 and 1 respectively, so presumably they leave the matrix unchanged. */
nema_mat3x3_load_identity(NemaGFX.m);
nema_mat3x3_translate(NemaGFX.m, 0.0, 0.0);
nema_mat3x3_rotate(NemaGFX.m, 0.0);
nema_mat3x3_scale(NemaGFX.m, 1.0, 1.0);
/* Stroke (outline) drawing with a solid colour paint and a stroke width of 20. */
nema_vg_set_fill_rule(NEMA_VG_STROKE);
nema_vg_paint_set_type(NemaGFX.paint, NEMA_VG_PAINT_COLOR);
nema_vg_paint_set_paint_color(NemaGFX.paint, nema_rgba(0xFF, 0x00, 0xFF, 0xFF));
nema_vg_stroke_set_width(20);
/* The single drawing command recorded into the command list. */
nema_vg_draw_rect(100, 100, 600, 280, NemaGFX.m, NemaGFX.paint);
/* Submit, wait for completion, then release the resources. The printf calls
 * trace how far execution gets. */
printf("nema_cl_submit\n");
nema_cl_submit(&NemaGFX.cl); // <- This hangs
printf("nema_cl_wait\n");
nema_cl_wait(&NemaGFX.cl);
printf("nema_cl_destroy\n");
nema_cl_destroy(&NemaGFX.cl);
nema_vg_paint_destroy(NemaGFX.paint);
I know NemaGFX works with RAM and not cache, so I've checked my MPU settings and even disabled dcache to rule out cache issues.
My ld file
/*
******************************************************************************
**
** @file : LinkerScript.ld
**
** @author : STM32CubeIDE
**
** Abstract : Linker script for STM32H7Sxx Device
** 128KBytes FLASH
** 456KBytes RAM
**
** Set heap size, stack size and stack location according
** to application requirements.
**
** Set memory bank area and size if external memory is used
**
** Target : STMicroelectronics STM32
**
** Distribution: The file is distributed as is, without any warranty
** of any kind.
**
******************************************************************************
** @attention
**
** Copyright (c) 2025 STMicroelectronics.
** All rights reserved.
**
** This software is licensed under terms that can be found in the LICENSE file
** in the root directory of this software component.
** If no LICENSE file comes with this software, it is provided AS-IS.
**
******************************************************************************
*/
/* Entry Point */
ENTRY(Reset_Handler)
/* Highest address of the user mode stack */
_estack = ORIGIN(DTCM) + LENGTH(DTCM); /* end of "DTCM" Ram type memory */
_Min_Heap_Size = 0x1000; /* required amount of heap */
_Min_Stack_Size = 0x2000; /* required amount of stack */
/* The __RAM_*, __FLASH_*, __EXTFLASH_* and __EXTRAM_* symbols below are not
   referenced by the MEMORY or SECTIONS commands of this script, which use
   literal addresses instead. */
__RAM_BEGIN = 0x24000000;
__RAM_SIZE = 0x00071C00;
__RAM_NONCACHEABLEBUFFER_SIZE = 0x0400;
__FLASH_BEGIN = 0x70000000; /* XSPI2 0x70000000 - 0x7FFFFFFF EXTFLASH 256MB */
__FLASH_SIZE = 0x10000000; /* XSPI2 EXTFLASH 256MB */
__EXTFLASH_BEGIN = 0x70000000; /* XSPI2 0x70000000 - 0x7FFFFFFF EXTFLASH 256MB */
__EXTFLASH_SIZE = 0x10000000; /* XSPI2 EXTFLASH 256MB */
__EXTRAM_BEGIN = 0x90000000; /* XSPI1 0x90000000 - 0x91FFFFFF EXTRAM 32MB */
__EXTRAM_SIZE = 0x02000000; /* XSPI1 EXTRAM 32MB */
/* Memories definition */
/* Memory regions. RAM and RAM_CMD are contiguous (RAM_CMD starts where RAM
   ends). FLASH and FLASH_GFX together span the same address range as EXTFLASH;
   EXTFLASH is not used as an output region by the SECTIONS command below. */
MEMORY
{
/* RAM (xrw) : ORIGIN = __RAM, LENGTH = __RAM_SIZE -__RAM_NONCACHEABLEBUFFER_SIZE */
/* RAM_NONCACHEABLEBUFFER (xrw) : ORIGIN = __RAM + __RAM_SIZE - __RAM_NONCACHEABLEBUFFER_SIZE, LENGTH = __RAM_NONCACHEABLEBUFFER_SIZE */
RAM (rw) : ORIGIN = 0x24000000, LENGTH = 0x0006e000 /* 0x24000000 - 0x2406DFFF, 440KB */
RAM_CMD (rw) : ORIGIN = 0x2406e000, LENGTH = 0x00004000 /* 0x2406E000 - 0x24071FFF, 16KB; base is 8KB-aligned, not 16KB-aligned */
ITCM (xrw) : ORIGIN = 0x00000000, LENGTH = 0x00010000
DTCM (rw) : ORIGIN = 0x20000000, LENGTH = 0x00010000
SRAMAHB (rw) : ORIGIN = 0x30000000, LENGTH = 0x00008000
BKPSRAM (rw) : ORIGIN = 0x38800000, LENGTH = 0x00001000
FLASH (xr) : ORIGIN = 0x70000000, LENGTH = 0x00200000 /* XSPI2 0x70000000 - 0x701FFFFF, first 2MB of EXTFLASH (code) */
FLASH_GFX (r) : ORIGIN = 0x70200000, LENGTH = 0x0FE00000 /* XSPI2 0x70200000 - 0x7FFFFFFF, remaining 254MB of EXTFLASH */
EXTFLASH (xr) : ORIGIN = 0x70000000, LENGTH = 0x10000000 /* XSPI2 0x70000000 - 0x7FFFFFFF EXTFLASH 256MB (overlaps FLASH and FLASH_GFX) */
EXTRAM (rw) : ORIGIN = 0x90000000, LENGTH = 0x02000000 /* XSPI1 0x90000000 - 0x91FFFFFF EXTRAM 32MB */
}
/* Sections */
/* Output section layout: code and constants go to FLASH, initialised and
   zero-initialised data to RAM, heap/stack reservation to DTCM, graphics
   buffers to EXTRAM / RAM_CMD and graphics assets to FLASH_GFX. */
SECTIONS
{
/* The startup code into "FLASH" FLASH type memory */
.isr_vector :
{
. = ALIGN(4);
KEEP(*(.isr_vector)) /* Startup code */
. = ALIGN(4);
} >FLASH
/* The program code and other data into "FLASH" FLASH type memory */
.text :
{
. = ALIGN(4);
*(.text) /* .text sections (code) */
*(.text*) /* .text* sections (code) */
*(.glue_7) /* glue arm to thumb code */
*(.glue_7t) /* glue thumb to arm code */
*(.eh_frame)
KEEP (*(.init))
KEEP (*(.fini))
. = ALIGN(4);
_etext = .; /* define a global symbols at end of code */
} >FLASH
/* Constant data into "FLASH" FLASH type memory */
.rodata :
{
. = ALIGN(4);
*(.rodata) /* .rodata sections (constants, strings, etc.) */
*(.rodata*) /* .rodata* sections (constants, strings, etc.) */
. = ALIGN(4);
} >FLASH
.ARM.extab (READONLY) : /* The READONLY keyword is only supported in GCC11 and later, remove it if using GCC10 or earlier. */
{
. = ALIGN(4);
*(.ARM.extab* .gnu.linkonce.armextab.*)
. = ALIGN(4);
} >FLASH
.ARM (READONLY) : /* The READONLY keyword is only supported in GCC11 and later, remove it if using GCC10 or earlier. */
{
. = ALIGN(4);
__exidx_start = .;
*(.ARM.exidx*)
__exidx_end = .;
. = ALIGN(4);
} >FLASH
.preinit_array (READONLY) : /* The READONLY keyword is only supported in GCC11 and later, remove it if using GCC10 or earlier. */
{
. = ALIGN(4);
PROVIDE_HIDDEN (__preinit_array_start = .);
KEEP (*(.preinit_array*))
PROVIDE_HIDDEN (__preinit_array_end = .);
. = ALIGN(4);
} >FLASH
.init_array (READONLY) : /* The READONLY keyword is only supported in GCC11 and later, remove it if using GCC10 or earlier. */
{
. = ALIGN(4);
PROVIDE_HIDDEN (__init_array_start = .);
KEEP (*(SORT(.init_array.*)))
KEEP (*(.init_array*))
PROVIDE_HIDDEN (__init_array_end = .);
. = ALIGN(4);
} >FLASH
.fini_array (READONLY) : /* The READONLY keyword is only supported in GCC11 and later, remove it if using GCC10 or earlier. */
{
. = ALIGN(4);
PROVIDE_HIDDEN (__fini_array_start = .);
KEEP (*(SORT(.fini_array.*)))
KEEP (*(.fini_array*))
PROVIDE_HIDDEN (__fini_array_end = .);
. = ALIGN(4);
} >FLASH
/* Used by the startup to initialize data */
_sidata = LOADADDR(.data);
/* Initialized data sections into "RAM" Ram type memory; the load image is
   stored in FLASH (AT> FLASH) and _sidata marks where it starts */
.data :
{
. = ALIGN(4);
_sdata = .; /* create a global symbol at data start */
*(.data) /* .data sections */
*(.data*) /* .data* sections */
*(.RamFunc) /* .RamFunc sections */
*(.RamFunc*) /* .RamFunc* sections */
. = ALIGN(4);
_edata = .; /* define a global symbol at data end */
} >RAM AT> FLASH
/* Uninitialized data section into "RAM" Ram type memory */
. = ALIGN(4);
.bss :
{
/* This is used by the startup in order to initialize the .bss section */
_sbss = .; /* define a global symbol at bss start */
__bss_start__ = _sbss;
*(.bss)
*(.bss*)
*(COMMON)
. = ALIGN(4);
_ebss = .; /* define a global symbol at bss end */
__bss_end__ = _ebss;
} >RAM
/* User_heap_stack section, used to check that there is enough "DTCM" memory
   left for the minimum heap and stack sizes (this section is placed in DTCM,
   not RAM); "end"/"_end" mark its start */
._user_heap_stack :
{
. = ALIGN(8);
PROVIDE ( end = . );
PROVIDE ( _end = . );
. = . + _Min_Heap_Size;
. = . + _Min_Stack_Size;
. = ALIGN(8);
} >DTCM
/* Remove information from the compiler libraries */
/DISCARD/ :
{
libc.a ( * )
libm.a ( * )
libgcc.a ( * )
}
.ARM.attributes 0 : { *(.ARM.attributes) }
/* Frame buffer and NemaGFX stencil buffer: external RAM, not loaded (NOLOAD),
   so the contents are not part of the image */
BufferSection (NOLOAD) :
{
*(TouchGFX_Framebuffer TouchGFX_Framebuffer.*)
*(.gnu.linkonce.r.*)
. = ALIGN(0x8);
*(Nemagfx_Stencil_Buffer Nemagfx_Stencil_Buffer.*)
*(.gnu.linkonce.r.*)
. = ALIGN(0x8);
} >EXTRAM
/* NemaGFX memory pool: placed in the RAM_CMD region (0x2406E000, 16KB), not
   loaded. The section name suggests this range is meant to be configured as
   non-cacheable by the MPU setup - confirm against the MPU configuration. */
UncachedSection (NOLOAD) :
{
*(Nemagfx_Memory_Pool_Buffer Nemagfx_Memory_Pool_Buffer.*)
*(.gnu.linkonce.r.*)
. = ALIGN(0x8);
} >RAM_CMD
/* Graphics assets (fonts, texts, other external-flash data) into FLASH_GFX */
FontFlashSection :
{
*(FontFlashSection FontFlashSection.*)
*(.gnu.linkonce.r.*)
. = ALIGN(0x4);
} >FLASH_GFX
TextFlashSection :
{
*(TextFlashSection TextFlashSection.*)
*(.gnu.linkonce.r.*)
. = ALIGN(0x4);
} >FLASH_GFX
ExtFlashSection :
{
*(ExtFlashSection ExtFlashSection.*)
*(.gnu.linkonce.r.*)
. = ALIGN(0x4);
} >FLASH_GFX
}
It has a part at the end that is used for TouchGFX, and I placed the frame buffer in the TouchGFX_Framebuffer section.
Honestly, I don't yet know how to use these sections in bare metal:
- Nemagfx_Stencil_Buffer
- Nemagfx_Memory_Pool_Buffer
The calls that use these are abstracted away in the TouchGFX RTOS examples.
This is one area where the problem could be.
I just assigned buffers to these areas, but I don't know where to make use of them:
#define RING_SIZE 1024 /* Ring Buffer Size in byte */
#define NEMAGFX_MEM_POOL_SIZE 16128 /* NemaGFX byte pool size in byte */
#define NEMAGFX_STENCIL_POOL_SIZE 389120 /* NemaGFX stencil buffer pool size in byte */
/* NemaGFX memory pool. The section name matches the "Nemagfx_Memory_Pool_Buffer"
 * input section that the linker script places in RAM_CMD (16384 bytes), so the
 * 16128-byte pool fits there.
 * NOTE(review): this array is static and is not referenced by any code shown
 * here; something must pass it to the NemaGFX memory allocator for it to have
 * any effect - confirm where that happens. */
static uint8_t nemagfx_pool_mem[NEMAGFX_MEM_POOL_SIZE] __attribute__((section("Nemagfx_Memory_Pool_Buffer")));
/* NemaGFX stencil buffer memory. The section name matches the
 * "Nemagfx_Stencil_Buffer" input section that the linker script places in
 * EXTRAM. NOTE(review): also static and unreferenced in the code shown. */
static uint8_t nemagfx_stencil_buffer_mem[NEMAGFX_STENCIL_POOL_SIZE] __attribute__((section("Nemagfx_Stencil_Buffer")));
But I use the framebuffer section, and that works, lots of space in external RAM.
/* Two frame buffers of DISPLAY_FRAMEBUFFER_SIZE bytes each, emitted into the
 * "TouchGFX_Framebuffer" input section (placed in EXTRAM by the linker script). */
uint8_t FrameBuffer[2][DISPLAY_FRAMEBUFFER_SIZE] __attribute__((section("TouchGFX_Framebuffer")));
And this is my MPU configuration:
static void MPU_Config(void)
{
MPU_Region_InitTypeDef MPU_InitStruct = {0};
/* Disables the MPU */
HAL_MPU_Disable();
/* Disables all MPU regions */
for(uint8_t i=0; i<__MPU_REGIONCOUNT; i++)
{
HAL_MPU_DisableRegion(i);
}
/** Initializes and configures the Region and the memory to be protected
*/
MPU_InitStruct.Enable = MPU_REGION_ENABLE;
MPU_InitStruct.Number = MPU_REGION_NUMBER0;
MPU_InitStruct.BaseAddress = 0x00000000;
MPU_InitStruct.Size = MPU_REGION_SIZE_4GB;
MPU_InitStruct.SubRegionDisable = 0x87;
MPU_InitStruct.TypeExtField = MPU_TEX_LEVEL0;
MPU_InitStruct.AccessPermission = MPU_REGION_NO_ACCESS;
MPU_InitStruct.DisableExec = MPU_INSTRUCTION_ACCESS_DISABLE;
MPU_InitStruct.IsShareable = MPU_ACCESS_SHAREABLE;
MPU_InitStruct.IsCacheable = MPU_ACCESS_NOT_CACHEABLE;
MPU_InitStruct.IsBufferable = MPU_ACCESS_NOT_BUFFERABLE;
HAL_MPU_ConfigRegion(&MPU_InitStruct);
/** Initializes and configures the Region and the memory to be protected
*/
MPU_InitStruct.Number = MPU_REGION_NUMBER1;
MPU_InitStruct.BaseAddress = 0x70000000; /* XSPI2 0x70000000 - 0x7FFFFFFF 0x10000000 = 256MB */
MPU_InitStruct.Size = MPU_REGION_SIZE_256MB;
MPU_InitStruct.SubRegionDisable = 0x0;
MPU_InitStruct.TypeExtField = MPU_TEX_LEVEL1;
MPU_InitStruct.AccessPermission = MPU_REGION_FULL_ACCESS;
MPU_InitStruct.DisableExec = MPU_INSTRUCTION_ACCESS_DISABLE;
MPU_InitStruct.IsShareable = MPU_ACCESS_NOT_SHAREABLE;
MPU_InitStruct.IsCacheable = MPU_ACCESS_CACHEABLE;
MPU_InitStruct.IsBufferable = MPU_ACCESS_BUFFERABLE;
HAL_MPU_ConfigRegion(&MPU_InitStruct);
/** Initializes and configures the Region and the memory to be protected
*/
MPU_InitStruct.Number = MPU_REGION_NUMBER2;
MPU_InitStruct.BaseAddress = 0x70000000; /* XSPI2 0x70000000 - 0x7FFFFFFF 0x10000000 = 256MB */
MPU_InitStruct.Size = MPU_REGION_SIZE_2MB;
MPU_InitStruct.SubRegionDisable = 0x0;
MPU_InitStruct.TypeExtField = MPU_TEX_LEVEL1;
MPU_InitStruct.AccessPermission = MPU_REGION_FULL_ACCESS;
MPU_InitStruct.DisableExec = MPU_INSTRUCTION_ACCESS_ENABLE;
MPU_InitStruct.IsShareable = MPU_ACCESS_NOT_SHAREABLE;
MPU_InitStruct.IsCacheable = MPU_ACCESS_CACHEABLE;
MPU_InitStruct.IsBufferable = MPU_ACCESS_BUFFERABLE;
HAL_MPU_ConfigRegion(&MPU_InitStruct);
/** Initializes and configures the Region and the memory to be protected
*/
MPU_InitStruct.Number = MPU_REGION_NUMBER3;
MPU_InitStruct.BaseAddress = 0x90000000; /* XSPI1 0x90000000 - 0x91FFFFFF 0x02000000 = 32MB */
MPU_InitStruct.Size = MPU_REGION_SIZE_32MB;
MPU_InitStruct.TypeExtField = MPU_TEX_LEVEL1;
MPU_InitStruct.AccessPermission = MPU_REGION_FULL_ACCESS;
MPU_InitStruct.DisableExec = MPU_INSTRUCTION_ACCESS_DISABLE;
MPU_InitStruct.IsShareable = MPU_ACCESS_NOT_SHAREABLE;
MPU_InitStruct.IsCacheable = MPU_ACCESS_NOT_CACHEABLE;
MPU_InitStruct.IsBufferable = MPU_ACCESS_NOT_BUFFERABLE;
HAL_MPU_ConfigRegion(&MPU_InitStruct);
/** Initializes and configures the Region and the memory to be protected
*/
MPU_InitStruct.Number = MPU_REGION_NUMBER4;
MPU_InitStruct.BaseAddress = 0x20000000;
MPU_InitStruct.Size = MPU_REGION_SIZE_64KB;
MPU_InitStruct.TypeExtField = MPU_TEX_LEVEL1;
MPU_InitStruct.AccessPermission = MPU_REGION_FULL_ACCESS;
MPU_InitStruct.DisableExec = MPU_INSTRUCTION_ACCESS_DISABLE;
MPU_InitStruct.IsShareable = MPU_ACCESS_NOT_SHAREABLE;
MPU_InitStruct.IsCacheable = MPU_ACCESS_NOT_CACHEABLE;
MPU_InitStruct.IsBufferable = MPU_ACCESS_NOT_BUFFERABLE;
HAL_MPU_ConfigRegion(&MPU_InitStruct);
/** Initializes and configures the Region and the memory to be protected
*/
MPU_InitStruct.Number = MPU_REGION_NUMBER5;
MPU_InitStruct.BaseAddress = 0x24000000; /* RAM */
MPU_InitStruct.Size = MPU_REGION_SIZE_512KB;
MPU_InitStruct.TypeExtField = MPU_TEX_LEVEL1;
MPU_InitStruct.AccessPermission = MPU_REGION_FULL_ACCESS;
MPU_InitStruct.DisableExec = MPU_INSTRUCTION_ACCESS_DISABLE;
MPU_InitStruct.IsShareable = MPU_ACCESS_SHAREABLE;
MPU_InitStruct.IsCacheable = MPU_ACCESS_CACHEABLE;
MPU_InitStruct.IsBufferable = MPU_ACCESS_BUFFERABLE;
HAL_MPU_ConfigRegion(&MPU_InitStruct);
/** Initializes and configures the Region and the memory to be protected
*/
MPU_InitStruct.Number = MPU_REGION_NUMBER6;
MPU_InitStruct.BaseAddress = 0x2406E000; /* RAM_CMD */
MPU_InitStruct.Size = MPU_REGION_SIZE_16KB;
MPU_InitStruct.TypeExtField = MPU_TEX_LEVEL0;
MPU_InitStruct.AccessPermission = MPU_REGION_FULL_ACCESS;
MPU_InitStruct.DisableExec = MPU_INSTRUCTION_ACCESS_DISABLE;
MPU_InitStruct.IsShareable = MPU_ACCESS_SHAREABLE;
MPU_InitStruct.IsCacheable = MPU_ACCESS_NOT_CACHEABLE;
MPU_InitStruct.IsBufferable = MPU_ACCESS_BUFFERABLE;
HAL_MPU_ConfigRegion(&MPU_InitStruct);
/* Enables the MPU */
HAL_MPU_Enable(MPU_PRIVILEGED_DEFAULT);
}
And my cache:
/* CPU cache setup: instruction cache on, data cache off. */
/* Enable I-Cache---------------------------------------------------------*/
SCB_EnableICache();
/* D-Cache is deliberately left disabled here to rule out cache issues */
/* while debugging; the normal configuration would be SCB_EnableDCache(). */
SCB_DisableDCache();
I've attached the IOC file, but that probably won't reveal the problem.
I'm doing something wrong, but what can I check?
How do I use these sections in bare-metal?
- Nemagfx_Stencil_Buffer
- Nemagfx_Memory_Pool_Buffer
Are there any working examples without TouchGFX and without the operating system?
Thanks for helping me out!