2025-10-22 11:09 PM - edited 2025-10-23 1:30 AM
Hello,
I'm trying to use the GPU2D on the STM32N6570-DK, with NemaGFX.
However, the GPU hangs on random functions.
Well, actually it is the same functions every time, but which one hangs depends on the number of functions to be executed, so it seems the GPU can only execute a limited number of functions, and then it hangs.
And many of these don't even complete the drawing of the shape.
The nema_vg_draw_circle doesn't complete drawing the circle and hangs.
Drawing only the rounded rectangle results in an incomplete draw and a hang of the function:
If I remove some functions, the app hangs on nema_cl_wait.
I think it might be related to the buffer initialization in nema_hal.c, or that nema_hal.c contains other errors.
/*
* File : nema_hal.c
*/
/* Includes ----------------------------------------------------------------- */
#include <stdlib.h>
#include <string.h>
#include "tsi_malloc.h"
#include "nema_sys_defs.h"
#include "nema_core.h"
#include "nema_vg.h"
#include "main.h"
/* Include platform-specific header files ----------------------------------- */
#if defined(STM32V7R7xx) || defined(STM32V7R5xx) || defined(STM32V7S7xx) || defined(STM32V7S5xx)
#include "stm32v7xx_hal.h"
#include "stm32v7xx_hal_gpu2d.h"
#elif defined(STM32H7S7xx)
#include "stm32h7rsxx_hal.h"
#include "stm32h7rsxx_hal_gpu2d.h"
#elif defined(STM32N647xx) || defined(STM32N657xx)
#include "stm32n6xx_hal.h"
#include "stm32n6xx_hal_gpu2d.h"
#elif defined(STM32U599xx) || defined(STM32U5A9xx) || defined(STM32U5G9xx)
#include "stm32u5xx_hal.h"
#include "stm32u5xx_hal_gpu2d.h"
#else
#error "Unsupported Platform"
#endif /* STM32V7R7xx | STM32V7R5xx | STM32V7S7xx | STM32V7S5xx */
/* Defines ------------------------------------------------------------------ */
/*
 * Sizes, in bytes, of the memory regions used by this HAL:
 * - RING_SIZE is the size requested for the ring buffer in nema_sys_init().
 * - NEMAGFX_MEM_POOL_SIZE is the length of tsi_malloc pool 0.
 * - NEMAGFX_STENCIL_POOL_SIZE is the length of tsi_malloc pool 1.
 */
#define RING_SIZE 1024 /* Ring Buffer Size in byte */
#define NEMAGFX_MEM_POOL_SIZE 24320 /* NemaGFX byte pool size in byte */
#define NEMAGFX_STENCIL_POOL_SIZE 389120 /* NemaGFX stencil buffer pool size in byte 800*480+5120 */
/*
 * NOTE(review): the pool arrays and FrameBuffer below are only defined for
 * STM32H7S7xx / STM32N647xx / STM32N657xx, while nema_sys_init() references
 * the pool arrays unconditionally - confirm the other platforms accepted by
 * the include section get these objects from somewhere else.
 */
#if defined(STM32H7S7xx) || defined(STM32N647xx) || defined(STM32N657xx)
/* RAM_CMD */
/* Backing storage for the two tsi_malloc pools; placement is decided by the
 * linker through the named sections. */
static uint8_t nemagfx_pool_mem[NEMAGFX_MEM_POOL_SIZE] __attribute__((section("Nemagfx_Memory_Pool_Buffer"))); /* NemaGFX memory pool */
static uint8_t nemagfx_stencil_buffer_mem[NEMAGFX_STENCIL_POOL_SIZE] __attribute__((section("Nemagfx_Stencil_Buffer"))); /* NemaGFX stencil buffer memory */
//static uint8_t nemagfx_ring_buffer_mem[RING_SIZE] __attribute__((section("Nemagfx_Ring_buffer")));
/* NemaGFX frame buffer memory 0x90000000 */
/* The element type of FrameBuffer follows DISPLAY_BYTES_PER_PIXEL; the array
 * holds DISPLAY_NO_OF_FRAMEBUFFERS frames of DISPLAY_SIZE_H rows by
 * DISPLAY_SIZE_W pixels and is not static, so other files can reference it. */
#if (DISPLAY_BYTES_PER_PIXEL == 2)
Color_RGB565_t FrameBuffer[DISPLAY_NO_OF_FRAMEBUFFERS][DISPLAY_SIZE_H][DISPLAY_SIZE_W] __attribute__((section("Nemagfx_Framebuffer")));
#elif (DISPLAY_BYTES_PER_PIXEL == 3)
Color_RGB888_t FrameBuffer[DISPLAY_NO_OF_FRAMEBUFFERS][DISPLAY_SIZE_H][DISPLAY_SIZE_W] __attribute__((section("Nemagfx_Framebuffer")));
#elif (DISPLAY_BYTES_PER_PIXEL == 4)
Color_RGBA8888_t FrameBuffer[DISPLAY_NO_OF_FRAMEBUFFERS][DISPLAY_SIZE_H][DISPLAY_SIZE_W] __attribute__((section("Nemagfx_Framebuffer")));
#endif /* DISPLAY_BYTES_PER_PIXEL */
#endif /* STM32H7S7xx */
/* NemaGFX ring buffer memory */
/* Ring buffer descriptor; its buffer object is filled in by nema_sys_init(). */
static nema_ringbuffer_t ring_buffer_str = {{0}};
/* ID of the last command list reported complete by the command-list-complete
 * callback; polled by nema_wait_irq_cl(). Starts at -1 and is set to 0 by
 * nema_sys_init(). */
static volatile int last_cl_id = -1;
/* Function implementations ------------------------------------------------- */
/*
 * Command-list-complete notification from the GPU2D HAL.
 * Records CmdListID in last_cl_id so that nema_wait_irq_cl() can observe it.
 * Depending on USE_HAL_GPU2D_REGISTER_CALLBACKS this is either a private
 * callback (registered in nema_sys_init()) or the HAL_GPU2D_* callback itself.
 */
#if (USE_HAL_GPU2D_REGISTER_CALLBACKS == 1)
static void GPU2D_CommandListCpltCallback(GPU2D_HandleTypeDef* hgpu2d, uint32_t CmdListID)
#else /* USE_HAL_GPU2D_REGISTER_CALLBACKS = 0 */
void HAL_GPU2D_CommandListCpltCallback(GPU2D_HandleTypeDef* hgpu2d, uint32_t CmdListID)
#endif /* USE_HAL_GPU2D_REGISTER_CALLBACKS = 1 */
{
    /* The handle is not needed: only the completed ID is recorded. */
    UNUSED(hgpu2d);

    last_cl_id = (int)CmdListID;
}
/*
 * GPU2D error interrupt callback.
 *
 * Reads GPU2D_SYS_INTERRUPT and writes the same value back (this clears the
 * interrupt), then performs ICACHE maintenance depending on which flag was
 * pending:
 *   - bit 2: disable the ICACHE, then release external hold 2
 *            (hold 2 is asserted by platform_disable_cache()).
 *   - bit 3: enable and invalidate the ICACHE, then release external hold 3
 *            (hold 3 is asserted by platform_invalidate_cache()).
 */
void HAL_GPU2D_ErrorCallback(GPU2D_HandleTypeDef *hgpu2d)
{
    const uint32_t hold2_pending_mask = 1UL << 2;
    const uint32_t hold3_pending_mask = 1UL << 3;

    /* clear the ER interrupt */
    const uint32_t pending = nema_reg_read(GPU2D_SYS_INTERRUPT);
    nema_reg_write(GPU2D_SYS_INTERRUPT, pending);

    /* external GPU2D cache maintenance */
    if ((pending & hold2_pending_mask) != 0U)
    {
        HAL_ICACHE_Disable();
        nema_ext_hold_deassert_imm(2);
    }

    if ((pending & hold3_pending_mask) != 0U)
    {
        HAL_ICACHE_Enable();
        HAL_ICACHE_Invalidate();
        nema_ext_hold_deassert_imm(3);
    }
}
/*
 * Platform initialization for NemaGFX.
 *
 * Steps, in order:
 *   1. Initialize the GPU2D HAL handle (and, when
 *      USE_HAL_GPU2D_REGISTER_CALLBACKS == 1, register the
 *      command-list-complete callback).
 *   2. Initialize tsi_malloc pool 0 over nemagfx_pool_mem and pool 1 over
 *      nemagfx_stencil_buffer_mem.
 *   3. Allocate RING_SIZE bytes for the ring buffer with nema_buffer_create()
 *      and initialize the ring buffer.
 *   4. Set last_cl_id to 0.
 *
 * NEMA|GFX includes the following API calls for memory allocation,
 * deallocation and mapping:
 *   - nema_buffer_create()      - Allocate memory
 *   - nema_buffer_create_pool() - Allocate memory from specific memory pool
 *   - nema_buffer_map()         - Map allocated memory space for CPU access
 *   - nema_buffer_unmap()       - Unmap previously mapped memory space
 *   - nema_buffer_destroy()     - Deallocate memory space
 *
 * Returns 0 on success, or a non-zero error code when a pool, the ring buffer
 * allocation or the ring buffer initialization fails. Failures are reported
 * through the return value as well as through assert(), so they are not lost
 * in builds where assert() is compiled out.
 */
int32_t nema_sys_init(void)
{
    int error_code = 0;

    /* Initialize GPU2D */
    /* NOTE(review): the status returned by HAL_GPU2D_Init() is not checked. */
    hgpu2d.Instance = GPU2D;
    HAL_GPU2D_Init(&hgpu2d);
#if (USE_HAL_GPU2D_REGISTER_CALLBACKS == 1)
    /* Register Command List Complete Callback */
    HAL_GPU2D_RegisterCommandListCpltCallback(&hgpu2d, GPU2D_CommandListCpltCallback);
#endif /* USE_HAL_GPU2D_REGISTER_CALLBACKS = 1 */

    /* Initialize Mem Space: pool 0 is the general NemaGFX pool */
    error_code = tsi_malloc_init_pool_aligned(0, (void*)nemagfx_pool_mem, (uintptr_t)nemagfx_pool_mem, NEMAGFX_MEM_POOL_SIZE, 1, 8);
    assert(error_code == 0);
    if (error_code != 0)
    {
        return error_code;
    }

    /* Pool 1 backs the stencil buffer */
    error_code = tsi_malloc_init_pool_aligned(1, (void*)nemagfx_stencil_buffer_mem, (uintptr_t)nemagfx_stencil_buffer_mem, NEMAGFX_STENCIL_POOL_SIZE, 1, 8);
    assert(error_code == 0);
    if (error_code != 0)
    {
        return error_code;
    }

    /* Allocate ring_buffer memory */
    ring_buffer_str.bo = nema_buffer_create(RING_SIZE);
    assert(ring_buffer_str.bo.base_virt);
    if (!ring_buffer_str.bo.base_virt)
    {
        return -1;
    }

    /* Initialize Ring Buffer */
    error_code = nema_rb_init(&ring_buffer_str, 1);
    if (error_code < 0)
    {
        return error_code;
    }

    /* Reset last_cl_id counter */
    last_cl_id = 0;

    return error_code;
}
/*
 * Bring up the NemaGFX core and NemaVG for this board.
 *
 * Sequence: initialize the NemaGFX library, enable the bus error interrupts,
 * initialize NemaVG with its stencil buffer taken from pool 1, enable
 * large-coordinate handling, and enable external holds 2 and 3 together with
 * their interrupts (serviced in HAL_GPU2D_ErrorCallback()).
 *
 * nema_sys_init() is deliberately NOT called here. It is the platform hook
 * that nema_init() invokes itself. Calling it a second time at the end of
 * this function re-initialized tsi_malloc pools 0 and 1 and the ring buffer
 * after nema_init() and nema_vg_init_stencil_pool() had already allocated
 * from them, so later allocations (command lists, paints) could be handed
 * memory that was still in use.
 */
void nema_components_init(void)
{
    /* Initialize NemaGFX library (this also runs nema_sys_init()) */
    nema_init();

    nema_reg_write(0xFFC, 0x7E); /* Enable bus error interrupts */

    nema_vg_init_stencil_pool(DISPLAY_SIZE_W, DISPLAY_SIZE_H, 1);
    nema_vg_handle_large_coords(1, 1);

    nema_ext_hold_enable(2);
    nema_ext_hold_irq_enable(2);
    nema_ext_hold_enable(3);
    nema_ext_hold_irq_enable(3);
}
/*
 * Read a GPU2D register through the HAL.
 * reg is forwarded unchanged to HAL_GPU2D_ReadRegister(); the value it
 * returns is passed back to the caller.
 */
uint32_t nema_reg_read(uint32_t reg)
{
    const uint32_t reg_value = HAL_GPU2D_ReadRegister(&hgpu2d, reg);

    return reg_value;
}
/*
 * Write a GPU2D register through the HAL.
 * reg and value are forwarded unchanged to HAL_GPU2D_WriteRegister(); any
 * result of that call is intentionally ignored.
 */
void nema_reg_write(uint32_t reg, uint32_t value)
{
    (void)HAL_GPU2D_WriteRegister(&hgpu2d, reg, value);
}
/*
 * Wait for a GPU interrupt.
 * Bare-metal build: waiting on a semaphore is not implemented, so this
 * returns 0 immediately and callers end up polling.
 */
int nema_wait_irq(void)
{
    const int status = 0;

    return status;
}
/*
 * Block until the command list with ID cl_id has been reported complete.
 * Loops, calling nema_wait_irq() each round, until last_cl_id (written by the
 * command-list-complete callback) is no longer smaller than cl_id.
 * Always returns 0.
 */
int nema_wait_irq_cl(int cl_id)
{
    for (;;)
    {
        if (last_cl_id >= cl_id)
        {
            break;
        }
        (void)nema_wait_irq();
    }

    return 0;
}
/*
 * Block until the GPU2D_BREAKPOINT register reads non-zero.
 * brk_id is not used: the register value alone ends the wait.
 * Always returns 0.
 */
int nema_wait_irq_brk(int brk_id)
{
    UNUSED(brk_id);

    for (;;)
    {
        if (nema_reg_read(GPU2D_BREAKPOINT) != 0U)
        {
            break;
        }
        (void)nema_wait_irq();
    }

    return 0;
}
/*
 * Release host memory obtained from nema_host_malloc().
 * A null ptr is ignored. STM32U5 builds use the C library free(); STM32H7S7
 * and STM32N6 builds use tsi_free().
 */
void nema_host_free(void *ptr)
{
    if (!ptr)
    {
        return;
    }

#if defined(STM32U599xx) || defined(STM32U5A9xx) || defined(STM32U5G9xx)
    free(ptr);
#elif defined(STM32H7S7xx) || defined(STM32N647xx) || defined(STM32N657xx)
    tsi_free(ptr);
#endif
}
/*
 * Allocate size bytes of host memory for NemaGFX.
 * STM32U5 builds use the C library malloc(); STM32H7S7 and STM32N6 builds use
 * tsi_malloc(). The allocator's result is returned unchanged.
 */
void *nema_host_malloc(unsigned size)
{
#if defined(STM32U599xx) || defined(STM32U5A9xx) || defined(STM32U5G9xx)
    void *mem = malloc(size);
    return mem;
#elif defined(STM32H7S7xx) || defined(STM32N647xx) || defined(STM32N657xx)
    void *mem = tsi_malloc(size);
    return mem;
#endif
}
/*
 * Allocate a buffer object of size bytes.
 * STM32U5 builds allocate with malloc(); STM32H7S7 and STM32N6 builds
 * allocate with tsi_malloc(). The physical address is the virtual address
 * cast to uint32_t. All remaining fields are zero.
 * Asserts when the allocation yields a null pointer.
 */
nema_buffer_t nema_buffer_create(int size)
{
    nema_buffer_t buffer;

    memset(&buffer, 0, sizeof(buffer));
    buffer.size = size;

#if defined(STM32U599xx) || defined(STM32U5A9xx) || defined(STM32U5G9xx)
    buffer.base_virt = malloc(size);
#elif defined(STM32H7S7xx) || defined(STM32N647xx) || defined(STM32N657xx)
    buffer.base_virt = tsi_malloc(size);
#endif

    assert(buffer.base_virt != 0 && "Unable to allocate memory in nema_buffer_create");
    buffer.base_phys = (uint32_t)buffer.base_virt;

    return buffer;
}
/*
 * Allocate a buffer object of size bytes from memory pool `pool`.
 * STM32H7S7 and STM32N6 builds allocate with tsi_malloc_pool(pool, size);
 * STM32U5 builds allocate with malloc() and do not use `pool`.
 * The physical address is the virtual address cast to uint32_t and fd is 0.
 * Asserts when the allocation yields a null pointer.
 */
nema_buffer_t nema_buffer_create_pool(int pool, int size)
{
    nema_buffer_t buffer;

    memset(&buffer, 0, sizeof(buffer));
    buffer.fd = 0;
    buffer.size = size;

#if defined(STM32U599xx) || defined(STM32U5A9xx) || defined(STM32U5G9xx)
    buffer.base_virt = malloc(size);
#elif defined(STM32H7S7xx) || defined(STM32N647xx) || defined(STM32N657xx)
    buffer.base_virt = tsi_malloc_pool(pool, size);
#endif

    assert(buffer.base_virt != 0 && "Unable to allocate memory in nema_buffer_create_pool");
    buffer.base_phys = (uint32_t)buffer.base_virt;

    return buffer;
}
/*
 * Build a buffer object describing framebuffer number `index`.
 * No memory is allocated: base_virt points at FrameBuffer[index], base_phys
 * is that address cast to uint32_t, size is DISPLAY_FRAMEBUFFER_SIZE and fd
 * is 0. `index` is not range-checked.
 */
nema_buffer_t nema_select_framebuffer(int index)
{
    nema_buffer_t fb;

    memset(&fb, 0, sizeof(fb));
    fb.fd = 0; /* Buffer allocated */
    fb.size = DISPLAY_FRAMEBUFFER_SIZE;

#if defined(STM32U599xx) || defined(STM32U5A9xx) || defined(STM32U5G9xx) || \
    defined(STM32H7S7xx) || defined(STM32N647xx) || defined(STM32N657xx)
    fb.base_virt = FrameBuffer[index];
#endif
    fb.base_phys = (uint32_t)fb.base_virt;

    return fb;
}
/*
 * Map a buffer object for CPU access.
 * No mapping work is done here: the stored base_virt pointer is returned.
 */
void *nema_buffer_map(nema_buffer_t *bo)
{
    void *cpu_addr = bo->base_virt;

    return cpu_addr;
}
/*
 * Unmap a buffer object previously returned by nema_buffer_map().
 * Nothing needs to be undone in this implementation, so bo is ignored.
 */
void nema_buffer_unmap(nema_buffer_t *bo)
{
UNUSED(bo);
}
/*
 * Free the memory behind a buffer object and mark the object as unallocated.
 * A buffer whose fd is -1 is treated as not allocated and left untouched.
 * Otherwise base_virt is released (free() on STM32U5, tsi_free() elsewhere)
 * and the descriptor is reset: null pointer, zero address, zero size, fd -1.
 */
void nema_buffer_destroy(nema_buffer_t *bo)
{
    if (bo->fd != -1)
    {
#if defined(STM32U599xx) || defined(STM32U5A9xx) || defined(STM32U5G9xx)
        free(bo->base_virt);
#else
        tsi_free(bo->base_virt);
#endif
        bo->fd = -1; /* Buffer not allocated */
        bo->size = 0;
        bo->base_phys = 0;
        bo->base_virt = (void*)0;
    }
}
/*
 * Return the physical base address stored in a buffer object.
 */
uintptr_t nema_buffer_phys(nema_buffer_t *bo)
{
    const uintptr_t phys_addr = bo->base_phys;

    return phys_addr;
}
/*
 * Flush a buffer object from the CPU data cache.
 * With NEMA_CACHED_MEMORY defined, the D-cache is cleaned and invalidated
 * over the buffer's address range, using a length of bo->size + 32.
 * Without it this function does nothing.
 * NOTE(review): the extra 32 bytes presumably cover a trailing cache line -
 * confirm against the target's cache line size.
 */
void nema_buffer_flush(nema_buffer_t * bo)
{
#if defined(NEMA_CACHED_MEMORY)
    SCB_CleanInvalidateDCache_by_Addr((uint32_t *)bo->base_virt, bo->size + 32);
#else /* !NEMA_CACHED_MEMORY */
    UNUSED(bo);
#endif /* NEMA_CACHED_MEMORY */
}
/*
 * Lock the mutex identified by mutex_id.
 * No locking is performed in this implementation: mutex_id is ignored and 0
 * is returned.
 */
int nema_mutex_lock(int mutex_id)
{
    const int status = 0;

    UNUSED(mutex_id);

    return status;
}
/*
 * Unlock the mutex identified by mutex_id.
 * No unlocking is performed in this implementation: mutex_id is ignored and
 * 0 is returned.
 */
int nema_mutex_unlock(int mutex_id)
{
    const int status = 0;

    UNUSED(mutex_id);

    return status;
}
/*
 * Request that the cache be disabled by asserting external hold 2.
 * The matching work (ICACHE disable, hold release) is done in
 * HAL_GPU2D_ErrorCallback() when interrupt flag bit 2 is pending.
 */
void platform_disable_cache(void)
{
    const int hold_id = 2;

    nema_ext_hold_assert(hold_id, 1);
}
/*
 * Request a cache invalidation by asserting external hold 3.
 * The matching work (ICACHE enable + invalidate, hold release) is done in
 * HAL_GPU2D_ErrorCallback() when interrupt flag bit 3 is pending.
 */
void platform_invalidate_cache(void)
{
    const int hold_id = 3;

    nema_ext_hold_assert(hold_id, 1);
}
In main.c I try to execute some NemaGFX functions:
/* GPU2D */
/*
 * Bring-up test: initializes GPU2D and NemaGFX, describes the framebuffer,
 * records a small NemaVG scene into one command list, submits it and waits
 * for it. printf() calls trace progress so the last message shows where
 * execution stopped.
 */
printf("MX_GPU2D_Init\n");
MX_GPU2D_Init();
printf("nema_components_init\n");
nema_components_init();
printf("Done\n");
/* Describe the destination framebuffer: geometry, stride and pixel format
 * (the format follows DISPLAY_BYTES_PER_PIXEL). */
NemaGFX.fbo.w = DISPLAY_SIZE_W;
NemaGFX.fbo.h = DISPLAY_SIZE_H;
NemaGFX.fbo.stride = DISPLAY_STRIDE;
#if (DISPLAY_BYTES_PER_PIXEL == 2)
// NemaGFX.fbo.format = NEMA_BGR565;
NemaGFX.fbo.format = NEMA_RGB565;
#elif (DISPLAY_BYTES_PER_PIXEL == 3)
NemaGFX.fbo.format = NEMA_BGR24;
// NemaGFX.fbo.format = NEMA_BGRA8888;
// NemaGFX.fbo.stride = 800*4;
#elif (DISPLAY_BYTES_PER_PIXEL == 4)
NemaGFX.fbo.format = NEMA_BGRA8888;
#endif
// NemaGFX.fbo.format = NEMA_BGR565;
// NemaGFX.fbo.stride = 800*2;
// MX_LTDC_Reset_FrameBufferLayer2(1);
printf("nema_calls...\n");
/* Circle */
/* Use framebuffer number buffer_pixel as the render target. */
// NemaGFX.fbos[buffer_pixel].bo = nema_select_framebuffer(buffer_pixel);
NemaGFX.fbo.bo = nema_select_framebuffer(buffer_pixel);
/* Create an 8192-byte command list and bind it in circular mode. */
printf("nema_cl_create_sized\n");
NemaGFX.cl = nema_cl_create_sized(8192);
nema_cl_bind_circular(&NemaGFX.cl);
printf("nema_bind_dst_tex\n");
// nema_bind_dst_tex(NemaGFX.fbo.bo.base_phys, NemaGFX.fbo.w, NemaGFX.fbo.h, NemaGFX.fbo.format, NemaGFX.fbo.stride);
// nema_bind_src_tex(NemaGFX.fbo.bo.base_phys, NemaGFX.fbo.w, NemaGFX.fbo.h, NemaGFX.fbo.format, NemaGFX.fbo.stride, NEMA_FILTER_PS);
/* With 3 bytes per pixel the textures are bound to GFXMMU virtual buffer 0
 * as NEMA_RGBA8888 with a stride argument of -1; otherwise the selected
 * framebuffer is bound directly with its own format and stride. */
#if (DISPLAY_BYTES_PER_PIXEL == 3)
nema_bind_dst_tex(GFXMMU_VIRTUAL_BUFFER0_BASE, NemaGFX.fbo.w, NemaGFX.fbo.h, NEMA_RGBA8888, -1);
nema_bind_src_tex(GFXMMU_VIRTUAL_BUFFER0_BASE, NemaGFX.fbo.w, NemaGFX.fbo.h, NEMA_RGBA8888, -1, NEMA_FILTER_PS);
#else
nema_bind_dst_tex(NemaGFX.fbo.bo.base_phys, NemaGFX.fbo.w, NemaGFX.fbo.h, NemaGFX.fbo.format, NemaGFX.fbo.stride);
// nema_bind_dst_tex(NemaGFX.fbo.bo.base_phys, NemaGFX.fbo.w, NemaGFX.fbo.h, NemaGFX.fbo.format, -1);
nema_bind_src_tex(NemaGFX.fbo.bo.base_phys, NemaGFX.fbo.w, NemaGFX.fbo.h, NemaGFX.fbo.format, NemaGFX.fbo.stride, NEMA_FILTER_PS);
// nema_bind_src2_tex(NemaGFX.fbo.bo.base_phys, NemaGFX.fbo.w, NemaGFX.fbo.h, NemaGFX.fbo.format, NemaGFX.fbo.stride, NEMA_FILTER_BL);
#endif
/* Clip to the full display area. */
printf("nema_set_clip\n");
nema_set_clip(0, 0, DISPLAY_SIZE_W, DISPLAY_SIZE_H);
// printf("nema_enable_depth\n");
// nema_enable_depth(0);
// printf("nema_debug_overdraws\n");
// nema_debug_overdraws(0);
// nema_set_const_color(0xFF0000FF);
// nema_set_src_color_key(0xFF0000FF);
// nema_set_dst_color_key(0xFF0000FF);
// nema_set_recolor_color(0xFF0000FF);
// nema_set_blend_fill(NEMA_BL_SIMPLE);
nema_set_blend_fill(NEMA_BL_SRC_OVER);
// nema_set_blend_fill(0x00000000);
// nema_set_blend_blit(NEMA_BL_SRC_OVER);
// nema_blending_mode(nema_blend_factor_t src, nema_blend_factor_t dst, nema_blend_op_t ops);
// nema_blending_mode(NEMA_BF_DESTCOLOR, NEMA_BF_INVSRCALPHA, NEMA_BLOP_SRC_PREMULT);
// nema_set_blend_blit(nema_blending_mode(NEMA_BF_ONE, NEMA_BF_INVSRCALPHA, NEMA_BLOP_SRC_PREMULT));
// nema_set_blend_blit(nema_blending_mode(NEMA_BF_SRCALPHA, NEMA_BF_INVSRCALPHA, NEMA_BLOP_MODULATE_A));
/* NOTE(review): the value returned by nema_blending_mode() is discarded here;
 * the commented-out variants pass it to nema_set_blend_blit() or
 * nema_vg_set_blend() - confirm whether this call is still needed. */
nema_blending_mode(NEMA_BF_SRCALPHA, NEMA_BF_INVSRCALPHA, NEMA_BLOP_MODULATE_A);
/* Create the paint object and clear it. */
printf("nema_vg_paint_create\n");
NemaGFX.paint = nema_vg_paint_create();
nema_vg_paint_clear(NemaGFX.paint);
/* Build the transform matrix: identity, then zero translation, zero rotation
 * and unit scale. */
nema_mat3x3_load_identity(NemaGFX.m);
nema_mat3x3_translate(NemaGFX.m, 0.0, 0.0);
nema_mat3x3_rotate(NemaGFX.m, 0.0);
nema_mat3x3_scale(NemaGFX.m, 1.0, 1.0);
/* Solid-color paint at full opacity; stroke width 15.0 is used by the
 * NEMA_VG_STROKE draw further down. */
printf("nema_vg_paint_set_type\n");
nema_vg_paint_set_type(NemaGFX.paint, NEMA_VG_PAINT_COLOR);
nema_vg_paint_set_opacity(NemaGFX.paint, 1.0);
nema_vg_paint_set_paint_color(NemaGFX.paint, nema_rgba(0xFF, 0x00, 0xFF, 0xFF));
nema_vg_stroke_set_width(15.0);
// nema_vg_set_fill_rule(NEMA_VG_FILL_EVEN_ODD);
nema_vg_set_blend(NEMA_BL_SRC_OVER | NEMA_BLOP_SRC_PREMULT);
// nema_vg_set_blend(NEMA_BL_SRC_OVER); /* No sharp rectangle! */
// nema_vg_set_blend(NEMA_BL_SRC_OVER | NEMA_BLOP_RECOLOR); /* No sharp rectangle! */
// nema_vg_set_blend(NEMA_BL_SRC_OVER | NEMA_BLOP_STENCIL_XY);
// nema_vg_set_blend(NEMA_BL_SRC_OVER | NEMA_BLOP_NO_USE_ROPBL);
// nema_vg_set_blend(NEMA_BL_SRC_OVER | NEMA_BLOP_MODULATE_RGB); /* No sharp rectangle! */
// nema_vg_set_blend(NEMA_BL_SRC_OVER | NEMA_BLOP_SRC_CKEY);
// nema_vg_set_blend(NEMA_BL_SRC_OVER| NEMA_BLOP_SRC_PREMULT | NEMA_BLOP_SRC_CKEY);
// nema_vg_set_blend(nema_blending_mode(NEMA_BF_SRCALPHA, NEMA_BF_INVSRCALPHA, NEMA_BLOP_MODULATE_A));
// nema_vg_set_blend(nema_blending_mode(NEMA_BF_SRCALPHA, NEMA_BF_INVSRCALPHA, NEMA_BLOP_MODULATE_RGB | NEMA_BLOP_MODULATE_A)); /* Takes A, non-AA */
// nema_set_blend_fill_compose(NEMA_BL_SRC_OVER);
#if 1
printf("nema_enable_aa_flags\n");
nema_enable_aa_flags(1);
nema_enable_aa(1, 1, 1, 1);
#endif
#if 1
/* This is required to get well rounded corners and avoid displaying a 45 degrees rotated square! */
printf("nema_vg_set_quality\n");
nema_vg_set_quality(NEMA_VG_QUALITY_MAXIMUM);
#endif
/* First draw: black rectangle at (0, 0) sized 800 x 480 with the non-zero
 * fill rule.
 * NOTE(review): 800 x 480 is hard-coded here rather than taken from
 * DISPLAY_SIZE_W / DISPLAY_SIZE_H - confirm they match. */
nema_vg_set_fill_rule(NEMA_VG_FILL_NON_ZERO);
printf("nema_vg_draw_rect\n");
nema_vg_paint_set_paint_color(NemaGFX.paint, nema_rgba(0x00, 0x00, 0x00, 0xFF)); // Black
nema_vg_draw_rect(0, 0, 800, 480, NemaGFX.m, NemaGFX.paint);
/* Second draw: 200 x 200 rectangle at (80, 140) using NEMA_VG_STROKE and the
 * color (0xFF, 0x00, 0xFF, 0xFF). */
nema_vg_set_fill_rule(NEMA_VG_STROKE);
nema_vg_paint_set_paint_color(NemaGFX.paint, nema_rgba(0xFF, 0x00, 0xFF, 0xFF));
nema_vg_draw_rect(80, 140, 200, 200, NemaGFX.m, NemaGFX.paint);
/* Disabled: circle draw (reported above as not completing and hanging). */
#if 0
printf("nema_vg_draw_circle\n");
nema_vg_draw_circle(400, 240, 100, NemaGFX.m, NemaGFX.paint);
#endif
/* Disabled: rounded rectangle draw (reported above as incomplete and hanging). */
#if 0
printf("nema_vg_draw_rounded_rect\n");
nema_vg_draw_rounded_rect(520, 140, 200, 200, 40, 40, NemaGFX.m, NemaGFX.paint);
#endif
/* Stop recording, submit the command list, wait for it, then release the
 * command list and the paint. */
printf("nema_cl_unbind\n");
nema_cl_unbind();
printf("nema_cl_submit\n");
nema_cl_submit(&NemaGFX.cl);
printf("nema_cl_wait\n");
nema_cl_wait(&NemaGFX.cl);
printf("nema_cl_destroy\n");
nema_cl_destroy(&NemaGFX.cl);
printf("nema_vg_paint_destroy\n");
nema_vg_paint_destroy(NemaGFX.paint);
printf("Done\n");
I noticed that the m55 NemaGFX works well with NEMA_RGB565, but not at all with NEMA_BGR565, which also took me a while to figure out. So there could be more surprises why it's not working right now.
I probably forgot something important.
Any help is appreciated.
I posted my test project on GitHub:
https://github.com/AngryCarrot61/STM32N6-digits-lcd