2026-04-11 3:51 AM
I have recently been working on benchmarking various implementations and configurations of cryptographic algorithms, such as AES-GCM, on STM32. Two of the tested functions are the following:
void test_aes_hardware(void)
{
uint32_t plaintext[DATA_SIZE/4];
uint32_t ciphertext[DATA_SIZE/4];
uint32_t decrypted[DATA_SIZE/4];
uint8_t key_bytes[32];
uint8_t iv_bytes[16];
uint32_t key[8];
uint32_t iv[4];
uint8_t ikm[4];
uint8_t salt[4];
uint8_t info1[] = "HW_AES_KEY";
uint8_t info2[] = "HW_AES_IV";
uint32_t rnd;
/* RNG */
HAL_RNG_GenerateRandomNumber(&hrng, &rnd);
memcpy(ikm, &rnd, 4);
HAL_RNG_GenerateRandomNumber(&hrng, &rnd);
memcpy(salt, &rnd, 4);
/* KDF */
xkdfBlake(key_bytes, 32, ikm, 4, salt, 4, info1, sizeof(info1));
xkdfBlake(iv_bytes, 16, ikm, 4, salt, 4, info2, sizeof(info2));
memcpy(key, key_bytes, 32);
memcpy(iv, iv_bytes, 16);
for (int i = 0; i < DATA_SIZE / 4; i++) {
uint32_t w = (i*4 + 1)
|((i*4 + 2) << 8)
|((i*4 + 3) << 16)
|((i*4 + 4) << 24);
plaintext[i] = w;
}
/* Configure CRYP */
hcryp.Instance = CRYP;
hcryp.Init.DataType = CRYP_DATATYPE_32B;
hcryp.Init.KeySize = CRYP_KEYSIZE_256B;
hcryp.Init.pKey = key;
hcryp.Init.Algorithm = CRYP_AES_GCM;
hcryp.Init.pInitVect = iv;
hcryp.Init.Header = NULL;
hcryp.Init.HeaderSize = 0;
HAL_CRYP_Init(&hcryp);
bench_start();
HAL_CRYP_Encrypt(&hcryp,
plaintext,
DATA_SIZE/4,
ciphertext,HAL_MAX_DELAY);
while (HAL_CRYP_GetState(&hcryp) != HAL_CRYP_STATE_READY);
uint32_t time = bench_stop();
HAL_CRYP_Decrypt(&hcryp,
ciphertext,
DATA_SIZE/4,
decrypted,
HAL_MAX_DELAY);
while (HAL_CRYP_GetState(&hcryp) != HAL_CRYP_STATE_READY);
if (memcmp(plaintext, decrypted, DATA_SIZE) != 0) {
printf("Mismatch!\n");
Error_Handler();;
}
printf("AES-GCM Hardware without DMA: %lu us\r\n", time);
}The first one demonstarte the use of AES-GCM through CRYP module.
void test_aes_hardware_dma(void)
{
uint32_t plaintext[DATA_SIZE/4] __attribute__((aligned(32)));
uint32_t ciphertext[DATA_SIZE/4] __attribute__((aligned(32)));
uint32_t decrypted[DATA_SIZE/4] __attribute__((aligned(32)));
uint8_t key_bytes[32];
uint8_t iv_bytes[16];
uint32_t key[8] __attribute__((aligned(32)));
uint32_t iv[4] __attribute__((aligned(32)));
uint8_t ikm[4];
uint8_t salt[4];
uint8_t info1[] = "HW_AES_KEY";
uint8_t info2[] = "HW_AES_IV";
uint32_t rnd;
/* RNG */
HAL_RNG_GenerateRandomNumber(&hrng, &rnd);
memcpy(ikm, &rnd, 4);
HAL_RNG_GenerateRandomNumber(&hrng, &rnd);
memcpy(salt, &rnd, 4);
/* KDF */
xkdfBlake(key_bytes, 32, ikm, 4, salt, 4, info1, sizeof(info1));
xkdfBlake(iv_bytes, 16, ikm, 4, salt, 4, info2, sizeof(info2));
memcpy(key, key_bytes, 32);
memcpy(iv, iv_bytes, 16);
for (int i = 0; i < DATA_SIZE / 4; i++) {
uint32_t w = (i*4 + 1)
| ((i*4 + 2) << 8)
| ((i*4 + 3) << 16)
| ((i*4 + 4) << 24);
plaintext[i] = w;
}
SCB_CleanDCache_by_Addr((uint32_t*)plaintext, DATA_SIZE);
SCB_CleanDCache_by_Addr((uint32_t*)key, sizeof(key));
SCB_CleanDCache_by_Addr((uint32_t*)iv, sizeof(iv));
/* Configure CRYP */
hcryp.Instance = CRYP;
hcryp.Init.DataType = CRYP_DATATYPE_32B;
hcryp.Init.KeySize = CRYP_KEYSIZE_256B;
hcryp.Init.pKey = key;
hcryp.Init.Algorithm = CRYP_AES_GCM;
hcryp.Init.pInitVect = iv;
hcryp.Init.Header = NULL;
hcryp.Init.HeaderSize = 0;
HAL_CRYP_Init(&hcryp);
CrypCompleteDetected = 0;
bench_start();
HAL_CRYP_Encrypt_DMA(&hcryp,
plaintext,
DATA_SIZE/4,
ciphertext);
while (!CrypCompleteDetected);
SCB_InvalidateDCache_by_Addr((uint32_t*)ciphertext, DATA_SIZE);
uint32_t time = bench_stop();
CrypCompleteDetected = 0;
HAL_CRYP_Decrypt_DMA(&hcryp,
ciphertext,
DATA_SIZE/4,
decrypted);
while (!CrypCompleteDetected);
SCB_InvalidateDCache_by_Addr((uint32_t*)decrypted, DATA_SIZE);
if (memcmp(plaintext, decrypted, DATA_SIZE) != 0) {
printf("Mismatch!\n");
Error_Handler();
}
printf("AES-GCM Hardware DMA: %lu us\r\n", time);
}The second one use DMA to transfer data between RAM and CRYP peripheral
When running the program I get normal results with DATA_SIZE < 50 KiB. Above that, I start getting mismatches. When I compare, for example, only the first 100 bytes, the decrypted data seems to match the initial plaintext, and I get absurd timing results showing that the encryption execution time for 100 KiB is less than the encryption execution time for 50 KiB.
Please feel free to ask me for additional information.
Thank you.
Solved! Go to Solution.
2026-04-11 4:03 PM
I see. One more thing: why doesn't it truncate to the maximum 16-bit value, which is 65535?
2026-04-11 4:15 PM
It multiplies it by 4, meaning that it expects the size of the data to encrypt divided by 4. So if we want to encrypt 1024 bytes, I need to give a size of 1024/4?
2026-04-11 4:16 PM
That's just how math works on processors. The high bits are dropped.
uint32_t x = 102400;
uint16_t y = x;
printf("y=%u\n", (unsigned)y);
102400 in binary is 0b1 1001 0000 0000 0000. The lowest 16 bits are 0b1001 0000 0000 0000, or 36864.
2026-04-11 4:20 PM - edited 2026-04-11 4:21 PM
HAL_CRYP_Encrypt_DMA expects the number of items (as defined by DataWidthUnit), as the documentation states. In your case, this is the number of words.
/**
* @brief Encryption in DMA mode.
* @param hcryp: pointer to a CRYP_HandleTypeDef structure that contains
* the configuration information for CRYP module
* @param Input: Pointer to the input buffer (plaintext)
* @param Size: Length of the plaintext buffer either in word or in byte, according to DataWidthUnit
* @PAram Output: Pointer to the output buffer(ciphertext)
* @retval HAL status
*/
DataWidthUnit
It's a bug. You won't be able to encrypt/decrypt more than 16383 words at a time until it is fixed.
2026-04-13 5:15 AM
Hello TDK, thank you for your response earlier. I just want to know whether changing the type of the Size field of the struct to uint32_t will solve the issue?
2026-04-13 6:47 AM
Yes, changing it to a uint32_t in the HAL library will likely solve the issue.