cancel
Showing results for 
Search instead for 
Did you mean: 

STM32WBA55CG Modular Multiplication not working

elopezpe
Associate II

Hello all,

I have been working on a bare-metal implementation of the PKA engine in the STM32WBA55CG microcontroller using Rust. So far, I have been able to run modular addition and subtraction, modular exponentiation, arithmetic operations, ECC point check, ECC Multiplication and ECDSA sign and verify. However, there is an issue with the modular multiplication. Whenever I try to multiply operand_a * operand_b mod N, I get zero as a result (or some incorrect value). According to the datasheet:
Simple modular multiplication A x B mod n
a) Compute r2modn using Montgomery parameter computation.
b) Compute AR = A x r2modn mod n. Output is in the Montgomery domain.
c) Compute AB = AR x B mod n. Output is in the natural domain.

And the memory addresses are as follows:

elopezpe_0-1743170160263.png

I have tried two approaches:

1. Loading into the memory operand_a and operand_b and perform the multiplication using mode 0x10

2. Perform the steps mentioned above, i.e., compute AR, and then AB using AR.

In both cases, I get a zero result in the end. Below you can find my code and the result I get:

#![no_std]
#![no_main]
// Test vectors: https://github.com/scogliani/ecc-test-vectors?tab=readme-ov-file
// Reference Manual: file:///C:/Users/elopezpe/OneDrive/Documentos/PhD/micro/stm32eba55cg/rm0493-multiprotocol-wireless-bluetooth-low-energy-and-ieee802154-stm32wba5xxx-arm-based-32-bit-mcus-stmicroelectronics-en.pdf
use stm32wba::stm32wba55::{self};
use {defmt_rtt as _, panic_probe as _};
use cortex_m_rt::entry;
use cortex_m::asm;
use defmt::info;
use core::{
    mem::size_of,
    ptr::{read_volatile, write_volatile},
};

// Base address used for every PKA access in this program.
const BASE: usize = 0x520C_2000;
// Byte offset from `BASE` at which the PKA RAM starts.
const PKA_RAM_OFFSET: usize = 0x400; 
// Absolute address of the first PKA RAM word.
const RAM_BASE: usize = BASE + PKA_RAM_OFFSET;
// Value programmed into the PKA_CR MODE field for both multiplications.
const MODE: u8 = 0x10;
// PKA RAM size figure from which `zero_ram` derives its loop bound.
// NOTE(review): 667 presumably counts 64-bit RAM words (5336 bytes) — confirm
// against the reference manual.
const RAM_NUM_DW: usize = 667;

// PKA RAM locations for multiplication.
// Despite the `_OFFSET` suffix, each constant is an absolute address
// (`BASE` + offset) and is passed directly to `write_ram` / `read_ram`.
const OPERAND_LENGTH_OFFSET: usize = BASE + 0x408 ;
const OPERAND_A_OFFSET: usize = BASE + 0xA50;
const OPERAND_B_OFFSET: usize = BASE + 0xC68;
const MODULUS_OFFSET: usize = BASE + 0x1088;
const RESULT_OFFSET: usize = BASE + 0xE78;
// Not referenced by the code in this file.
// NOTE(review): presumably the output location of the Montgomery parameter
// computation — confirm before use.
const MONTGOMERY_OFFSET: usize = BASE + 0x620;

// Operands are eight 32-bit words each. `write_ram` stores a slice in reverse
// order, so index 0 here ends up at the highest address of the operand in
// PKA RAM.

// Modulus (the P-256 field prime, most-significant word first).
const N: [u32; 8] = [
    0xffffffff, 0x00000001, 0x00000000, 0x00000000, 
    0x00000000, 0xffffffff, 0xffffffff, 0xffffffff,
];

// First operand: identical to `N` except for the last word, i.e. N - 1.
const A: [u32; 8] = [
    0xffffffff, 0x00000001, 0x00000000, 0x00000000, 
    0x00000000, 0xffffffff, 0xffffffff, 0xfffffffe,
];

// Second operand: the value 2.
const B: [u32; 8] = [
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 
    0x00000000, 0x00000000, 0x00000000, 0x00000002,
];

// Montgomery parameter for `N`, used as the second operand of the first
// multiplication (AR = A x R2MODN).
// NOTE(review): hard-coded from an earlier PKA computation; re-derive it if
// `N` or `OPERAND_LENGTH` changes.
const R2MODN: [u32; 8] = [
    0xFFFFFFFC, 0xFFFFFFFC, 0xFFFFFFFB, 0xFFFFFFF9, 
    0xFFFFFFFE, 0x00000003, 0x00000005, 0x00000002
];
// Operand length in bits (8 words x 32 bits), written to OPERAND_LENGTH_OFFSET.
const OPERAND_LENGTH: u32 = 8 * 32;
// Operand length in 32-bit words; sizes the result buffers in `main`.
const WORD_LENGTH: usize = 8; // = OPERAND_LENGTH / 32

/// Copies `buf` into memory starting at address `offset`, last element first.
///
/// The final element of `buf` lands at `offset`, the one before it at
/// `offset + 4`, and so on; every store is a volatile 32-bit write.
///
/// Debug builds assert that `offset` is 4-byte aligned and that the end of the
/// written range lies below `0x520C_33FF`.
///
/// # Safety
/// `offset` must be the address of a writable, 4-byte-aligned region of at
/// least `buf.len()` 32-bit words.
unsafe fn write_ram(offset: usize, buf: &[u32]) {
    const WORD_BYTES: usize = size_of::<u32>();

    debug_assert_eq!(offset % 4, 0);
    debug_assert!(offset + buf.len() * WORD_BYTES < 0x520C_33FF);

    // Walk the slice back to front while the destination address counts up.
    for (slot, word) in buf.iter().rev().copied().enumerate() {
        let dst = (offset + slot * WORD_BYTES) as *mut u32;
        write_volatile(dst, word);
    }
}

/// Fills `buf` from memory starting at address `offset`, last element first.
///
/// The word at `offset` is stored in the final element of `buf`, the word at
/// `offset + 4` in the one before it, and so on; every load is a volatile
/// 32-bit read. This mirrors the ordering used by `write_ram`.
///
/// Debug builds assert that `offset` is 4-byte aligned and that the end of the
/// read range lies below `0x520C_33FF`.
///
/// # Safety
/// `offset` must be the address of a readable, 4-byte-aligned region of at
/// least `buf.len()` 32-bit words.
unsafe fn read_ram(offset: usize, buf: &mut [u32]) {
    const WORD_BYTES: usize = size_of::<u32>();

    debug_assert_eq!(offset % 4, 0);
    debug_assert!(offset + buf.len() * WORD_BYTES < 0x520C_33FF);

    let word_count = buf.len();
    for i in 0..word_count {
        let src = (offset + i * WORD_BYTES) as *const u32;
        // Ascending addresses fill the buffer from its end towards its start.
        buf[word_count - 1 - i] = read_volatile(src);
    }
}

unsafe fn zero_ram() {
    (0..RAM_NUM_DW)
        .into_iter()
        .for_each(|dw| unsafe { write_volatile((dw * 4 + RAM_BASE) as *mut u32, 0) });
}

#[entry]
unsafe fn main() -> ! {
    let p = stm32wba55::Peripherals::take().unwrap();
    let pka = &p.PKA;
    let clock = &p.RCC;
    let rng = &p.RNG;

    // Enable HSI as a stable clock source
    clock.rcc_cr().modify(|_, w| w
    .hseon().set_bit()
    );
    while clock.rcc_cr().read().hserdy().bit_is_clear() {
        asm::nop();
    }

    // Enable RNG clock. Select the source clock. Select the AHB clock
    clock.rcc_ccipr2().write(|w| w.rngsel().b_0x2());
    clock.rcc_ahb2enr().modify(|_, w| w.rngen().set_bit());
    while clock.rcc_ahb2enr().read().rngen().bit_is_clear() {
        asm::nop();
    }

    // Configure RNG
    // To configure, CONDRST bit is set to 1 in the same access and CONFIGLOCK remains at 0
    rng.rng_cr().write(|w| w
        .rngen().clear_bit()
        .condrst().set_bit()
        .configlock().clear_bit()
        .nistc().clear_bit()   // Hardware default values for NIST compliant RNG
        .ced().clear_bit()     // Clock error detection enabled
    );

    // First clear CONDRST while keeping RNGEN disabled
    rng.rng_cr().modify(|_, w| w
        .condrst().clear_bit()
    );

    // Then enable RNG in a separate step
    rng.rng_cr().modify(|_, w| w
        .rngen().set_bit()
        .ie().set_bit()
    );
    
    while rng.rng_sr().read().drdy().bit_is_clear() {
        asm::nop();
    }
    info!("RNG enabled successfully");

    // Enable PKA peripheral clock via RCC_AHB2ENR register
    clock.rcc_ahb2enr().modify(|_, w| w.pkaen().set_bit());

    // Reset PKA before enabling (sometimes helps with initialization)
    pka.pka_cr().modify(|_, w| w.en().clear_bit());
    for _ in 0..10 {
        asm::nop();
    }

    // Enable PKA peripheral
    pka.pka_cr().write(|w| w
        .en().set_bit()
        .mode().bits(MODE)
    );
 
    // Wait for PKA to initialize
    while pka.pka_sr().read().initok().bit_is_clear() {
        asm::nop();
    }
    info!("PKA initialized successfully!");

    // Clear any previous error flags
    pka.pka_clrfr().write(|w| w
        .addrerrfc().set_bit()
        .ramerrfc().set_bit()
        .procendfc().set_bit()
    );

    // First compute AR = A x r2modn mod n
    zero_ram();
    write_ram(OPERAND_LENGTH_OFFSET, &[OPERAND_LENGTH]);
    write_ram(OPERAND_A_OFFSET, &A);
    write_ram(OPERAND_B_OFFSET, &R2MODN);
    write_ram(MODULUS_OFFSET, &N);

    // Configure PKA operation mode and start
    info!("Starting PKA operation...");
    pka.pka_cr().modify(|_, w| w
        .mode().bits(MODE)
        .start().set_bit()  // Start the operation
    );

    // Wait for processing to complete - PROCENDF is 1 when done
    info!("Waiting for operation to complete...");
    while pka.pka_sr().read().procendf().bit_is_clear() {
        asm::nop();
    }
    info!("Operation complete!");

    // Add error checking after PKA operations
    if pka.pka_sr().read().addrerrf().bit_is_set() {
        info!("Address Error detected");
    } else if pka.pka_sr().read().ramerrf().bit_is_set() {
        info!("RAM Error detected");
    } else {
        info!("No errors");
    }
    
    // Read the result
    let mut AR = [0u32; WORD_LENGTH];
    read_ram(RESULT_OFFSET, &mut AR);
    info!("AR = A({:#X}) * R2MODN({:#X}) (mod {:#X}) = {:#X}", A, R2MODN, N, AR);
    
    // Clear the completion flag
    pka.pka_clrfr().write(|w| w.procendfc().set_bit());

    // Compute AB= AR x B mod n
    zero_ram();
    write_ram(OPERAND_LENGTH_OFFSET, &[OPERAND_LENGTH]);
    write_ram(OPERAND_A_OFFSET, &AR);
    write_ram(OPERAND_B_OFFSET, &B);
    write_ram(MODULUS_OFFSET, &N);

    // Configure PKA operation mode and start
    info!("Starting PKA operation...");
    pka.pka_cr().modify(|_, w| w
        .mode().bits(MODE)
        .start().set_bit()  // Start the operation
    );

    // Wait for processing to complete - PROCENDF is 1 when done
    info!("Waiting for operation to complete...");
    while pka.pka_sr().read().procendf().bit_is_clear() {
        asm::nop();
    }
    info!("Operation complete!");

    // Read the result
    let mut result = [0u32; WORD_LENGTH];
    read_ram(RESULT_OFFSET, &mut result);
    info!("AB = AR({:#X}) * B({:#X}) (mod {:#X}) = {:#X}", AR, B, N, result);
    
    // Clear the completion flag
    pka.pka_clrfr().write(|w| w.procendfc().set_bit());


    loop {}
}

INFO RNG enabled successfully
INFO PKA initialized successfully!
INFO Starting PKA operation...
INFO Waiting for operation to complete...
INFO Operation complete!
INFO No errors
INFO AR = A([0xFFFFFFFF, 0x1, 0x0, 0x0, 0x0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE]) * R2MODN([0xFFFFFFFC, 0xFFFFFFFC, 0xFFFFFFFB, 0xFFFFFFF9, 0xFFFFFFFE, 0x3, 0x5, 0x2]) (mod [0xFFFFFFFF, 0x1, 0x0, 0x0, 0x0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF]) = [0x0, 0xFFFFFFFC, 0x0, 0xFFFFFFFB, 0xFFFFFFFB, 0x0, 0xFFFFFFFB, 0xFFFFFFFC]
INFO Starting PKA operation...
INFO Waiting for operation to complete...
INFO Operation complete!
INFO AB = AR([0x0, 0xFFFFFFFC, 0x0, 0xFFFFFFFB, 0xFFFFFFFB, 0x0, 0xFFFFFFFB, 0xFFFFFFFC]) * B([0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2]) (mod [0xFFFFFFFF, 0x1, 0x0, 0x0, 0x0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF]) = [0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0] 

 

R2MODN has been calculated using the corresponding operation in the PKA engine for the modulus N of the P-256 curve.

Any help will be appreciated. Thanks!

0 REPLIES 0