cancel
Showing results for 
Search instead for 
Did you mean: 

Fastest 3P3Z (3 pole - 3 zero) controller on STM32F334

LS. B.1
Associate II

Hello!

I'm trying to write the "fastest as possible" code for a 3P3Z (3 pole, 3 zero) controller, but even with SIMD32 instructions, the C code+compiler optimizations seems to win this battle.

Here's the C code:

static __INLINE int16_t q15_cntrl3z3p(q15_3z3pStruct_TypeDef_t *controller)

{

q63_t acc; /* Accumlator */

q31_t fcurr; /* Variaveis temporarias */

// y(k) = u(k) * g0 + u(k-1) * g1 + u(k-2) * g2 + u(k-3) * g3

//  - y(k-1) * h1 - y(k-2) * h2 - y(k-3) * h3;

controller->umem[0] = controller->setpoint - controller->input;

acc = (controller->umem[0]*controller->gCoefs[0]<<controller->gShifts[0]);

acc += (controller->umem[1]*controller->gCoefs[1]<<controller->gShifts[1]);

acc += (controller->umem[2]*controller->gCoefs[2]<<controller->gShifts[2]);

acc += (controller->umem[3]*controller->gCoefs[3]<<controller->gShifts[3]);

acc -= (controller->ymem[1]*controller->hCoefs[1]<<controller->hShifts[1]);

acc -= (controller->ymem[2]*controller->hCoefs[2]<<controller->hShifts[2]);

acc -= (controller->ymem[3]*controller->hCoefs[3]<<controller->hShifts[3]);

fcurr = __SSAT(acc>>(controller->QNbase),16);

fcurr = limitador(fcurr,controller->minmax[0],controller->minmax[1]);

controller->ymem[0] = fcurr;

controller->ymem[3] = controller->ymem[2];

controller->ymem[2] = controller->ymem[1];

controller->ymem[1] = controller->ymem[0];

controller->umem[3] = controller->umem[2];

controller->umem[2] = controller->umem[1];

controller->umem[1] = controller->umem[0];

};

And Here's my Mixed ASM version ... *MAY contain bugs*

static __INLINE int16_t q15_cntrl3z3pFAST(q15_3z3pStruct_TypeDef_t *controller)

{

static uint8_t u8_init = 0;

// Mantém as variáveis que apontam para o controlador de forma estática...

static q31_t x0, x1, x2, x3;

static q31_t c0, c1, c2, c3;

q31_t *g_coeffPtr = (q31_t *)&(controller->gCoefs); // É um ponteiro para numeros em q31

q31_t *h_coeffPtr = (q31_t *)&(controller->hCoefs); // É um ponteiro para numeros em q31

q31_t *u_statePtr = (q15_t *)&(controller->umem); // É um ponteiro para números em q15

q31_t *y_statePtr = (q15_t *)&(controller->ymem); // É um ponteiro para números em q15

volatile q63_t sum0, sum1, sum2, sum3;

sum0 = sum1 = sum2 = sum3 = 0;

if (u8_init != 5)

{

u8_init = 5;

x0 = *(q31_t *)(g_coeffPtr); // x0 = 0x00020001

x1 = *(q31_t *)(g_coeffPtr+1);// x1 = 0x00040003

x2 = *(q31_t *)(h_coeffPtr); // x0 = 0x00020001

x3 = *(q31_t *)(h_coeffPtr+1);// x1 = 0x00040003

}

// Só esse bloco toma 250nS

// HAL_GPIO_WritePin(GPIOB, KEY_PB2_Pin, GPIO_PIN_SET);

//HAL_GPIO_WritePin(GPIOB, KEY_PB2_Pin, GPIO_PIN_RESET);

// Só esse bloco toma 250nS

// *****************************************

// Esse bloco ocupa 250ns para execução

// *****************************************

c0 = *(q31_t *)(u_statePtr); // c0 = 0x001e0028

c1 = *(q31_t *)(u_statePtr+1);// c1 = 0x000a0014

c2 = *(q31_t *)(y_statePtr); // c2 = 0x001e0028

c3 = *(q31_t *)(y_statePtr+1);// c3 = 0x000a0014

// *****************************************

// Esse bloco ocupa 250ns para execução

// *****************************************

// *****************************************

// Esse bloco ocupa 500ns para execução

// *****************************************

sum0 = __SMLALD(x0, c0, sum0);

  // sum0 = p0*q0 + p1*q1 + sum0;

  // sum0 = g0*u0 + g1*u1 + sum0;

sum1 = __SMLALD(x1, c1, sum0);

  // sum1 = p2*q2 + p3*q3 + sum0;

  // sum1 = g2*u2 + g3*u4 + g0*u0 + g1*u1 + sum0;

// *****************************************

// Esse bloco ocupa 500ns para execução

// *****************************************

sum2 = __SMLALD(x2, c2, sum2);

  // sum2 = h0*y0 + h1*y1 + sum1;

sum3 = __SMLALD(x3, c3, sum2);

  // sum3 = h2*y2 + h3*y3 + sum2;

  // sum3 = g2*u2 + g3*u4 + g0*u0 + g1*u1 + h2*y2 + h3*h3 + h0*y0 + h1*y1;

sum0 = __SSAT((sum1-sum3)>>15, 16);

// *****************************************

// Esse bloco ocupa 500ns para execução

// *****************************************

HAL_GPIO_WritePin(GPIOB, KEY_PB2_Pin, GPIO_PIN_RESET);

}

The C version uses abou 2.0 us , and the ASM (and not so complete version) take 2.2us (+200ns more) and also do not have the output limits... (on a F334 - 72MHz machine).

I'm trying to figure out many ways to NOT write pure ASM/THUMB/ARM, but ... any help?

Thanks.

2 REPLIES 2
AErma.2
Associate II

Just dont USE HAL GPIO

victagayun
Senior III

the point is, HAL is used on both codes and yet there is an increase of 200nS.

That is the question.

Why not use G474 instead?