cancel
Showing results for 
Search instead for 
Did you mean: 

Execution time changes over time, why?

NicRoberts
Senior II

STM32F411RE - NUCLEO

I'm working through a course on power optimization on STM32s & I'm genuinely confused about some results I'm getting off the board.

 I've set up TIM5 to count execution time in us like so,

 

void Timer_Init_For_Timing(void)
{
	RCC->APB1ENR |= RCC_APB1ENR_TIM5EN;

	/* Calculate TIM5 prescaler for 1MHx clock (1us tick)
	 * TIMxCLK = PCKL1 if APB1 prescaler = 1
	 * TIMxCLK = 2 * PCLK1 if APB1 prescaler > 1 */
	uint32_t pclk1_freq = SystemCoreClock; // start with HSI
	uint32_t ppre1_rcc_bits = (RCC->CFGR & RCC_CFGR_PPRE1) >> RCC_CFGR_PPRE1_Pos;

	if(ppre1_rcc_bits >= 0x4U)
	{
		uint32_t ppre1_divider = 1 << (ppre1_rcc_bits - 3);
		pclk1_freq = SystemCoreClock / ppre1_divider;
	}

	uint32_t tim_clk_input_freq = pclk1_freq;
	if(ppre1_rcc_bits >= 0x4U)
	{
		tim_clk_input_freq *= 2; // timer clock 2x PCLK1
	}

	TIM5->PSC = (tim_clk_input_freq / 1000000U) - 1;
	TIM5->ARR = 0xFFFFFFFFUL;
	TIM5->CR1 |= TIM_CR1_CEN;
}

/**
 * Read the current TIM5 counter value.
 *
 * The result is a microsecond timestamp only when TIM5's active prescaler
 * divides its input clock down to 1 MHz.
 *
 * @return Raw 32-bit content of TIM5->CNT.
 */
uint32_t Timer_Get_Time_us(void)
{
	const uint32_t now_ticks = TIM5->CNT;

	return now_ticks;
}

 

And I have the following function that configures the system clock so I can take measurements of power consumption at different clock speeds.

void ConfigureSystemClock(uint32_t target_sysclk_freq)
{
	uint32_t pllm = 8;
	uint32_t plln = 0;
	uint32_t pllp_field_value = 0;
	uint32_t flash_latency = FLASH_ACR_LATENCY_0WS;
	uint32_t hpre_val = RCC_CFGR_HPRE_DIV1;
	uint32_t ppre1_val = RCC_CFGR_PPRE1_DIV1;
	uint32_t ppre2_val = RCC_CFGR_PPRE2_DIV1;

	/* Enable HSI (Always available) */
	RCC->CR |= RCC_CR_HSION;
	while(!(RCC->CR & RCC_CR_HSIRDY)){}

	/* Configure Voltage Output Scaling (VOS) */
	/* Enable PWR interface clock */
	RCC->APB1ENR |= RCC_APB1ENR_PWREN;

	/* Small delay */
	for(volatile int i = 0; i < 100; i++){}

	if(target_sysclk_freq > 84000000UL)
	{
		if((PWR->CR & PWR_CR_VOS) == 0) // Check scale
		{
			PWR->CR |= PWR_CR_VOS;
//			while((PWR->CSR & PWR_CSR_VOSRDY) == 0){} // Wait for VOS
		}
	}
	else if ((target_sysclk_freq > 64000000UL) && (target_sysclk_freq <= 84000000UL))
	{
		PWR->CR &=~PWR_CR_VOS;
		PWR->CR |= (1U<<15);
//		while((PWR->CSR & PWR_CSR_VOSRDY) == 0){} // Wait for VOS
	}
	else if (target_sysclk_freq <= 64000000UL)
	{
		PWR->CR &=~PWR_CR_VOS;
		PWR->CR |= (1U<<14);
//		while((PWR->CSR & PWR_CSR_VOSRDY) == 0){} // Wait for VOS
	}


	/* Flash latency (RM0383 Table 5 p.45 - Assuming VDD 2.7-3.6V
	 * Actual HCLK is target_sysclk_freq / (value of HPRE divider)
	 * For simplicity, assuming HPRE_DIV1 for now when selecting latency */
	uint32_t hclk_for_latency = target_sysclk_freq; // assuming HPRE = DIV1
	if(hclk_for_latency <= 30000000UL){ flash_latency = FLASH_ACR_LATENCY_0WS; }
	else if(hclk_for_latency <= 60000000UL){ flash_latency = FLASH_ACR_LATENCY_1WS; }
	else if(hclk_for_latency <= 90000000UL){ flash_latency = FLASH_ACR_LATENCY_2WS; }
	else { flash_latency = FLASH_ACR_LATENCY_3WS; } // up to 100 MHz

	FLASH->ACR = FLASH_ACR_ICEN | FLASH_ACR_DCEN | FLASH_ACR_PRFTEN | flash_latency;
	while((FLASH->ACR & FLASH_ACR_LATENCY) != flash_latency){}

	/* Configure AHB, APB1, APB2 prescalers */
	hpre_val = RCC_CFGR_HPRE_DIV1; // HCLK = SYSCLK (HCLK is AHB clock)

	/* PCLK1 (APB1 clock) max 50MHz for F411 */
	if((target_sysclk_freq / 1) > 50000000)
	{
		ppre1_val = RCC_CFGR_PPRE1_DIV2; // PCLK1 = HCLK / 2
	}
	else
	{
		ppre1_val = RCC_CFGR_PPRE1_DIV1;
	}

	/* PCLK2 (APB2 clock) max 100MHz on F411 */
	ppre2_val = RCC_CFGR_PPRE2_DIV1;

	RCC->CFGR = (RCC->CFGR & ~(RCC_CFGR_HPRE | RCC_CFGR_PPRE1 | RCC_CFGR_PPRE2)) // clear old prescalers
			    | hpre_val | ppre1_val | ppre2_val; // set new prescalers


	/* Configure PLL or use HSI directly */
	if(target_sysclk_freq == SYSCLK_FREQ_HSI_16MHZ)
	{
		RCC->CFGR &=~RCC_CFGR_SW; // clear software bits & select HSI
		while((RCC->CFGR & RCC_CFGR_SWS) != RCC_CFGR_SWS_HSI){}
		if(RCC->CR & RCC_CR_PLLON)
		{
			RCC->CR &=~RCC_CR_PLLON; // if PLL on turn it off
			while(RCC->CR & RCC_CR_PLLRDY){}
		}
	}
	else
	{
		if(RCC->CR & RCC_CR_PLLON)
		{
			RCC->CR &=~RCC_CR_PLLON; // if PLL on turn it off
			while(RCC->CR & RCC_CR_PLLRDY){}
		}
		/* PLLP field (bits 17:16): 00 => /2, 01 => /4, 10 => /6, 11 => /8 */
		pllp_field_value = 0x00U;

		if(target_sysclk_freq == SYSCLK_FREQ_PLL_48MHZ){ plln = 48; }
		else if(target_sysclk_freq == SYSCLK_FREQ_PLL_72MHZ){ plln = 72; }
		else if(target_sysclk_freq == SYSCLK_FREQ_PLL_96MHZ){ plln = 96; }
		else // fall back to HSI if target not recognised for PLL
		{
			RCC->CFGR &=~RCC_CFGR_SW;
			while((RCC->CFGR & RCC_CFGR_SWS) != RCC_CFGR_SWS_HSI);
			return;
		}

		/* PLLQ: Set to valid value (eg /4 for USB if needed, max /15 for F411 */
		uint32_t pllq_val = 4;

		RCC->PLLCFGR = (RCC_PLLCFGR_PLLSRC_HSI) |
				       (pllm << RCC_PLLCFGR_PLLM_Pos) |
					   (plln << RCC_PLLCFGR_PLLN_Pos) |
					   (pllp_field_value << RCC_PLLCFGR_PLLM_Pos) |
					   (pllq_val << RCC_PLLCFGR_PLLQ_Pos);

		RCC->CR |= RCC_CR_PLLON;
		while(!(RCC->CR & RCC_CR_PLLRDY)){}

		RCC->CFGR = (RCC->CFGR & ~RCC_CFGR_SW) | RCC_CFGR_SW_PLL; // Select PLL
		while((RCC->CFGR & RCC_CFGR_SWS) != RCC_CFGR_SWS_PLL){}
	}
}

 

The task I'm timing,

/*
 * Fixed busy-loop workload used as the timed task: executes __NOP() 1000
 * times. Takes no input and produces no result.
 */
void Task_Run_Computation(void)
{
	const uint32_t iterations = 1000;

	/* The loop counter is volatile, so every read and write of it must be
	 * performed and the compiler cannot collapse or remove the loop. */
	for(volatile uint32_t i = 0; i < iterations; i++)
	{
		__NOP(); /* presumably the CMSIS no-operation intrinsic */
	}
}

 

The full code listing,

/**
 * @file           : main.c
 */

#include <stdint.h>
#include <stdio.h>
#include "stm32f4xx.h"
#include "fpu.h"
#include "uart.h"
#include "timebase.h"
#include "bsp.h"
#include "adc.h"

/* SYSCLK targets, in Hz, that ConfigureSystemClock() recognises */
#define SYSCLK_FREQ_HSI_16MHZ	16000000UL
#define SYSCLK_FREQ_PLL_48MHZ	48000000UL
#define SYSCLK_FREQ_PLL_72MHZ	72000000UL
#define SYSCLK_FREQ_PLL_96MHZ	96000000UL

/* Build-time selection of the frequency under test: leave exactly one enabled */
//#define TARGET_SYSCLK_FREQ		SYSCLK_FREQ_HSI_16MHZ
//#define TARGET_SYSCLK_FREQ		SYSCLK_FREQ_PLL_48MHZ
//#define TARGET_SYSCLK_FREQ		SYSCLK_FREQ_PLL_72MHZ
#define TARGET_SYSCLK_FREQ		SYSCLK_FREQ_PLL_96MHZ

/* Forward declarations for the functions defined after main() */
void ConfigureSystemClock(uint32_t target_sysclk_freq);
void Timer_Init_For_Timing(void);
uint32_t Timer_Get_Time_us(void);
void Task_Run_Computation(void);

/* Core clock in Hz; passed to ConfigureSystemClock() and read by
 * Timer_Init_For_Timing() to derive the TIM5 prescaler */
uint32_t SystemCoreClock = TARGET_SYSCLK_FREQ;

/* Timestamps around the timed task and their difference, as read from
 * Timer_Get_Time_us() */
volatile uint32_t g_task_start_time_us = 0;
volatile uint32_t g_task_end_time_us = 0;
volatile uint32_t g_task_execution_time_us = 0;


/**
 * Entry point: brings up the FPU, system clock, debug UART and TIM5, then
 * repeatedly times Task_Run_Computation() and prints the elapsed timer count.
 */
int main(void)
{
	fpu_enable();

	ConfigureSystemClock(SystemCoreClock);

	uart_debug_init();

	Timer_Init_For_Timing();

	TIM5->CNT = 0;

	/* Loop forever */
	while(1)
	{
		g_task_start_time_us = Timer_Get_Time_us();
		Task_Run_Computation();
		g_task_end_time_us = Timer_Get_Time_us();

		/* Unsigned subtraction wraps modulo 2^32, which matches the 32-bit
		 * counter, so a single counter overflow between the two readings
		 * needs no special case. */
		g_task_execution_time_us = g_task_end_time_us - g_task_start_time_us;

		/* Cast so the argument type matches %lu on every target */
		printf("Execution time: %lu\n\r", (unsigned long)g_task_execution_time_us);

	}
}

void ConfigureSystemClock(uint32_t target_sysclk_freq)
{
	uint32_t pllm = 8;
	uint32_t plln = 0;
	uint32_t pllp_field_value = 0;
	uint32_t flash_latency = FLASH_ACR_LATENCY_0WS;
	uint32_t hpre_val = RCC_CFGR_HPRE_DIV1;
	uint32_t ppre1_val = RCC_CFGR_PPRE1_DIV1;
	uint32_t ppre2_val = RCC_CFGR_PPRE2_DIV1;

	/* Enable HSI (Always available) */
	RCC->CR |= RCC_CR_HSION;
	while(!(RCC->CR & RCC_CR_HSIRDY)){}

	/* Configure Voltage Output Scaling (VOS) */
	/* Enable PWR interface clock */
	RCC->APB1ENR |= RCC_APB1ENR_PWREN;

	/* Small delay */
	for(volatile int i = 0; i < 100; i++){}

	if(target_sysclk_freq > 84000000UL)
	{
		if((PWR->CR & PWR_CR_VOS) == 0) // Check scale
		{
			PWR->CR |= PWR_CR_VOS;
//			while((PWR->CSR & PWR_CSR_VOSRDY) == 0){} // Wait for VOS
		}
	}
	else if ((target_sysclk_freq > 64000000UL) && (target_sysclk_freq <= 84000000UL))
	{
		PWR->CR &=~PWR_CR_VOS;
		PWR->CR |= (1U<<15);
//		while((PWR->CSR & PWR_CSR_VOSRDY) == 0){} // Wait for VOS
	}
	else if (target_sysclk_freq <= 64000000UL)
	{
		PWR->CR &=~PWR_CR_VOS;
		PWR->CR |= (1U<<14);
//		while((PWR->CSR & PWR_CSR_VOSRDY) == 0){} // Wait for VOS
	}


	/* Flash latency (RM0383 Table 5 p.45 - Assuming VDD 2.7-3.6V
	 * Actual HCLK is target_sysclk_freq / (value of HPRE divider)
	 * For simplicity, assuming HPRE_DIV1 for now when selecting latency */
	uint32_t hclk_for_latency = target_sysclk_freq; // assuming HPRE = DIV1
	if(hclk_for_latency <= 30000000UL){ flash_latency = FLASH_ACR_LATENCY_0WS; }
	else if(hclk_for_latency <= 60000000UL){ flash_latency = FLASH_ACR_LATENCY_1WS; }
	else if(hclk_for_latency <= 90000000UL){ flash_latency = FLASH_ACR_LATENCY_2WS; }
	else { flash_latency = FLASH_ACR_LATENCY_3WS; } // up to 100 MHz

	FLASH->ACR = FLASH_ACR_ICEN | FLASH_ACR_DCEN | FLASH_ACR_PRFTEN | flash_latency;
	while((FLASH->ACR & FLASH_ACR_LATENCY) != flash_latency){}

	/* Configure AHB, APB1, APB2 prescalers */
	hpre_val = RCC_CFGR_HPRE_DIV1; // HCLK = SYSCLK (HCLK is AHB clock)

	/* PCLK1 (APB1 clock) max 50MHz for F411 */
	if((target_sysclk_freq / 1) > 50000000)
	{
		ppre1_val = RCC_CFGR_PPRE1_DIV2; // PCLK1 = HCLK / 2
	}
	else
	{
		ppre1_val = RCC_CFGR_PPRE1_DIV1;
	}

	/* PCLK2 (APB2 clock) max 100MHz on F411 */
	ppre2_val = RCC_CFGR_PPRE2_DIV1;

	RCC->CFGR = (RCC->CFGR & ~(RCC_CFGR_HPRE | RCC_CFGR_PPRE1 | RCC_CFGR_PPRE2)) // clear old prescalers
			    | hpre_val | ppre1_val | ppre2_val; // set new prescalers


	/* Configure PLL or use HSI directly */
	if(target_sysclk_freq == SYSCLK_FREQ_HSI_16MHZ)
	{
		RCC->CFGR &=~RCC_CFGR_SW; // clear software bits & select HSI
		while((RCC->CFGR & RCC_CFGR_SWS) != RCC_CFGR_SWS_HSI){}
		if(RCC->CR & RCC_CR_PLLON)
		{
			RCC->CR &=~RCC_CR_PLLON; // if PLL on turn it off
			while(RCC->CR & RCC_CR_PLLRDY){}
		}
	}
	else
	{
		if(RCC->CR & RCC_CR_PLLON)
		{
			RCC->CR &=~RCC_CR_PLLON; // if PLL on turn it off
			while(RCC->CR & RCC_CR_PLLRDY){}
		}
		/* PLLP field (bits 17:16): 00 => /2, 01 => /4, 10 => /6, 11 => /8 */
		pllp_field_value = 0x00U;

		if(target_sysclk_freq == SYSCLK_FREQ_PLL_48MHZ){ plln = 48; }
		else if(target_sysclk_freq == SYSCLK_FREQ_PLL_72MHZ){ plln = 72; }
		else if(target_sysclk_freq == SYSCLK_FREQ_PLL_96MHZ){ plln = 96; }
		else // fall back to HSI if target not recognised for PLL
		{
			RCC->CFGR &=~RCC_CFGR_SW;
			while((RCC->CFGR & RCC_CFGR_SWS) != RCC_CFGR_SWS_HSI);
			return;
		}

		/* PLLQ: Set to valid value (eg /4 for USB if needed, max /15 for F411 */
		uint32_t pllq_val = 4;

		RCC->PLLCFGR = (RCC_PLLCFGR_PLLSRC_HSI) |
				       (pllm << RCC_PLLCFGR_PLLM_Pos) |
					   (plln << RCC_PLLCFGR_PLLN_Pos) |
					   (pllp_field_value << RCC_PLLCFGR_PLLM_Pos) |
					   (pllq_val << RCC_PLLCFGR_PLLQ_Pos);

		RCC->CR |= RCC_CR_PLLON;
		while(!(RCC->CR & RCC_CR_PLLRDY)){}

		RCC->CFGR = (RCC->CFGR & ~RCC_CFGR_SW) | RCC_CFGR_SW_PLL; // Select PLL
		while((RCC->CFGR & RCC_CFGR_SWS) != RCC_CFGR_SWS_PLL){}
	}
}

void Timer_Init_For_Timing(void)
{
	RCC->APB1ENR |= RCC_APB1ENR_TIM5EN;

	/* Calculate TIM5 prescaler for 1MHx clock (1us tick)
	 * TIMxCLK = PCKL1 if APB1 prescaler = 1
	 * TIMxCLK = 2 * PCLK1 if APB1 prescaler > 1 */
	uint32_t pclk1_freq = SystemCoreClock; // start with HSI
	uint32_t ppre1_rcc_bits = (RCC->CFGR & RCC_CFGR_PPRE1) >> RCC_CFGR_PPRE1_Pos;

	if(ppre1_rcc_bits >= 0x4U)
	{
		uint32_t ppre1_divider = 1 << (ppre1_rcc_bits - 3);
		pclk1_freq = SystemCoreClock / ppre1_divider;
	}

	uint32_t tim_clk_input_freq = pclk1_freq;
	if(ppre1_rcc_bits >= 0x4U)
	{
		tim_clk_input_freq *= 2; // timer clock 2x PCLK1
	}

	TIM5->PSC = (tim_clk_input_freq / 1000000U) - 1;
	TIM5->ARR = 0xFFFFFFFFUL;
	TIM5->CR1 |= TIM_CR1_CEN;
}

/**
 * Read the current TIM5 counter value.
 *
 * The result is a microsecond timestamp only when TIM5's active prescaler
 * divides its input clock down to 1 MHz.
 *
 * @return Raw 32-bit content of TIM5->CNT.
 */
uint32_t Timer_Get_Time_us(void)
{
	const uint32_t now_ticks = TIM5->CNT;

	return now_ticks;
}

/*
 * Fixed busy-loop workload used as the timed task: executes __NOP() 1000
 * times. Takes no input and produces no result.
 */
void Task_Run_Computation(void)
{
	const uint32_t iterations = 1000;

	/* The loop counter is volatile, so every read and write of it must be
	 * performed and the compiler cannot collapse or remove the loop. */
	for(volatile uint32_t i = 0; i < iterations; i++)
	{
		__NOP(); /* presumably the CMSIS no-operation intrinsic */
	}
}

 

When I run this at my chosen frequency I get the increase in current I expect with higher frequencies. But what I did not expect was that the execution time for the first couple of minutes regardless of system frequency is 12048 to 12062. After a few minutes this switches to the values expected {16MHz: 753, 48MHz: 251, 72MHz: 167, 96MHz: 126}

Initially I thought this might be a thermal issue, as the transition time appears to be shorter at higher frequencies, but then it occurred to me: why doesn't that affect TIM5 as well? How come TIM5 remains unaffected?

 

So yes I'm confused. Anyone know what's happening here?

0 REPLIES 0