2024-12-23 04:39 AM - last edited on 2024-12-23 08:16 AM by Andrew Neil
Hello,
I have simple project in assembly and can't understand/explain why one time delay loop takes much longer than the other one (LEDs should be blinking one on, second off and vice versa). But one such state takes approx. 5 times more time than the other, although loops seem to be written equally demanding
Maybe I'm doing something obvious wrong. BTW, we have also spotted such behaviour also on some other projects - one loop taking much more time, although it should take same time. I've tried running the code from Flash or RAM and same happens in both situations.
Any hint, advice? Thanks...
Short description: main loop has two identical SW delay loops, in between on/off state is written to 3 variables. SysTick Handler is triggered each ms and reads the state of variables and reflects to real outputs. Green LED is on approx. 5 times longer that red one.
/*
* Main.s
*/
.syntax unified
.cpu cortex-m7
.thumb
///////////////////////////////////////////////////////////////////////////////
// Definitions
///////////////////////////////////////////////////////////////////////////////
// Definitions section. Define all the registers and
// constants here for code readability.
// Constants
.equ LEDDELAY, 6400
// For LOOPTC Software delay
// By default 64MHz internal HSI clock is enabled
// Internal loop takes N cycles
// Register Addresses
// You can find the base addresses for all peripherals from Memory Map section 2.3.2
// RM0433 on page 131. Then the offsets can be found on their relevant sections.
// RCC base address is 0x58024400
// AHB4ENR register offset is 0xE0
.equ RCC_AHB4ENR, 0x580244E0 // RCC AHB4 peripheral clock reg
// GPIOA base address is 0x58020000
.equ GPIOA_BASE, 0x58020000 // GPIOI base address)
// GPIOI base address is 0x58022000
.equ GPIOI_BASE, 0x58022000 // GPIOI base address)
// GPIOJ base address is 0x58022000
.equ GPIOJ_BASE, 0x58022400 // GPIOJ base address)
// MODER register offset is 0x00
.equ GPIOx_MODER, 0x00 // GPIOx port mode register
// ODR register offset is 0x14
.equ GPIOx_ODR, 0x14 // GPIOx output data register
// BSSR register offset is 0x18
.equ GPIOx_BSRR, 0x18 // GPIOx port set/reset register
// Values for BSRR register - pin PI13: LED is on, when GPIO is off (Red)
.equ LED2_OFF, 0x00002000 // Setting pin to 1 -> LED is off
.equ LED2_ON, 0x20000000 // Setting pin to 0 -> LED is on
// Values for BSRR register - pin PJ2: LED is on, when GPIO is off (Green)
.equ LED1_OFF, 0x00000004 // Setting pin to 1 -> LED is off
.equ LED1_ON, 0x00040000 // Setting pin to 0 -> LED is on
// Values for BSRR register - pin PA3: PA3
.equ PA3_ON, 0x00000008 // Setting pin to 1
.equ PA3_OFF, 0x00080000 // Setting pin to 0
// Vector table offset register definition
// Important for relocated Vector table on running from RAM
.equ VTOR,0xE000ED08
// SysTick Timer definitions
.equ SCS_BASE,0xe000e000
.equ SCS_SYST_CSR,0x10// Control/Status register
.equ SCS_SYST_RVR,0x14// Value to countdown from
.equ SCS_SYST_CVR,0x18// Current value
.equ SYSTICK_RELOAD_1MS, 63999 //1 msec at 64MHz ...
// Start of data section
.data
.align
LED1: .word 0 // LED1 State (Green)
LED2: .word 0 // LED2 State (Red)
PA3: .word 0 // PA3 pin State
// Start of text section
.text
.type main, %function
.global main
.align
main:
bl INIT // Priprava V/I in sistemskih naprav za kontrolo LED diod in PA3
ldr r1,=LED1
ldr r2,=LED2
ldr r3,=PA3
mov r4,#0xff // LED(Pin) On value
mov r5,#0 // LED(Pin) Off value
loop:
str r4,[r1] // Vklop LED1 diode (Green)
str r5,[r2] // Izklop LED2 diode (Red)
str r4,[r3] // Vklop PA3
// bl WRITEOUT // Prenesi na prikljucke
@ delay half cycle
mov r0,#500
ZAN1: ldr r6, =LEDDELAY
ZAN1n: subs r6, r6,#1
bne ZAN1n
subs r0,r0,#1
bne ZAN1
str r5,[r1] // Izklop LED1 diode (Green)
str r4,[r2] // Vklop LED2 diode (Red)
str r5,[r3] // Izklop PA3
// bl WRITEOUT // Prenesi na prikljucke
@ delay half cycle
mov r0,#500
ZAN2: ldr r6, =LEDDELAY
ZAN2n: subs r6, r6,#1
bne ZAN2n
subs r0,r0,#1
bne ZAN2
b loop // skok na vrstico loop:
__end: b __end
INIT:
push {r0,r1,lr}
bl INIT_IO
// If running code from FLASH comment next 3 lines!!!
ldr r1, =VTOR // Set Vector table addr. to 0x24000000
ldr r0, =0x24000000
str r0, [r1]
bl INIT_TC_PSP // Priprava SysTick časovnika s prek
pop {r0,r1,pc}
INIT_IO:
push {r5, r6, lr}
// Enable GPIOA,I,J Peripheral Clock (bit 8 in AHB4ENR register)
ldr r6, = RCC_AHB4ENR // Load peripheral clock reg address to r6
ldr r5, [r6] // Read its content to r5
orr r5, #0x00000300 // Set bits 8 and 9 to enable GPIOI,J clock
orr r5, #0x00000001 // Set bits 1 to enable GPIOA clock
str r5, [r6] // Store result in peripheral clock register
// Make GPIOA Pin3 as output pin (bits 7:6 in MODER register)
ldr r6, =GPIOA_BASE // Load GPIOA BASE address to r6
ldr r5, [r6,#GPIOx_MODER] // Read GPIOA_MODER content to r5
and r5, #0xFFFFFF3F // Clear bits 7-6 for PA3
orr r5, #0x00000040 // Write 01 to bits 7-6 for PA3
str r5, [r6] // Store result in GPIO MODER register
// Make GPIOI Pin13 as output pin (bits 27:26 in MODER register)
ldr r6, =GPIOI_BASE // Load GPIOI BASE address to r6
ldr r5, [r6,#GPIOx_MODER] // Read GPIOI_MODER content to r5
and r5, #0xF3FFFFFF // Clear bits 27-26 for P13
orr r5, #0x04000000 // Write 01 to bits 27-26 for P13
str r5, [r6] // Store result in GPIO MODER register
// Make GPIOJ Pin2 as output pin (bits 5:4 in MODER register)
ldr r6, =GPIOJ_BASE // Load GPIOJ BASE address to r6
ldr r5, [r6,#GPIOx_MODER] // Read GPIOJ_MODER content to r5
and r5, #0xFFFFFFCF // Clear bits 5-4 for P2
orr r5, #0x00000010 // Write 01 to bits 5-4 for PJ2
str r5, [r6] // Store result in GPIO MODER register
pop {r5, r6, pc}
INIT_TC_PSP:
push {r0, r1, lr}
ldr r1, =SCS_BASE
ldr r0, =SYSTICK_RELOAD_1MS
str r0, [r1, #SCS_SYST_RVR]
mov r0, #0
str r0, [r1, #SCS_SYST_CVR]
mov r0, #0b111 // Set TickInt to 1 as well
str r0, [r1, #SCS_SYST_CSR]
pop {r0, r1, pc}
.global SysTick_Handler
.section .text.SysTick_Handler,"ax",%progbits
.type SysTick_Handler, %function
SysTick_Handler:
push {r3, r4, r5, r6, lr}
// -----------------------------------
// Set LED1 from LED1 variable
ldr r3,=LED1 // Load LED1 value
ldr r4,[r3]
cmp r4,#0
beq L1ON
mov r5, #LED1_OFF
b CONT1
L1ON: mov r5, #LED1_ON
CONT1: // Set GPIOJ Pins through BSRR register
ldr r6, =GPIOJ_BASE // Load GPIOD BASE address to r6
str r5, [r6,#GPIOx_BSRR] // Write to BSRR register
// -----------------------------------
// Set LED2 from LED2 variable
ldr r3,=LED2 // Load LED1 value
ldr r4,[r3]
cmp r4,#0
beq L2ON
mov r5, #LED2_OFF
b CONT2
L2ON: mov r5, #LED2_ON
CONT2: // Set GPIOI Pins through BSRR register
ldr r6, =GPIOI_BASE // Load GPIOD BASE address to r6
str r5, [r6,#GPIOx_BSRR] // Write to BSRR register
// -----------------------------------
// Set PA3 from PA3 variable
ldr r3,=PA3 // Load PA3 value
ldr r4,[r3]
cmp r4,#0
beq L3ON
mov r5, #PA3_OFF
b CONT3
L3ON: mov r5, #PA3_ON
CONT3: // Set GPIOA Pins through BSRR register
ldr r6, =GPIOA_BASE // Load GPIOD BASE address to r6
str r5, [r6,#GPIOx_BSRR] // Write to BSRR register
RET: pop {r3, r4, r5, r6, pc }
2024-12-23 07:30 AM
When checking timing, drive the pins directly so you can scope them. I'm not sure of the value of establishing different beat frequencies from setting variables, and then actioning them on a 1 KHz tick.
You can measure cycles via DWT CYCCNT
Check alignments of branch targets.
Perhaps put the delay in subroutine, to confirm that the same code in the same location has consistent behaviour.
2024-12-24 07:06 AM
Hello,
thanks for tips.
Can I ask for more info about "alignments of branch targets"?
I've extended the code with CYCCNT measurements, but I guess I'm doing something wrong, because it measures only for the first time, then it seems to repeat same value.
Is there any more detailed docs about DWT counters? I'm also interested in other counters for pipeline performance. I have never suceeded to read others - only CYCCNT and only first time in the code...
Thanks.
/*
* Main.s
*/
.syntax unified
.cpu cortex-m7
.thumb
///////////////////////////////////////////////////////////////////////////////
// Definitions
///////////////////////////////////////////////////////////////////////////////
// Definitions section. Define all the registers and
// constants here for code readability.
// Constants
.equ LEDDELAY, 64000
// For LOOPTC Software delay
// By default 64MHz internal HSI clock is enabled
// Internal loop takes N cycles
// Register Addresses
// You can find the base addresses for all peripherals from Memory Map section 2.3.2
// RM0433 on page 131. Then the offsets can be found on their relevant sections.
// RCC base address is 0x58024400
// AHB4ENR register offset is 0xE0
.equ RCC_AHB4ENR, 0x580244E0 // RCC AHB4 peripheral clock reg
// GPIOA base address is 0x58020000
.equ GPIOA_BASE, 0x58020000 // GPIOI base address)
// GPIOI base address is 0x58022000
.equ GPIOI_BASE, 0x58022000 // GPIOI base address)
// GPIOJ base address is 0x58022000
.equ GPIOJ_BASE, 0x58022400 // GPIOJ base address)
// MODER register offset is 0x00
.equ GPIOx_MODER, 0x00 // GPIOx port mode register
// ODR register offset is 0x14
.equ GPIOx_ODR, 0x14 // GPIOx output data register
// BSSR register offset is 0x18
.equ GPIOx_BSRR, 0x18 // GPIOx port set/reset register
// Values for BSRR register - pin PI13: LED is on, when GPIO is off (Red)
.equ LED2_OFF, 0x00002000 // Setting pin to 1 -> LED is off
.equ LED2_ON, 0x20000000 // Setting pin to 0 -> LED is on
// Values for BSRR register - pin PJ2: LED is on, when GPIO is off (Green)
.equ LED1_OFF, 0x00000004 // Setting pin to 1 -> LED is off
.equ LED1_ON, 0x00040000 // Setting pin to 0 -> LED is on
// Values for BSRR register - pin PA3: PA3
.equ PA3_ON, 0x00000008 // Setting pin to 1
.equ PA3_OFF, 0x00080000 // Setting pin to 0
// Vector table offset register definition
// Important for relocated Vector table on running from RAM
.equ VTOR,0xE000ED08
// SysTick Timer definitions
.equ SCS_BASE,0xe000e000
.equ SCS_SYST_CSR,0x10// Control/Status register
.equ SCS_SYST_RVR,0x14// Value to countdown from
.equ SCS_SYST_CVR,0x18// Current value
.equ SYSTICK_RELOAD_1MS, 63999 //1 msec at 64MHz ...
// Register Addresses
.equ DWT_BASE, 0xE0001000 // DWT Base address
.equ DWT_CTRL, 0x00 // DWT_CTRL reg (RM0433, pp.3209)
.equ DWT_CYCCNT, 0x04 // increments on each clock cycle when the processor is not halted in debug state.
.equ DWT_CPICNT, 0x08 // additional cycles required to execute multi-cycle instructions, and instruction fetch stalls
.equ DWT_EXCCNT, 0x0C // count the total cycles spent in interrupt processing (cycles spent performing exception entry and exit procedures)
.equ DWT_SLPCNT, 0x10 // count the total number of cycles during which the processor is sleeping (cycles spent sleeping)
.equ DWT_LSUCNT, 0x14 // counts the total number of cycles that the processor is processing an LSU operation (cycles spent waiting for loads and stores to complete)
// For example, an LDR that takes two cycles to complete increments this counter one cycle.
// Equivalently, an LDR that stalls for two cycles (and so takes four cycles), increments counter three times.
.equ DWT_FOLDCNT, 0x18 // count the total number of folded instructions (cycles saved by instructions which execute in zero cycles)
// This counts 1 for each instruction that takes 0 cycles.
// If the processor configuration includes the DWT profiling counters, the instruction count can be calculated as:
// instructions executed = DWT_CYCCNT - DWT_CPICNT - DWT_EXCCNT - DWT_SLEEPCNT - DWT_LSUCNT + DWT_FOLDCNT
.equ DWT_LAR, 0xFB0 // DWT_LAR DWT_LAR = 0xC5ACCE55; // unlock (CM7)
.equ DEMCR, 0xE000EDFC // SCB_DEMCR |= 0x01000000;
// Start of data section
.data
.align
LED1: .word 0 // LED1 State (Green)
LED2: .word 0 // LED2 State (Red)
PA3: .word 0 // PA3 pin State
// Start of text section
.text
.type main, %function
.global main
.align
main:
bl INIT // Priprava V/I in sistemskih naprav za kontrolo LED diod in PA3
bl INIT_CNT
ldr r0, =DWT_BASE
ldr r1,=LED1
ldr r2,=LED2
ldr r3,=PA3
mov r4,#0xff // LED(Pin) On value
mov r5,#0 // LED(Pin) Off value
loop:
str r4,[r1] // Vklop LED1 diode (Green)
str r5,[r2] // Izklop LED2 diode (Red)
str r4,[r3] // Vklop PA3
// bl WRITEOUT // Prenesi na prikljucke
bl INIT_CNT
bl RESET_CNT
bl ENABLE_CNT
// Read DWT Counter before value
ldr r8, [r0,#DWT_CYCCNT]
@ delay half cycle
mov r0,#500
ZAN1: ldr r6, =LEDDELAY
ZAN1n: subs r6, r6,#1
bne ZAN1n
subs r0,r0,#1
bne ZAN1
// Read DWT Counter after value
ldr r10, [r0,#DWT_CYCCNT]
sub r8,r10,r8 // Difference in r0
str r5,[r1] // Izklop LED1 diode (Green)
str r4,[r2] // Vklop LED2 diode (Red)
str r5,[r3] // Izklop PA3
// bl WRITEOUT // Prenesi na prikljucke
bl INIT_CNT
bl RESET_CNT
bl ENABLE_CNT
// Read DWT Counter before value
ldr r9, [r0,#DWT_CYCCNT]
@ delay half cycle
mov r0,#500
ZAN2: ldr r6, =LEDDELAY
ZAN2n: subs r6, r6,#1
bne ZAN2n
subs r0,r0,#1
bne ZAN2
// Read DWT Counter after value
ldr r10, [r0,#DWT_CYCCNT]
sub r11,r10,r9 // Difference in r0
b loop // skok na vrstico loop:
__end: b __end
INIT:
push {r0,r1,lr}
bl INIT_IO
// If running code from FLASH comment next 3 lines!!!
ldr r1, =VTOR // Set Vector table addr. to 0x24000000
ldr r0, =0x24000000
str r0, [r1]
bl INIT_TC_PSP // Priprava SysTick časovnika s prek
pop {r0,r1,pc}
INIT_IO:
push {r5, r6, lr}
// Enable GPIOA,I,J Peripheral Clock (bit 8 in AHB4ENR register)
ldr r6, = RCC_AHB4ENR // Load peripheral clock reg address to r6
ldr r5, [r6] // Read its content to r5
orr r5, #0x00000300 // Set bits 8 and 9 to enable GPIOI,J clock
orr r5, #0x00000001 // Set bits 1 to enable GPIOA clock
str r5, [r6] // Store result in peripheral clock register
// Make GPIOA Pin3 as output pin (bits 7:6 in MODER register)
ldr r6, =GPIOA_BASE // Load GPIOA BASE address to r6
ldr r5, [r6,#GPIOx_MODER] // Read GPIOA_MODER content to r5
and r5, #0xFFFFFF3F // Clear bits 7-6 for PA3
orr r5, #0x00000040 // Write 01 to bits 7-6 for PA3
str r5, [r6] // Store result in GPIO MODER register
// Make GPIOI Pin13 as output pin (bits 27:26 in MODER register)
ldr r6, =GPIOI_BASE // Load GPIOI BASE address to r6
ldr r5, [r6,#GPIOx_MODER] // Read GPIOI_MODER content to r5
and r5, #0xF3FFFFFF // Clear bits 27-26 for P13
orr r5, #0x04000000 // Write 01 to bits 27-26 for P13
str r5, [r6] // Store result in GPIO MODER register
// Make GPIOJ Pin2 as output pin (bits 5:4 in MODER register)
ldr r6, =GPIOJ_BASE // Load GPIOJ BASE address to r6
ldr r5, [r6,#GPIOx_MODER] // Read GPIOJ_MODER content to r5
and r5, #0xFFFFFFCF // Clear bits 5-4 for P2
orr r5, #0x00000010 // Write 01 to bits 5-4 for PJ2
str r5, [r6] // Store result in GPIO MODER register
pop {r5, r6, pc}
INIT_TC_PSP:
push {r0, r1, lr}
ldr r1, =SCS_BASE
ldr r0, =SYSTICK_RELOAD_1MS
str r0, [r1, #SCS_SYST_RVR]
mov r0, #0
str r0, [r1, #SCS_SYST_CVR]
mov r0, #0b111 // Set TickInt to 1 as well
str r0, [r1, #SCS_SYST_CSR]
pop {r0, r1, pc}
.global SysTick_Handler
.section .text.SysTick_Handler,"ax",%progbits
.type SysTick_Handler, %function
SysTick_Handler:
push {r3, r4, r5, r6, lr}
// -----------------------------------
// Set LED1 from LED1 variable
ldr r3,=LED1 // Load LED1 value
ldr r4,[r3]
cmp r4,#0
beq L1ON
mov r5, #LED1_OFF
b CONT1
L1ON: mov r5, #LED1_ON
CONT1: // Set GPIOJ Pins through BSRR register
ldr r6, =GPIOJ_BASE // Load GPIOD BASE address to r6
str r5, [r6,#GPIOx_BSRR] // Write to BSRR register
// -----------------------------------
// Set LED2 from LED2 variable
ldr r3,=LED2 // Load LED1 value
ldr r4,[r3]
cmp r4,#0
beq L2ON
mov r5, #LED2_OFF
b CONT2
L2ON: mov r5, #LED2_ON
CONT2: // Set GPIOI Pins through BSRR register
ldr r6, =GPIOI_BASE // Load GPIOD BASE address to r6
str r5, [r6,#GPIOx_BSRR] // Write to BSRR register
// -----------------------------------
// Set PA3 from PA3 variable
ldr r3,=PA3 // Load PA3 value
ldr r4,[r3]
cmp r4,#0
beq L3ON
mov r5, #PA3_OFF
b CONT3
L3ON: mov r5, #PA3_ON
CONT3: // Set GPIOA Pins through BSRR register
ldr r6, =GPIOA_BASE // Load GPIOD BASE address to r6
str r5, [r6,#GPIOx_BSRR] // Write to BSRR register
RET: pop {r3, r4, r5, r6, pc }
INIT_CNT:
push {r0-r2, lr}
// Added in 2024 :
ldr r1,=DWT_BASE
ldr r0,=0xC5ACCE55
str r0,[r1,#DWT_LAR] // *DWT_LAR = 0xC5ACCE55; // unlock (CM7)
ldr r1,=DEMCR
ldr r0,[r1]
orr r0,r0,#0x01000000
str r0,[r1] // *SCB_DEMCR |= 0x01000000;
// End: Added in 2024 :
mov r0,#0
ldr r1, =DWT_BASE
// Disable DWT Counters
ldr r2, [r1,#DWT_CTRL]
bic r2,r2,#1 // Disabling CYCCNTENA bit
str r2, [r1,#DWT_CTRL]
// Reset DWT Counters
str r0, [r1,#DWT_CYCCNT]
str r0, [r1,#DWT_CPICNT]
str r0, [r1,#DWT_EXCCNT]
str r0, [r1,#DWT_SLPCNT]
str r0, [r1,#DWT_LSUCNT]
str r0, [r1,#DWT_FOLDCNT]
pop {r0-r2, pc}
RESET_CNT:
push {r0-r2, lr}
mov r0,#0
ldr r1, =DWT_BASE
// Disable DWT Counters
ldr r2, [r1,#DWT_CTRL]
bic r2,r2,#1 // Disabling CYCCNTENA bit
str r2, [r1,#DWT_CTRL]
// Reset DWT Counters
str r0, [r1,#DWT_CYCCNT]
str r0, [r1,#DWT_CPICNT]
str r0, [r1,#DWT_EXCCNT]
str r0, [r1,#DWT_SLPCNT]
str r0, [r1,#DWT_LSUCNT]
str r0, [r1,#DWT_FOLDCNT]
pop {r0-r2, pc}
ENABLE_CNT:
push {r0-r2, lr}
ldr r0,=0xC5ACCE55
str r0,[r1,#DWT_LAR] // *DWT_LAR = 0xC5ACCE55; // unlock (CM7)
ldr r1, =DWT_BASE
// Enable DWT Counters
ldr r2, [r1,#DWT_CTRL]
orr r2,r2,#1 // Enabling CYCCNTENA bit
str r2, [r1,#DWT_CTRL]
pop {r0-r2, pc}
2024-12-27 06:21 AM
Hello,
I've found some errors in my code, but still cannot explain weird behaviour.
If I uncomment two lines of reading CYCCNT counter before entering SW delay nested loop (there are two such loops in program), then both LEDs blink for the same time.
If I comment thoise sections, green LED is on much longer (aprox. 8x) than red LED (which stays in the same time interval).
I really can't understand or explain such behaviour?
Also, I've managed to activate also other counters and get surprising results.
I've measured 32 050 210 cycles on second delay loop and have calculated number of executed instructions according to formula:
instructions executed = DWT_CYCCNT - DWT_CPICNT - DWT_EXCCNT - DWT_SLEEPCNT - DWT_LSUCNT + DWT_FOLDCNT
and get 32 049 740 instructions, which is approx. CPI of 1.
But if I calculate briefly, the number in one nested delay loop should be above 64 000 000.
Is this difference because of two-issue pipeline?
Anyone has an idea what might be wrong?
Thanks in advance.
I'm including corrected code :
/*
* Main.s
*
* Comment: both time delays appear equal if
* at least one of CYCCNT reading two lines codes are uncommented:
* Read DWT Counter before value 1 or Read DWT Counter before value 2.
*
* Otherwise: Green LED is switched on much more time that Red LED.
*/
.syntax unified
.cpu cortex-m7
.thumb
///////////////////////////////////////////////////////////////////////////////
// Definitions
///////////////////////////////////////////////////////////////////////////////
// Definitions section. Define all the registers and
// constants here for code readability.
// Constants
.equ LEDDELAY, 64000
// For LOOPTC Software delay
// By default 64MHz internal HSI clock is enabled
// Internal loop takes N cycles
// Register Addresses
// You can find the base addresses for all peripherals from Memory Map section 2.3.2
// RM0433 on page 131. Then the offsets can be found on their relevant sections.
// RCC base address is 0x58024400
// AHB4ENR register offset is 0xE0
.equ RCC_AHB4ENR, 0x580244E0 // RCC AHB4 peripheral clock reg
// GPIOA base address is 0x58020000
.equ GPIOA_BASE, 0x58020000 // GPIOI base address)
// GPIOI base address is 0x58022000
.equ GPIOI_BASE, 0x58022000 // GPIOI base address)
// GPIOJ base address is 0x58022000
.equ GPIOJ_BASE, 0x58022400 // GPIOJ base address)
// MODER register offset is 0x00
.equ GPIOx_MODER, 0x00 // GPIOx port mode register
// ODR register offset is 0x14
.equ GPIOx_ODR, 0x14 // GPIOx output data register
// BSSR register offset is 0x18
.equ GPIOx_BSRR, 0x18 // GPIOx port set/reset register
// Values for BSRR register - pin PI13: LED is on, when GPIO is off (Red)
.equ LED2_OFF, 0x00002000 // Setting pin to 1 -> LED is off
.equ LED2_ON, 0x20000000 // Setting pin to 0 -> LED is on
// Values for BSRR register - pin PJ2: LED is on, when GPIO is off (Green)
.equ LED1_OFF, 0x00000004 // Setting pin to 1 -> LED is off
.equ LED1_ON, 0x00040000 // Setting pin to 0 -> LED is on
// Values for BSRR register - pin PA3: PA3
.equ PA3_ON, 0x00000008 // Setting pin to 1
.equ PA3_OFF, 0x00080000 // Setting pin to 0
// Vector table offset register definition
// Important for relocated Vector table on running from RAM
.equ VTOR,0xE000ED08
// SysTick Timer definitions
.equ SCS_BASE,0xe000e000
.equ SCS_SYST_CSR,0x10// Control/Status register
.equ SCS_SYST_RVR,0x14// Value to countdown from
.equ SCS_SYST_CVR,0x18// Current value
.equ SYSTICK_RELOAD_1MS, 63999 //1 msec at 64MHz ...
// Register Addresses
.equ DWT_BASE, 0xE0001000 // DWT Base address
.equ DWT_CTRL, 0x00 // DWT_CTRL reg (RM0433, pp.3209)
.equ DWT_CYCCNT, 0x04 // increments on each clock cycle when the processor is not halted in debug state.
.equ DWT_CPICNT, 0x08 // additional cycles required to execute multi-cycle instructions, and instruction fetch stalls
.equ DWT_EXCCNT, 0x0C // count the total cycles spent in interrupt processing (cycles spent performing exception entry and exit procedures)
.equ DWT_SLPCNT, 0x10 // count the total number of cycles during which the processor is sleeping (cycles spent sleeping)
.equ DWT_LSUCNT, 0x14 // counts the total number of cycles that the processor is processing an LSU operation (cycles spent waiting for loads and stores to complete)
// For example, an LDR that takes two cycles to complete increments this counter one cycle.
// Equivalently, an LDR that stalls for two cycles (and so takes four cycles), increments counter three times.
.equ DWT_FOLDCNT, 0x18 // count the total number of folded instructions (cycles saved by instructions which execute in zero cycles)
// This counts 1 for each instruction that takes 0 cycles.
.equ DWT_CTRL_ENABLE_CNTs, 0x003f0001 // Enable bits 16-21 and 1
// If the processor configuration includes the DWT profiling counters, the instruction count can be calculated as:
// instructions executed = DWT_CYCCNT - DWT_CPICNT - DWT_EXCCNT - DWT_SLEEPCNT - DWT_LSUCNT + DWT_FOLDCNT
.equ DWT_LAR, 0xFB0 // DWT_LAR DWT_LAR = 0xC5ACCE55; // unlock (CM7)
.equ DEMCR, 0xE000EDFC // SCB_DEMCR |= 0x01000000;
// Start of data section
.data
.align
LED1: .word 0 // LED1 State (Green)
LED2: .word 0 // LED2 State (Red)
PA3: .word 0 // PA3 pin State
// Start of text section
.text
.type main, %function
.global main
.align
main:
bl INIT // Priprava V/I in sistemskih naprav za kontrolo LED diod in PA3
//bl INIT_CNT
//bl RESET_CNT
ldr r1,=LED1
ldr r2,=LED2
ldr r3,=PA3
mov r4,#0xff // LED(Pin) On value
mov r5,#0 // LED(Pin) Off value
loop:
str r4,[r1] // Izklop LED1 diode (Green)
str r5,[r2] // Vklop LED2 diode (Red)
str r4,[r3] // Vklop PA3 High
// bl WRITEOUT // Prenesi na prikljucke
//bl RESET_CNT
// Read DWT Counter before value 1
// Uncommenting following 2 lines code works (both delays are same,
// otherwise one delay takes much longer ???
//ldr r0, =DWT_BASE
//ldr r8, [r0,#DWT_CYCCNT]
@ delay half cycle
mov r0,#500
ZAN1: ldr r6, =LEDDELAY
ZAN1n: subs r6, r6,#1
bne ZAN1n
subs r0,r0,#1
bne ZAN1
// Read DWT Counter after value
//ldr r0, =DWT_BASE
//ldr r10, [r0,#DWT_CYCCNT]
//sub r8,r10,r8 // Difference in r8
// Calculate and read other counters
//mov r0,r8
//bl CALC_CNT
//mov r11,r0 // Number of instr. in r11
str r5,[r1] // Vklop LED1 diode (Green)
str r4,[r2] // Izklop LED2 diode (Red)
str r5,[r3] // Izklop PA3 Low
// bl WRITEOUT // Prenesi na prikljucke
// bl RESET_CNT
// Read DWT Counter before value 2
// Uncommenting following 2 lines code works (both delays are same,
// otherwise one delay takes much longer ???
ldr r0, =DWT_BASE
ldr r9, [r0,#DWT_CYCCNT]
@ delay half cycle
mov r0,#500
ZAN2: ldr r6, =LEDDELAY
ZAN2n: subs r6, r6,#1
bne ZAN2n
subs r0,r0,#1
bne ZAN2
// Read DWT Counter after value
// ldr r0, =DWT_BASE
// ldr r10, [r0,#DWT_CYCCNT]
// sub r10,r10,r9 // Difference in r10
// Calculate and read other counters
// mov r0,r10
// bl CALC_CNT
// mov r12,r0 // Number of instr. in r12
b loop // skok na vrstico loop:
__end: b __end
INIT:
push {r0,r1,lr}
bl INIT_IO
// If running code from FLASH comment next 3 lines!!!
ldr r1, =VTOR // Set Vector table addr. to 0x24000000
ldr r0, =0x24000000
str r0, [r1]
bl INIT_TC_PSP // Priprava SysTick časovnika s prek
pop {r0,r1,pc}
INIT_IO:
push {r5, r6, lr}
// Enable GPIOA,I,J Peripheral Clock (bit 8 in AHB4ENR register)
ldr r6, = RCC_AHB4ENR // Load peripheral clock reg address to r6
ldr r5, [r6] // Read its content to r5
orr r5, #0x00000300 // Set bits 8 and 9 to enable GPIOI,J clock
orr r5, #0x00000001 // Set bits 1 to enable GPIOA clock
str r5, [r6] // Store result in peripheral clock register
// Make GPIOA Pin3 as output pin (bits 7:6 in MODER register)
ldr r6, =GPIOA_BASE // Load GPIOA BASE address to r6
ldr r5, [r6,#GPIOx_MODER] // Read GPIOA_MODER content to r5
and r5, #0xFFFFFF3F // Clear bits 7-6 for PA3
orr r5, #0x00000040 // Write 01 to bits 7-6 for PA3
str r5, [r6] // Store result in GPIO MODER register
// Make GPIOI Pin13 as output pin (bits 27:26 in MODER register)
ldr r6, =GPIOI_BASE // Load GPIOI BASE address to r6
ldr r5, [r6,#GPIOx_MODER] // Read GPIOI_MODER content to r5
and r5, #0xF3FFFFFF // Clear bits 27-26 for P13
orr r5, #0x04000000 // Write 01 to bits 27-26 for P13
str r5, [r6] // Store result in GPIO MODER register
// Make GPIOJ Pin2 as output pin (bits 5:4 in MODER register)
ldr r6, =GPIOJ_BASE // Load GPIOJ BASE address to r6
ldr r5, [r6,#GPIOx_MODER] // Read GPIOJ_MODER content to r5
and r5, #0xFFFFFFCF // Clear bits 5-4 for P2
orr r5, #0x00000010 // Write 01 to bits 5-4 for PJ2
str r5, [r6] // Store result in GPIO MODER register
pop {r5, r6, pc}
INIT_TC_PSP:
push {r0, r1, lr}
ldr r1, =SCS_BASE
ldr r0, =SYSTICK_RELOAD_1MS
str r0, [r1, #SCS_SYST_RVR]
mov r0, #0
str r0, [r1, #SCS_SYST_CVR]
mov r0, #0b111 // Set TickInt to 1 as well
str r0, [r1, #SCS_SYST_CSR]
pop {r0, r1, pc}
.global SysTick_Handler
.section .text.SysTick_Handler,"ax",%progbits
.type SysTick_Handler, %function
SysTick_Handler:
push {r3, r4, r5, r6, lr}
// -----------------------------------
// Set LED1 from LED1 variable
ldr r3,=LED1 // Load LED1 value
ldr r4,[r3]
cmp r4,#0
beq L1ON
mov r5, #LED1_OFF
b CONT1
L1ON: mov r5, #LED1_ON
CONT1: // Set GPIOJ Pins through BSRR register
ldr r6, =GPIOJ_BASE // Load GPIOD BASE address to r6
str r5, [r6,#GPIOx_BSRR] // Write to BSRR register
// -----------------------------------
// Set LED2 from LED2 variable
ldr r3,=LED2 // Load LED1 value
ldr r4,[r3]
cmp r4,#0
beq L2ON
mov r5, #LED2_OFF
b CONT2
L2ON: mov r5, #LED2_ON
CONT2: // Set GPIOI Pins through BSRR register
ldr r6, =GPIOI_BASE // Load GPIOD BASE address to r6
str r5, [r6,#GPIOx_BSRR] // Write to BSRR register
// -----------------------------------
// Set PA3 from PA3 variable
ldr r3,=PA3 // Load PA3 value
ldr r4,[r3]
cmp r4,#0
beq L3ON
mov r5, #PA3_OFF
b CONT3
L3ON: mov r5, #PA3_ON
CONT3: // Set GPIOA Pins through BSRR register
ldr r6, =GPIOA_BASE // Load GPIOD BASE address to r6
str r5, [r6,#GPIOx_BSRR] // Write to BSRR register
RET: pop {r3, r4, r5, r6, pc }
INIT_CNT:
push {r0-r2, lr}
// Added in 2024 (but it seems not needed for H7):
// ldr r1,=DWT_BASE
// ldr r0,=0xC5ACCE55
// str r0,[r1,#DWT_LAR] // *DWT_LAR = 0xC5ACCE55; // unlock (CM7)
// bit [24] TRCENA Global enable for all DWT and ITM features:
// 0 = DWT and ITM blocks disabled.
// 1 = DWT and ITM blocks enabled.
ldr r1,=DEMCR
ldr r0,[r1]
orr r0,r0,#0x01000000
str r0,[r1] // *SCB_DEMCR |= 0x01000000;
// End: Added in 2024 :
ldr r1, =DWT_BASE
// Disable DWT Counters
ldr r2, [r1,#DWT_CTRL]
// bic r2,r2,#1 // Disabling CYCCNTENA bit
ldr r0,=DWT_CTRL_ENABLE_CNTs // Mask for enabling all Counters bits
bic r2,r2,r0 // Disabling all counter bits
str r2, [r1,#DWT_CTRL]
// Reset DWT Counters
mov r0,#0
str r0, [r1,#DWT_CYCCNT]
str r0, [r1,#DWT_CPICNT]
str r0, [r1,#DWT_EXCCNT]
str r0, [r1,#DWT_SLPCNT]
str r0, [r1,#DWT_LSUCNT]
str r0, [r1,#DWT_FOLDCNT]
pop {r0-r2, pc}
RESET_CNT:
push {r0-r2, lr}
ldr r1, =DWT_BASE
// Disable DWT Counters
ldr r2, [r1,#DWT_CTRL]
// bic r2,r2,#1 // Disabling CYCCNTENA bit
ldr r0,=DWT_CTRL_ENABLE_CNTs // Enabling all Counters bits
bic r2,r2,r0 // Disabling all counter bits
str r2, [r1,#DWT_CTRL]
// Reset DWT Counters
mov r0,#0
str r0, [r1,#DWT_CYCCNT]
str r0, [r1,#DWT_CPICNT]
str r0, [r1,#DWT_EXCCNT]
str r0, [r1,#DWT_SLPCNT]
str r0, [r1,#DWT_LSUCNT]
str r0, [r1,#DWT_FOLDCNT]
bl ENABLE_CNT
pop {r0-r2, pc}
CALC_CNT: // DWT Counter is in r0
push {r1-r6,r8, lr}
ldr r1, =DWT_BASE
// Disable DWT Counters
ldr r2, [r1,#DWT_CTRL]
// bic r2,r2,#1 // Disabling CYCCNTENA bit
ldr r3,=DWT_CTRL_ENABLE_CNTs // Enabling all Counters bits
bic r2,r2,r3 // Disabling all counter bits
str r2, [r1,#DWT_CTRL]
// instructions executed = DWT_CYCCNT - DWT_CPICNT - DWT_EXCCNT - DWT_SLEEPCNT - DWT_LSUCNT + DWT_FOLDCNT
// Read other DWT Counters
ldr r2, [r1,#DWT_CPICNT]
sub r8,r0,r2
ldr r3, [r1,#DWT_EXCCNT]
sub r8,r8,r3
ldr r4, [r1,#DWT_SLPCNT]
sub r8,r8,r4
ldr r5, [r1,#DWT_LSUCNT]
sub r8,r8,r5
ldr r6, [r1,#DWT_FOLDCNT]
add r8,r8,r6
// r8 contains number of instructions
mov r0,r8
pop {r1-r6,r8, pc}
ENABLE_CNT:
push {r0-r2, lr}
ldr r1, =DWT_BASE
// Enable DWT Counters
ldr r2, [r1,#DWT_CTRL]
// orr r2,r2,#1 // Enabling CYCCNTENA bit
ldr r0,=DWT_CTRL_ENABLE_CNTs // Enabling all Counters bits
orr r2,r2,r0
str r2, [r1,#DWT_CTRL]
pop {r0-r2, pc}