2013-03-08 01:31 AM
I've stolen some code from Clive's post here <https://my.st.com/a99fbbe2> (Thanks Clive!) and it works great for computing a CRC all at once. In other words, the following function signature.
unsigned long crc32(const unsigned char* buffer, const unsigned int length);
However, I'm porting some zlib/png code and the CRC is calculated incrementally with the following function signature:
unsigned long crc32(unsigned long crc, const unsigned char* buffer, const unsigned int length);
So, I need to be able to preload CRC->DR with the parameter crc.
Anyone know how that can be done?
#crc32-crc-stm32f4 #crc #crc32
2013-03-08 04:14 AM
The hardware is intrinsically capable of doing bursts of 32-bit words, so you could manage a stream so the hardware register is just left to accumulate. If you have multiple streams, and need to save/restore a specific context, there is a math trick to advancing the CRC->DR. It's possible to reverse the polynomial and wind the machine backward to arrive at a magic value to get the next state you want. This is a parallel, small table (4-bit) implementation:
https://community.st.com/0D50X00009XkZcLSAV
You could check if the register is already where you want it, to save some time. The code footprint of a serial (1-bit) implementation would be a bit smaller at the expense of some speed.
Edit: Fixed DEAD LINK, original post from March 8, 2013
2013-03-08 09:25 PM
Wow! A link to source code and everything. I thought my question was so obscure it didn't have much of a chance. Thanks, Clive!
Anyway, it's all working great for me now. Here's my source code in case anyone else finds it helpful.
static
const uint32_t CrcTable[16] =
{
    /*
     * Nibble lookup table consumed by ReverseCRC32(): entry i is XORed in
     * when the low four bits of the working value equal i.
     */
    0x00000000, 0xB2B4BCB6, 0x61A864DB, 0xD31CD86D,   /* 0x0 - 0x3 */
    0xC350C9B6, 0x71E47500, 0xA2F8AD6D, 0x104C11DB,   /* 0x4 - 0x7 */
    0x82608EDB, 0x30D4326D, 0xE3C8EA00, 0x517C56B6,   /* 0x8 - 0xB */
    0x4130476D, 0xF384FBDB, 0x209823B6, 0x922C9F00    /* 0xC - 0xF */
};
/*
 * Produce the word that Compute() writes to CRC->DR to move the register
 * from currentCRC to desiredCRC.
 *
 * desiredCRC is pushed through eight table-driven 4-bit steps (32 bits in
 * total, consuming the low nibble each time) and the outcome is XORed with
 * currentCRC.
 */
static uint32_t ReverseCRC32(uint32_t currentCRC, uint32_t desiredCRC)
{
    uint32_t unwound = desiredCRC;
    unsigned int nibble;

    for (nibble = 0; nibble < 8; nibble++)
    {
        unwound = (unwound >> 4) ^ CrcTable[unwound & 0x0F];
    }

    return currentCRC ^ unwound;
}
/**
 * Fold a buffer into a running CRC-32 using the STM32F4 CRC peripheral.
 *
 * The value handed back is bit-reversed and inverted relative to the raw
 * data register, which makes it compatible with
 * windows/winzip/winrar/zlib/libpng.
 *
 * @param buffer  Bytes to add to the checksum; any alignment is accepted.
 * @param length  Number of bytes available at buffer.
 * @param crc     Value returned by the previous call for this stream, or 0
 *                to start a new checksum.
 * @return        The CRC after processing length bytes.
 */
uint32_t Compute(const uint8_t* buffer, const size_t length, const uint32_t crc) const
{
    uint32_t byteCount;
    uint32_t bitCount;
    uint32_t value;

    // Initialize DR
    if (crc == 0)
    {
        // NOTE(review): presumably restores DR to its reset value - confirm
        // against the peripheral library in use.
        CRC_ResetDR();
    }
    else
    {
        // Compensate for STM32F4's CRC32 shift order
        uint32_t desiredCRC = __RBIT(crc ^ 0xFFFFFFFF);

        // No need to compute Reverse CRC if DR already has the value we need
        if (CRC->DR != desiredCRC)
        {
            CRC->DR = ReverseCRC32(CRC->DR, desiredCRC);
        }
    }

    // Do multiples of 4 bytes (32 bits)
    byteCount = length >> 2;
    while (byteCount--)
    {
        // Build the word byte by byte rather than dereferencing a cast
        // pointer: buffer is a byte pointer with no alignment guarantee, and
        // the cast also discarded const. Little-endian order reproduces what
        // a direct 32-bit load yields on the little-endian Cortex-M4.
        value = (uint32_t)buffer[0]
              | ((uint32_t)buffer[1] << 8)
              | ((uint32_t)buffer[2] << 16)
              | ((uint32_t)buffer[3] << 24);

        // Compensate for STM32F4's CRC32 shift order
        CRC->DR = __RBIT(value);
        buffer += 4;
    }

    // Do remaining bytes in software, one bit at a time.
    // Compensate for STM32F4's CRC32 shift order
    value = __RBIT(CRC->DR);
    byteCount = length & 3;
    while (byteCount--)
    {
        value ^= (uint32_t)*buffer++;
        for (bitCount = 0; bitCount < 8; bitCount++)
        {
            if (value & 1)
            {
                value = (value >> 1) ^ 0xEDB88320;
            }
            else
            {
                value >>= 1;
            }
        }
    }

    // now the output is compatible with windows/winzip/winrar/zlib/libpng
    value ^= 0xFFFFFFFF;
    return value;
}
2013-03-08 10:13 PM
Let's just finish up the demo by table driving the bytes
static uint32_t ReverseCRC32(uint32_t currentCRC, uint32_t desiredCRC)
{ static const uint32_t CrcTable[16] = { // For 0x04C11DB7 Reverse Polynomial 0x00000000, 0xB2B4BCB6, 0x61A864DB, 0xD31CD86D, 0xC350C9B6, 0x71E47500, 0xA2F8AD6D, 0x104C11DB, 0x82608EDB, 0x30D4326D, 0xE3C8EA00, 0x517C56B6, 0x4130476D, 0xF384FBDB, 0x209823B6, 0x922C9F00 }; desiredCRC = (desiredCRC >> 4) ^ CrcTable[desiredCRC & 0x0F]; desiredCRC = (desiredCRC >> 4) ^ CrcTable[desiredCRC & 0x0F]; desiredCRC = (desiredCRC >> 4) ^ CrcTable[desiredCRC & 0x0F]; desiredCRC = (desiredCRC >> 4) ^ CrcTable[desiredCRC & 0x0F]; desiredCRC = (desiredCRC >> 4) ^ CrcTable[desiredCRC & 0x0F]; desiredCRC = (desiredCRC >> 4) ^ CrcTable[desiredCRC & 0x0F]; desiredCRC = (desiredCRC >> 4) ^ CrcTable[desiredCRC & 0x0F]; desiredCRC = (desiredCRC >> 4) ^ CrcTable[desiredCRC & 0x0F]; return(desiredCRC ^ currentCRC); } static uint32_t FastBlockCRC32(const uint8_t* buffer, const size_t length, const uint32_t crc) { static const uint32_t CrcTable[16] = { // For 0xEDB88320 Polynomial 0x00000000,0x1DB71064,0x3B6E20C8,0x26D930AC,0x76DC4190,0x6B6B51F4,0x4DB26158,0x5005713C, 0xEDB88320,0xF00F9344,0xD6D6A3E8,0xCB61B38C,0x9B64C2B0,0x86D3D2D4,0xA00AE278,0xBDBDF21C }; // sourcer32@gmail.com while(length--) { crc = crc ^ (uint32_t)*buffer++; crc = (crc >> 4) ^ CrcTable[crc & 0x0F]; crc = (crc >> 4) ^ CrcTable[crc & 0x0F]; } return(crc); } uint32_t Compute(const uint8_t* buffer, const size_t length, const uint32_t crc) { uint32_t byteCount;uint32_t value;
//Initialize DR if (crc == 0) { CRC_ResetDR(); } else { //No need to compute Reverse CRC if DR already has the value we need uint32_t desiredCRC = __RBIT(crc ^ 0xFFFFFFFF); //Compensate for STM32F4's CRC32 shift order value = CRC->DR; if (value != desiredCRC) { CRC->DR = ReverseCRC32(value, desiredCRC); } } //Do multiples of 4 bytes (32 bits) byteCount = length >> 2; while(byteCount--) { value=*((uint32_t *)buffer); CRC->DR = __RBIT(value); //Compensate for STM32F4's CRC32 shift order buffer += 4; } //Do remaining bytes value = __RBIT(CRC->DR); //Compensate for STM32F4's CRC32 shift order value = FastBlockCRC32(buffer, length & 3, value); value ^= 0xFFFFFFFF; return value; //now the output is compatible with windows/winzip/winrar/zlib/libpng }2014-08-18 09:03 AM
Assuming you just want the result and don't specifically need to put the CRC calculator in a specific state, you don't need any tables to deal with initial state or leftover bytes:
My apologies to people allergic to C++, and please beware of the inversion in my definition of read().
struct
crcc {
    // Register block of the CRC unit followed by helpers that compute a
    // CRC-32 without lookup tables. The leading /*NN*/ comments give each
    // member's byte offset in hex.
    /*00*/ io_u32 dr;   //r>
    /*04*/ u8 idr;      //rw
    /*08*/ io_u32 cr;   //-x

    // Reverse the bit order of x using the ARM `rbit` instruction.
    u32 rbit( u32 x ) {
        u32 y;
        asm( " rbit %0, %1" : "=r" (y) : "r" (x) );
        return y;
    }

    // Write 1 to cr.
    // NOTE(review): presumably this is the RESET bit that reinitialises dr -
    // confirm against the reference manual.
    void reset() { cr = 1; }

    // Feed one 32-bit word to the unit, bit-reversed.
    void write( u32 data ) { dr = rbit( data ); }

    // Read the data register, bit-reversed AND inverted (note the inversion).
    u32 read() { return ~rbit( dr ); }

    // Single byte: fold `data` into `crc` and return the updated CRC.
    u32 calc( u8 data, u32 crc )
    {
        crc = ~crc ^ data;
        reset();
        // Only the low 8 bits still need processing; place them in the top
        // byte of the word handed to the unit.
        write( ~( crc << 24 ) );
        // The untouched upper 24 bits are combined with the unit's output.
        return ( crc >> 8 ) ^ read();
    }

    // Wordwise: fold `nwords` 32-bit words into `crc` and return the
    // updated CRC. Returns `crc` unchanged when nwords is 0.
    u32 calc( size_t nwords, u32 wdata[], u32 crc = 0 )
    {
        if ( nwords == 0 )
            return crc;
        reset();
        // The incoming CRC is mixed into the first word only.
        write( wdata[0] ^ crc );
        for ( size_t i = 1; i != nwords; i++ )
            write( wdata[i] );
        return read();
    }

    // General: fold `nbytes` bytes at any alignment into `crc` and return
    // the updated CRC.
    u32 calc( size_t nbytes, u8 data[], u32 crc = 0 )
    {
        if ( nbytes >= 4 ) {
            // process 0-3 bytes to make pointer word-aligned
            size_t align = -(size_t)data & 3;
            nbytes -= align;
            while ( align-- )
                crc = calc( *data++, crc );

            // process bulk of data
            size_t nwords = nbytes >> 2;
            crc = calc( nwords, (u32 *)data, crc );
            data += nwords << 2;
            nbytes &= 3;
        }

        // leftover bytes
        while ( nbytes-- )
            crc = calc( *data++, crc );
        return crc;
    }
};
This still has plenty of room for optimization. For example, CRCing 2 or 3 bytes can be done with a single reset/write/read sequence instead of one per byte, e.g.
u32 calc( u16 data, u32 crc )
{
    // Invert the running CRC and mix in the two new bytes.
    const u32 state = ~crc ^ data;

    reset();
    // Only the low 16 bits still need processing; place them in the top
    // half of the word handed to the unit.
    write( ~( state << 16 ) );

    // The untouched upper 16 bits are combined with the unit's output.
    return ( state >> 16 ) ^ read();
}
Also, if you want to perform a new calculation but still know the current state of the CRC engine (because you've just read the data register anyway) you can avoid a reset since
u32 old = read();
reset();
write( foo );
is equivalent to
u32 old = read();
write( foo ^ old );
I'll leave the microoptimizations as an exercise for the reader ;)
2016-07-21 09:12 AM
2016-07-21 09:30 AM
This would be awesome if it worked. It seems to return the exact same values as when I set the HW CRC to InputInversion = WORD and OutputInversion = Enabled.
Me thinks you are not using an STM32F4 part... It only has fixed and inflexible 32-bit operation.