Timothy Swan
Timothy Swan

Reputation: 763

CRC32 hash of python string

Using an existing C example algorithm, I want to generate the correct CRC32 hash for a string in python. However, I am receiving incorrect results. I mask the result of every operation and attempt to copy the original algorithm's logic. The C code was provided by the same website which has a webpage string hash checking tool, so it is likely to be correct.

Below is a complete Python file including C code in its comments which it attempts to mimic. All pertinent information is in the file.

P_32 = 0xEDB88320
init = 0xffffffff
_ran = True
tab32 = []

def mask32(n):
    return n & 0xffffffff

def mask8(n):
    return n & 0x000000ff

def mask1(n):
    return n & 0x00000001

def init32():
    for i in range(256):
        crc = mask32(i)
        for j in range(8):
            if (mask1(crc) == 1):
                crc = mask32(mask32(crc >> 1) ^ P_32)
            else:
                crc = mask32(crc >> 1)
        tab32.append(crc)
    global _ran
    _ran = False

def update32(crc, char):
    char = mask8(char)
    t = crc ^ char
    crc = mask32(mask32(crc >> 8) ^ tab32[mask8(t)])
    return crc

def run(string):
    if _ran:
        init32()
    crc = init
    for c in string:
        crc = update32(crc, ord(c))
    print(hex(crc)[2:].upper())

check0 = "The CRC32 of this string is 4A1C449B"
check1 = "123456789" # CBF43926
run(check0) # Produces B5E3BB64
run(check1) # Produces 340BC6D9

# Check CRC-32 on http://www.lammertbies.nl/comm/info/crc-calculation.html#intr

"""
/* http://www.lammertbies.nl/download/lib_crc.zip */

#define                 P_32        0xEDB88320L
static int              crc_tab32_init          = FALSE;
static unsigned long    crc_tab32[256];

    /*******************************************************************\
    *                                                                   *
    *   unsigned long update_crc_32( unsigned long crc, char c );       *
    *                                                                   *
    *   The function update_crc_32 calculates a  new  CRC-32  value     *
    *   based  on  the  previous value of the CRC and the next byte     *
    *   of the data to be checked.                                      *
    *                                                                   *
    \*******************************************************************/

unsigned long update_crc_32( unsigned long crc, char c ) {

    unsigned long tmp, long_c;

    long_c = 0x000000ffL & (unsigned long) c;

    if ( ! crc_tab32_init ) init_crc32_tab();

    tmp = crc ^ long_c;
    crc = (crc >> 8) ^ crc_tab32[ tmp & 0xff ];

    return crc;

}  /* update_crc_32 */

    /*******************************************************************\
    *                                                                   *
    *   static void init_crc32_tab( void );                             *
    *                                                                   *
    *   The function init_crc32_tab() is used  to  fill  the  array     *
    *   for calculation of the CRC-32 with values.                      *
    *                                                                   *
    \*******************************************************************/

static void init_crc32_tab( void ) {

    int i, j;
    unsigned long crc;

    for (i=0; i<256; i++) {

        crc = (unsigned long) i;

        for (j=0; j<8; j++) {

            if ( crc & 0x00000001L ) crc = ( crc >> 1 ) ^ P_32;
            else                     crc =   crc >> 1;
        }

        crc_tab32[i] = crc;
    }

    crc_tab32_init = TRUE;

}  /* init_crc32_tab */
"""

Upvotes: 2

Views: 2282

Answers (1)

Dom
Dom

Reputation: 1722

There's just one thing that's wrong with the current implementation and the fix is actually just one line of code to the end of your run function which is:

crc = crc ^ init

Which if added to your run function look like this:

def run(string):
    if _ran:
        init32()
    crc = init
    for c in string:
        crc = update32(crc, ord(c))
    crc = crc ^ init    
    print(hex(crc)[2:].upper())

This will give you the correct results you are expecting.The reason that this is necessary is after you are done updating the CRC32, the finalization of it is XORing it with the 0xFFFFFFFF. Since you only had the init table and update functions and not the finalize, you were one step off from the actual crc.

Another C implimentation that is a little more straightforward is this one it's a little bit easier to see the whole process. The only thing slightly obsure is the init poly ~0x0 is the same (0xFFFFFFFF).

Upvotes: 2

Related Questions