learner
learner

Reputation: 123

Code and decode variable length integer in binary format efficiently

I need to store integers in the range 0-50000000 in a binary file and decode them later. To save space, I store the number of bytes required to decode the integer in the first 2 bits of the first byte, i.e. 01XXXXXX means that 2 bytes are required to store the number. I am facing an issue in the implementation: the number I get after decoding is not correct. Here is my sample code -

// Sample program from the question: writes one number to sample.bin with a
// length marker in its leading bits, then reads the file back and prints the
// decoded value. The review notes below point at the places where the text
// representation of the number is used instead of its binary value.
int main()
{
    FILE *input = NULL,  
            *output = NULL;

    output = fopen("sample.bin","wb+");

    unsigned int num =  32594; //(this number would come from input file)
    char buff_ts[4];
    // NOTE(review): "%d" stores the decimal text of num ("32594" plus a
    // terminating NUL, 6 bytes), not its binary value, and that text does not
    // fit in the 4-byte buffer.
    sprintf(buff_ts,"%d",num);
    // NOTE(review): setBitAt is not defined in this snippet; presumably it
    // sets a single bit of the buffer -- confirm against its definition.
    setBitAt(buff_ts, sizeof(buff_ts), 23, 1); // set first two bits
    fwrite(buff_ts,1,sizeof(buff_ts),output);
    fclose(output);

    input = fopen("sample.bin", "rb");

    int diff;
    char buff[1];
    fread(buff,1,1,input);
    char buff_copy = buff[0];
    // NOTE(review): atoi parses decimal text and expects a NUL-terminated
    // string; buff is a single byte with no terminator, so temp is not the
    // bit pattern of the byte that was read.
    int temp = atoi(buff);

    int more_bytes_to_read = (temp>>6); // read first 2 bits
    buff_copy = buff_copy & ((1<<6)-1); // reset first 2 bits

    if(more_bytes_to_read==0) // if no more bytes to read
    {
        diff = buff_copy;
    }
    else
    {
        char extra_buff[more_bytes_to_read];
        fread(extra_buff,1,sizeof(extra_buff),input); // read extra bytes
        char num_buf[more_bytes_to_read+1];
        num_buf[0] = buff_copy;  // copy prev read buffer
        for(int i=1;i<=more_bytes_to_read;i++)
        {
            num_buf[i] = extra_buff[i-1];
        }
        // NOTE(review): num_buf has no room for a NUL terminator, and atoi
        // again interprets the bytes as decimal text rather than combining
        // them as binary digits.
        diff = atoi(num_buf);
    }
        // NOTE(review): cout/endl are C++ stream facilities; no includes are
        // shown in this snippet.
        cout<<diff<<endl;

        return 0;
}

Upvotes: 0

Views: 473

Answers (1)

Henrik Carlqvist
Henrik Carlqvist

Reputation: 1168

#include <stdio.h>
#include <stdint.h>

/*
 * Tells whether the host stores the low-order byte of a 16-bit integer at
 * the lower address.
 *
 * Returns 1 when the first byte of the probe value is greater than the
 * second one (little-endian layout), 0 otherwise.
 */
int little_endian(void)
{
   uint16_t probe = 0x01;
   char *bytes = (char *)&probe;

   if (bytes[0] > bytes[1])
      return 1;

   return 0;
}

/*
 * Reverses the order of the four bytes of a 32-bit value.
 *
 * i: value whose bytes are to be reversed.
 * Returns the value with byte 0 and byte 3 exchanged and byte 1 and byte 2
 * exchanged.
 */
uint32_t swap_bytes(uint32_t i)
{
   uint32_t lowest  = i & 0x000000ffu;
   uint32_t low_mid = i & 0x0000ff00u;
   uint32_t hi_mid  = i & 0x00ff0000u;
   uint32_t highest = i & 0xff000000u;

   return (lowest << 24) | (low_mid << 8) | (hi_mid >> 8) | (highest >> 24);
}

/*
 * Returns i with its bytes reversed when little_endian() reports a
 * little-endian host, and i unchanged otherwise.
 */
uint32_t fix_endian(uint32_t i)
{
   return little_endian() ? swap_bytes(i) : i;
}

/*
 * Encodes num as a variable-length byte sequence in buf.
 *
 * Layout: the two most significant bits of the first byte hold the number
 * of extra bytes that follow (0-3); the remaining 6 bits of the first byte
 * and all following bytes hold the value, most significant byte first.
 *
 * num: value to encode; must be at most 0x3fffffff (30 bits).
 * buf: destination, must have room for the encoded length (up to 4 bytes).
 *      Only the returned number of bytes is written.
 *
 * Returns the number of bytes written (1-4), or 0 without touching buf
 * when num does not fit in 30 bits.
 */
int encode_num(uint32_t num, char *buf)
{
   /* Raw bytes are written as unsigned char so that values above 0x7f do
      not depend on the signedness of plain char. */
   unsigned char *out = (unsigned char *) buf;
   int extra_bytes_needed;
   int i;

   if(num <= 0x3f)
      extra_bytes_needed=0;
   else if(num <= 0x3fff)
      extra_bytes_needed=1;
   else if(num <= 0x3fffff)
      extra_bytes_needed=2;
   else if(num <= 0x3fffffff)
      extra_bytes_needed=3;
   else
      return 0; /* the 2-bit length field cannot describe more than 4 bytes */

   /* Emit the value byte by byte from the least significant end; shifting
      the value keeps the result independent of the host byte order. */
   for(i = extra_bytes_needed; i >= 0; i--)
   {
      out[i] = (unsigned char)(num & 0xff);
      num >>= 8;
   }

   /* The range checks above guarantee the top two bits of out[0] are free. */
   out[0] |= (unsigned char)(extra_bytes_needed << 6);

   return extra_bytes_needed + 1;
}

/*
 * Demo driver: encodes a fixed list of numbers into sample.bin with
 * encode_num(), then reads the file back, decodes each number and prints it.
 *
 * Returns 0 on success and 1 if the file cannot be opened, written, or
 * ends in the middle of an encoded number.
 */
int main()
{
    FILE *input = NULL,
       *output = NULL;
    int i;
    uint32_t nums[10] = {32594136, 1, 2, 3, 4, 5, 6, 7, 8 , 193};
    char buff_ts[4];
    unsigned char c;
    int len;
    uint32_t num;
    int more_bytes_to_read;

    output = fopen("sample.bin","wb");
    if(output == NULL)
    {
       perror("sample.bin");
       return 1;
    }

    for(i=0; i<10; i++)
    {
       len = encode_num(nums[i], buff_ts);
       if(fwrite(buff_ts,1,(size_t)len,output) != (size_t)len)
       {
          perror("sample.bin");
          fclose(output);
          return 1;
       }
    }
    /* fclose flushes buffered data, so a failure here means lost output. */
    if(fclose(output) != 0)
    {
       perror("sample.bin");
       return 1;
    }

    input = fopen("sample.bin", "rb");
    if(input == NULL)
    {
       perror("sample.bin");
       return 1;
    }

    while(fread(&c,1,1,input)==1)
    {
       /* Top two bits: count of extra bytes; low six bits: start of value. */
       more_bytes_to_read=c>>6;
       num = c & 0x3f;
       while(more_bytes_to_read--)
       {
          if(fread(&c,1,1,input)!=1)
          {
             /* The length prefix promised more bytes than the file holds. */
             fprintf(stderr, "sample.bin: truncated number\n");
             fclose(input);
             return 1;
          }
          num <<= 8;
          num |= c;
       }
       /* Cast so the argument type matches the conversion specifier. */
       printf("Read number %lu\n", (unsigned long)num);
    }
    fclose(input);
    return 0;
}

Upvotes: 2

Related Questions