pfinferno
pfinferno

Reputation: 1945

Calculating entropy in C

I'm trying to compute the entropy of any given file. However, when I run my program, it always prints 3.00000 as the answer. I haven't used C in a while, and I can't see where I'm going wrong. I've been fiddling with it for a few hours now. Any tips would be great, thank you!

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>

#define SIZE 256

int entropy_calc(long byte_count[], int length)
{
      float entropy;
      float count;
      int i;

      /* entropy calculation */
      for (i = 0; i < SIZE; i++)
        {
          if (byte_count[i] != 0)
            {
              count = (float) byte_count[i] / (float) length;
              entropy += -count * log2f(count);
            }
        }
      return entropy;
}

/*
 * Print the byte-level entropy of every file named on the command line,
 * one "<entropy> \t<filename>" line per file.
 *
 * Files that cannot be opened or read are reported on stderr and skipped.
 * Always returns 0.
 */
int main(int argc, char **argv)
{
  FILE            *inFile;
  int             j;
  long            byte_count[SIZE];
  unsigned char   buffer[1024];

  /* do this for all files */
  for (j = 1; j < argc; j++)
    {
      size_t n;            /* bytes returned by the last fread */
      size_t i;
      /*
       * Total bytes in the current file.  It must start at 0 for every
       * file; otherwise the probabilities are computed against garbage or
       * against the running total of all previous files.  The type is int
       * because that is what entropy_calc() accepts, so files larger than
       * INT_MAX bytes are not supported.
       */
      int length = 0;
      float entropy;

      memset(byte_count, 0, sizeof byte_count);

      inFile = fopen(argv[j], "rb");    /* binary mode: count raw bytes */
      if (inFile == NULL)
        {
          /* fopen can fail for reasons other than a missing file. */
          fprintf(stderr, "Could not open file `%s`\n", argv[j]);
          continue;
        }

      /* Read the whole file one buffer at a time, building the histogram. */
      while ((n = fread(buffer, 1, sizeof buffer, inFile)) != 0)
        {
          for (i = 0; i < n; i++)
            byte_count[buffer[i]]++;
          length += (int) n;
        }

      /* fread returns 0 on both EOF and error; tell the two apart. */
      if (ferror(inFile))
        {
          fprintf(stderr, "Error while reading file `%s`\n", argv[j]);
          fclose(inFile);
          continue;
        }
      fclose(inFile);

      entropy = entropy_calc(byte_count, length);
      printf("%02.5f \t%s\n", entropy, argv[j]);
    }
  return 0;
}

Upvotes: 2

Views: 6592

Answers (1)

hesham_EE
hesham_EE

Reputation: 1165

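The return type of the function entropy_calc() should be float, not int. Because it is declared as returning int, the computed entropy is truncated to a whole number before it reaches the caller, which is why you only ever see values such as 3.00000.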

Upvotes: 2

Related Questions