Don Kielon
Don Kielon

Reputation: 31

Comparing 2 files in c linux fgets

I need to compare two files and return 1 if they are the same or 0 if they are not, but my function always returns 0 and I have no idea why. Maybe you know a different function that can do this?

/*
 * Asker's attempt: compare two files line by line with fgets()/strcmp().
 * The stated goal is to return 1 when the files are the same and 0 otherwise.
 *
 * file_1, file_2: paths opened with fopen() in "r" mode.
 *
 * NOTE(review): as written, the function returns 0 as soon as one pair of
 * lines compares EQUAL, and returns 1 only when the loop ends without any
 * pair of lines having matched -- the reverse of the stated goal.
 */
int compare(char *file_1, char *file_2)
{
    /* NOTE(review): neither fopen() result is checked for NULL before use. */
    FILE *data_1 = fopen(file_1,"r");
    FILE *data_2 = fopen(file_2,"r");
    char line1[1000];
    char line2[1000];
    /* Read one line (at most 999 chars) from each file per iteration; the
     * loop stops as soon as either fgets() call fails. */
    while(fgets(line1, sizeof(line1), data_1)&&fgets(line2, sizeof(line2), data_2)){
        /* strcmp() == 0 means the two lines are identical. */
        if(strcmp(line1,line2)==0){
          fclose(data_1);
          fclose(data_2);
          return 0;
        }
    }
    /* Reached when either file has no more lines; which file ended first is
     * not examined here. */
    fclose(data_1);
    fclose(data_2);
    return 1;
}

Upvotes: 1

Views: 714

Answers (6)

Michi
Michi

Reputation: 5307

Well, if those files are not too big, a good approach would be something like this:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>

void fileExistence( const char *fileName );
char *readFile( const char *const fileName  );
size_t getFileSize ( FILE *const file );

/*
 * Loads "file1.txt" and "file2.txt" completely into memory via readFile()
 * and prints whether the two buffers compare equal as C strings.
 * Always returns 0; readFile() itself terminates the program on failure.
 */
int main ( void )
{
    const char *const first_path  = "file1.txt";
    const char *const second_path = "file2.txt";

    /* Each buffer is heap-allocated by readFile() and owned by main(). */
    char *const first_text  = readFile( first_path );
    char *const second_text = readFile( second_path );

    const int differ = ( strcmp( first_text, second_text ) != 0 );

    if ( differ )
    {
        printf( "Files are not the same\n" );
    }
    else
    {
        printf( "Files are the same\n" );
    }

    free( first_text );
    free( second_text );

    return 0;
}

/*
 * Reads the entire file named `fName` into a newly allocated buffer and
 * appends a terminating '\0'.
 *
 * fName: path of the file to read (opened in binary mode, "rb").
 *
 * Returns: a malloc()ed buffer that the caller must free(). The function
 * never returns NULL: if the file cannot be opened, memory cannot be
 * allocated, or fewer bytes than expected are read, a diagnostic is written
 * to stderr and the process exits with EXIT_FAILURE.
 *
 * Note: the buffer may contain embedded '\0' bytes if the file does, so its
 * strlen() is not necessarily the file size.
 */
char *readFile( const char *const fName )
{
    fileExistence( fName );

    FILE *file = fopen ( fName , "rb" );
    if ( file == NULL )
    {
        /* The file is opened for reading, so it is an input file. */
        fprintf( stderr, "Can't open input file %s!\n", fName );
        exit( EXIT_FAILURE );
    }

    /*
     * getFileSize() returns 0 both for an empty file and on failure, and it
     * prints its own diagnostic in the failure case. A length of 0 is
     * therefore not reported as an error here: an empty file is valid input
     * and simply yields an empty string.
     */
    const size_t length = getFileSize( file );

    char *buffer = malloc( length + 1 );
    if ( buffer == NULL )
    {
        fprintf( stderr, "Error, malloc().\n" );
        fclose( file );
        exit( EXIT_FAILURE );
    }

    const size_t size = fread ( buffer , 1 , length, file );
    if ( size != length )
    {
        fprintf( stderr, "Error, fread().\n" );
        free( buffer );
        fclose( file );
        exit( EXIT_FAILURE );
    }

    buffer[length] = '\0';
    fclose ( file );
    return buffer;
}

/*
 * Returns the size of the open stream `file`, measured as the offset that
 * ftell() reports after seeking to the end, and leaves the stream
 * positioned at its beginning.
 *
 * file: an open, seekable stream.
 *
 * Returns: the end-of-file offset as a size_t, or 0 if any fseek()/ftell()
 * call fails (a diagnostic is written to stderr in that case). A return of 0
 * is therefore ambiguous between "empty file" and "error".
 */
size_t getFileSize ( FILE *const file )
{
    /* ISO C only guarantees a nonzero return from fseek() on failure, so
     * test against 0 rather than against -1. */
    if ( fseek ( file , 0 , SEEK_END ) != 0 )
    {
        fprintf( stderr, "Error, fseek()\n" );
        return 0;
    }

    const long tel = ftell( file );

    /* SIZE_MAX itself is rejected too, which leaves room for a caller that
     * adds 1 for a terminator (as readFile() does). */
    if ( tel < 0 || (unsigned long) tel >= SIZE_MAX )
    {
        fprintf( stderr, "Error, ftell()\n" );
        return 0;
    }

    /* Rewind so the caller can read the file from the start. */
    if ( fseek ( file , 0 , SEEK_SET ) != 0 )
    {
        fprintf( stderr, "Error, fseek()\n" );
        return 0;
    }

    return ( size_t )tel;
}

/*
 * Verifies that `fileName` exists and is readable by the calling process.
 *
 * fileName: path to check with access().
 *
 * Returns normally when both checks pass. Otherwise a diagnostic is written
 * to stderr and the process exits with EXIT_FAILURE.
 *
 * Write permission is deliberately NOT required: the file is only ever read,
 * and demanding W_OK would reject perfectly comparable read-only files.
 */
void fileExistence( const char *const fileName )
{
    /* access() returns 0 on success, nonzero when the check fails. */
    if( access(fileName, F_OK ) != 0 )
    {
        fprintf(stderr, "The File %s\t not Found\n",fileName);
        exit( EXIT_FAILURE );
    }

    if( access(fileName, R_OK ) != 0 )
    {
        fprintf(stderr, "The File %s\t cannot be readed\n",fileName);
        exit( EXIT_FAILURE );
    }
}

Here you read both files into two buffers and then use strcmp() to compare them.

If you need to compare the files ignoring case, you can use the strcasecmp() function, which is declared in strings.h.

Upvotes: 0

chux
chux

Reputation: 154242

Other issues concerning comparing files not covered in answers yet

File data with '\0'

Should a file contain a null character, fgets() will read that character like any other non-end-of-line character. Then a following strcmp() will not compare all the line that was read. Better to use fread()/memcmp() to avoid this short-coming.

Comparing as text or binary?

Opening the file with "r" as in fopen(file_1,"r") allows various translations: end-of-line, end-of-file, byte-order marks.

Opening with "r" makes sense to compare as text. Otherwise, open the file in binary mode "rb". Use fread() in either case.

A line of text with "\r\n" in one file and a line of text with "\n" in another file can compare equal in text mode, but differ in binary mode.

As post is tagged [linux] though, no translations are expected in text mode.

Non-comparable

During readings an input error may occur rendering the compare moot.


Sample compare code

#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#define FILE_COMPARE_N 1024

/*
 * Compares the remaining contents of two open streams, one chunk of up to
 * FILE_COMPARE_N bytes at a time.
 *
 * Returns:
 *    1  the streams hold identical data up to end-of-file
 *    0  the data (or the amount of data) differs
 *   -1  a read error was flagged on either stream
 */
int stream_compare(FILE *f1, FILE *f2) {
  unsigned char chunk_a[FILE_COMPARE_N];
  unsigned char chunk_b[FILE_COMPARE_N];

  for (;;) {
    size_t got_a = fread(chunk_a, 1, FILE_COMPARE_N, f1);
    if (ferror(f1)) {
      return -1;
    }

    size_t got_b = fread(chunk_b, 1, FILE_COMPARE_N, f2);
    if (ferror(f2)) {
      return -1;
    }

    /* Different chunk lengths mean one stream ended before the other. */
    if (got_a != got_b) {
      return 0;
    }
    if (memcmp(chunk_a, chunk_b, got_a) != 0) {
      return 0;
    }

    /* Both streams delivered nothing: end-of-file reached on both. */
    if (got_a == 0) {
      return 1;
    }
  }
}

/*
 * Opens the two named files and compares their contents with
 * stream_compare().
 *
 * name1, name2: paths of the files to compare.
 * as_text:      true  -> open in text mode ("r"), allowing the platform's
 *                        text translations;
 *               false -> open in binary mode ("rb") for a byte-exact compare.
 *
 * Returns the stream_compare() result (1 match, 0 mismatch, -1 read
 * failure), or -1 if either file cannot be opened.
 */
int file_compare(const char *name1, const char *name2, bool as_text) {
  /* Text comparison uses "r"; binary comparison uses "rb". */
  const char *mode = as_text ? "r" : "rb";

  FILE *f1 = fopen(name1, mode);
  if (f1 == NULL)
    return -1;

  FILE *f2 = fopen(name2, mode);
  if (f2 == NULL) {
    fclose(f1); /* do not leak the first stream */
    return -1;
  }

  int compare = stream_compare(f1, f2);

  fclose(f1);
  fclose(f2);
  return compare;
}

Upvotes: 1

myxaxa
myxaxa

Reputation: 1381

You can compare the files char by char (or byte by byte) to get a faster result in the case where the files are not equal:

/*
 * Compares two files character by character.
 *
 * file_1, file_2: paths of the files to compare (opened in "r" mode).
 *
 * Returns 1 if both files deliver exactly the same sequence of characters
 * up to end-of-file, 0 if they differ or if either file cannot be opened.
 */
int compare(char *file_1, char *file_2)
{
    FILE *data_1 = fopen(file_1,"r");
    if (data_1 == NULL)
        return 0;   /* passing NULL to getc() would be undefined behavior */

    FILE *data_2 = fopen(file_2,"r");
    if (data_2 == NULL) {
        fclose(data_1);
        return 0;
    }

    int ch1, ch2;
    for (;;) {
        ch1 = getc(data_1);
        ch2 = getc(data_2);

        /* Stop at the first difference, or when both streams hit EOF
         * (when ch1 == ch2, testing ch1 alone is sufficient). */
        if ((ch1 != ch2) || (ch1 == EOF)) break;
    }

    fclose(data_1);
    fclose(data_2);

    /* Equal only if the loop ended with both streams at EOF together. */
    return (ch1 == ch2);
}

Upvotes: 1

HAL9000
HAL9000

Reputation: 2188

Here are two solutions: one does char-by-char reads/comparisons (inspired by the answer from myxaxa, but with bug fixes), and the other does block-by-block reads/comparisons. Error checking has been skipped due to laziness, but a robust implementation MUST HAVE ERROR CHECKING. (See comments)

#include <stdio.h>

/*
 * Compares the files named by argv[1] and argv[2] one character at a time
 * and prints "equal" or "not equal".
 * Error checking is intentionally left as TODOs.
 */
int main(int argc, char **argv)
{
    // TODO: check argc == 3

    FILE *data_1 = fopen(argv[1],"r");
    FILE *data_2 = fopen(argv[2],"r");

    // TODO: check data_1 and data_2 !=NULL

    int ch1, ch2;

    // Keep reading while the characters agree and neither stream has ended.
    // When ch1 == ch2, testing ch1 against EOF covers both streams.
    do
      {
        ch1 = fgetc(data_1);
        ch2 = fgetc(data_2);
      }
    while (ch1 == ch2 && ch1 != EOF);

    // The files match only if the loop stopped with both at EOF together.
    int equal = (ch1 == ch2);

    // TODO: check for read errors in data_1 and data_2 using ferror

    fclose(data_1);
    fclose(data_2);

    if (!equal)
      printf("not equal\n");
    else
      printf("equal\n");

    return 0;
}

Second solution using block reads/comparisons:

#include <stdio.h>
#include <string.h>

#define BUFFSIZE 4096

/*
 * Compares the files named by argv[1] and argv[2] in blocks of up to
 * BUFFSIZE bytes and prints "equal" or "not equal".
 * Error checking is intentionally left as TODOs.
 */
int main(int argc, char **argv)
{
    // TODO: check argc == 3

    FILE *data_1 = fopen(argv[1],"r");
    FILE *data_2 = fopen(argv[2],"r");

    // TODO: check data_1 and data_2 !=NULL

    char block_1[BUFFSIZE];
    char block_2[BUFFSIZE];
    size_t got_1, got_2;
    int equal = 1;

    do
      {
        got_1 = fread(block_1, 1, BUFFSIZE, data_1);
        got_2 = fread(block_2, 1, BUFFSIZE, data_2);

        // A length mismatch or differing bytes means the files differ.
        // memcmp over zero bytes compares equal, so the end-of-file case
        // (both counts 0) falls through to the loop condition.
        if (got_1 != got_2 || memcmp(block_1, block_2, got_1) != 0)
          {
            equal = 0;
            break;
          }
      }
    while (got_1 != 0);   // got_1 == got_2 here, so one test is enough

    // TODO: check for read errors in data_1 and data_2 using ferror

    fclose(data_1);
    fclose(data_2);

    if (!equal)
      printf("not equal\n");
    else
      printf("equal\n");

    return 0;
}

Runtimes for char by char reads/comparisons on a 840Mb file compared with itself:

real    0m5.158s
user    0m4.880s
sys     0m0.277s

... and for block by block on the same file:

real    0m0.353s
user    0m0.083s
sys     0m0.270s

Both tests were run multiple times to ensure the file was already cached.

Upvotes: 1

bruno
bruno

Reputation: 32596

strcmp(line1,line2)==0 means line1 and line2 are equal, but your code assumes it means they are different.

There is another error: if one file starts with the content of the other, you consider the files equal (supposing you have corrected the strcmp test).


I encourage you to check the result of fopen, in case at least one of the files does not exist / cannot be opened.


a solution can be :

/*
 * Compares the contents of two files.
 *
 * file_1, file_2: paths of the files to compare (opened in "r" mode).
 *
 * Returns 1 if both files have the same length and the same bytes,
 * 0 if they differ, if either file cannot be opened, or if a read error
 * occurs on either file.
 *
 * The data is compared with fread()/memcmp() rather than fgets()/strcmp():
 * strcmp() stops at the first '\0', so files that differ only after an
 * embedded null character would wrongly be reported as equal.
 */
int compare(char *file_1, char *file_2)
{
  FILE *fp1 = fopen(file_1,"r");

  if (fp1 == NULL)
    return 0;

  FILE *fp2 = fopen(file_2,"r");

  if (fp2 == NULL) {
    fclose(fp1);
    return 0;
  }

  unsigned char block1[4096];
  unsigned char block2[4096];
  int result = 1;

  for (;;) {
    size_t n1 = fread(block1, 1, sizeof block1, fp1);
    size_t n2 = fread(block2, 1, sizeof block2, fp2);

    /* Different amounts read: one file is a prefix of the other
     * (or a read failed, which is caught by ferror() below as well). */
    if (n1 != n2 || memcmp(block1, block2, n1) != 0) {
      result = 0;
      break;
    }

    /* n1 == n2 == 0: both files are exhausted. */
    if (n1 == 0)
      break;
  }

  /* A read error makes the comparison meaningless; never report "equal". */
  if (ferror(fp1) || ferror(fp2))
    result = 0;

  fclose(fp1);
  fclose(fp2);

  return result;
}

Upvotes: 4

vinodsaluja
vinodsaluja

Reputation: 694

Try reading the whole file in one go. Your loop runs until the whole file has been read, and as soon as one pair of lines matches it returns 0. Get the file size with something like this, then use malloc and read:

/* Fragment: fp and size are expected to be declared by the surrounding code.
 * NOTE(review): the return values of fseek()/ftell() are not checked here. */
fseek(fp, 0, SEEK_END); /* move the position to the end of the stream */
size = ftell(fp);       /* offset at the end, used as the file size */
fseek(fp, 0, SEEK_SET); /* go back to the start before reading */

Upvotes: 0

Related Questions