Clint
Clint

Reputation: 175

Error validating the contents of a file using regex in C

I am having a problem with the code below, which is supposed to validate a file using a regex. The file must contain only letters or numbers. My regular expression is:

#define to_find "^[a-zA-Z0-9]+$"

which is located in my main.h file. The below code is in my main.c

#include <ctype.h>
#include <errno.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

#include "main.h"

/*
 * Checks the lines of "<argv[1]>.dat" against the regular expression to_find.
 *
 * argc > 2  : prints the usage text and returns EXIT_FAILURE.
 * argc == 2 : opens the file and reads it line by line with fgets().
 * otherwise : only compiles the regex and returns 0.
 *
 * NOTE(review): the compiled regex is never released with regfree().
 */
int main(int argc, char * argv[])
{
    int ret_val;
    regex_t regex;
    FILE *fp;
    char line[1024];

     /* Compile the pattern once, before any argument checking. */
     if (regcomp(&regex, to_find, REG_EXTENDED) != 0)
     {
        fprintf(stderr, "Failed to compile regex '%s'\n", to_find);
        return EXIT_FAILURE;
     }

    if (argc > 2)
    {
        printf("Usage: tree OR tree [filename]\n");
        return EXIT_FAILURE;
    }

    else if (argc == 2)
    {
        /*
         * strcat() appends ".dat" in place, i.e. it writes past the end of
         * the string stored in argv[1].
         * NOTE(review): the result of fopen() is not checked before fp is
         * handed to fgets() below.
         */
        fp = fopen(strcat(argv[1],".dat"), "r");

        printf("file opened\n");
        while ((fgets(line, 1024, fp)) != NULL)
        {
            /*
             * Overwrites the last character of the line that was read; this
             * is the '\n' whenever fgets() stored one.
             */
            line[strlen(line) - 1] = '\0';
            /*
             * The ';' directly after the condition is an empty statement and
             * forms the whole body of this if. The braced block below is
             * therefore not guarded by the condition and runs on the first
             * iteration regardless of the regexec() result.
             */
            if ((ret_val = regexec(&regex, line, 0, NULL, 0)) != 0);
            {
                printf("Error: %s\n", line);
                return EXIT_FAILURE;
            }
        }

        fclose(fp);
        printf("File closed\n");        
    }

    return 0;
}

The file I am reading is called names.dat and contains:

int
char
[
double

What is happening is that the program kicks out at the very first line, when it should only kick out at the 3rd line. I am sure this is pretty simple to solve, but I have not been able to figure it out. I would appreciate any help. Also, how do I deal with the

\n

character in the file? This file will have several lines. Thanks in advance.

Upvotes: 0

Views: 143

Answers (1)

user1129665
user1129665

Reputation:

You have some small errors, but the one that causes the problem is:

// Do you see this sweet little semicolon :P ----------------+
if ((ret_val = regexec(&regex, line, 0, NULL, 0)) != 0); // <+
{
    printf("Error: %s\n", line);
    return EXIT_FAILURE;
 }

Besides that, there is this line:

fp = fopen(strcat(argv[1],".dat"), "r");

You cannot append to argv[1]: there is no guarantee of any free space after the string it points to. You need to create a new buffer to hold the data, so create a buffer of size PATH_MAX and build the path in it. Here is an improved version:

#include <ctype.h>
#include <errno.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <limits.h>

#define to_find "^[a-zA-Z0-9]+$"

/*
 * Checks every whitespace-separated token of "<argv[1]>.dat" against the
 * regular expression to_find and reports each one as "Match" or "Not match".
 *
 * argc > 2  : prints the usage text and returns EXIT_FAILURE.
 * argc == 2 : validates the file named by argv[1] with ".dat" appended.
 * otherwise : there is nothing to validate; returns 0.
 *
 * Returns 0 when the file was read to the end (even if some tokens did not
 * match), and EXIT_FAILURE when the file name does not fit into the path
 * buffer, the regex cannot be compiled, the file cannot be opened, or a read
 * error occurs.
 */
int main(int argc, char * argv[])
{
    regex_t regex;
    FILE *fp;
    char file[PATH_MAX];
    char line[1024];
    int len;
    int status = 0;

    if (argc > 2)
    {
        printf("Usage: tree OR tree [filename]\n");
        return EXIT_FAILURE;
    }

    if (argc < 2)
    {
        return 0;
    }

    /*
     * Build the file name in a buffer of our own; snprintf() never writes
     * past the buffer, and its return value tells us about truncation.
     */
    len = snprintf(file, sizeof file, "%s.dat", argv[1]);
    if (len < 0 || (size_t)len >= sizeof file)
    {
        fprintf(stderr, "Error: file name '%s.dat' is too long\n", argv[1]);
        return EXIT_FAILURE;
    }

    if (regcomp(&regex, to_find, REG_EXTENDED) != 0)
    {
        fprintf(stderr, "Failed to compile regex '%s'\n", to_find);
        return EXIT_FAILURE;
    }

    fp = fopen(file, "r");
    if (fp == NULL)
    {
        perror("Error");
        regfree(&regex);    /* do not leak the compiled pattern */
        return EXIT_FAILURE;
    }

    printf("file opened\n");

    /*
     * "%1023s" skips leading whitespace (including '\n') and stores at most
     * 1023 characters plus the terminating '\0', so no newline has to be
     * stripped. Note that this reads whitespace-delimited tokens, not lines.
     */
    while (fscanf(fp, "%1023s", line) == 1)
    {
        /* Keep scanning after a mismatch so every offending token is shown. */
        if (regexec(&regex, line, 0, NULL, 0) != 0)
        {
            printf("Not match: %s\n", line);
        }
        else
        {
            printf("Match: %s\n", line);
        }
    }

    /* fscanf() stops on EOF as well as on errors; tell the two apart. */
    if (ferror(fp))
    {
        perror("Error");
        status = EXIT_FAILURE;
    }

    regfree(&regex);
    fclose(fp);
    printf("File closed\n");

    return status;
}

See the diff: http://www.diffchecker.com/8itbz5dy

test:

$ gcc -Wall sample.c 
$ 
$ cat name.dat 
int
char
[
double
$ ./a.out name
file opened
Match: int
Match: char
Not match: [
Match: double
File closed
$ 

Upvotes: 1

Related Questions