Face
Face

Reputation: 53

How to find a line in a file using regex in C?

How can I use regular expressions in C? For example, suppose I want to find this line in a file:

DAEMONS=(sysklogd network sshd !netfs !crond)

and then print each daemon on a separate line, like this:

sysklogd 
network 
sshd 
!netfs 
!crond

Here is what I have done so far:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <regex.h>
/* Pattern handed to regcomp() below with REG_EXTENDED: one character from the
 * set "a-z", space, "A-Z", followed by a single space at the end of the string. */
#define tofind    "[a-z A-Z] $"
/*
 * Reads /etc/rc.conf line by line and prints a buffer whenever the compiled
 * pattern matches.
 *
 * NOTE(review): as written this does not test the lines it reads; see the
 * comments on the individual statements below.
 */
int main(){
 FILE *fp;
 char line[1024];      /* receives each line read by fgets() */
 int retval = 0;       /* result of the most recent regexec() call */
 char address[256];    /* NOTE(review): never written anywhere in this function */
 regex_t re;

 /* NOTE(review): bare `return;` in a function declared to return int. */
 if(regcomp(&re, tofind, REG_EXTENDED) != 0)
  return;

 /* NOTE(review): the result of fopen() is not checked before fgets() uses it. */
 fp = fopen("/etc/rc.conf","r");//this file has this line "DAEMONS=(sysklogd network sshd !netfs !crond)"
 while((fgets(line, 1024, fp)) != NULL) {
     /* NOTE(review): the match and the print both use `address`, not the
      * `line` buffer that fgets() just filled. */
     if((retval = regexec(&re, address, 0, NULL, 0)) == 0)
      printf("%s\n", address);
 } 
}

Any help would be much appreciated.

Upvotes: 5

Views: 20387

Answers (1)

Jonathan Leffler
Jonathan Leffler

Reputation: 754710

You read the line into line, so you should pass line to regexec(). You also need to think about whether the newline at the end of the line affects the patterns. (It was correct to use fgets(), but remember it keeps the newline at the end.)

You should also do return -1; (or any other value that is not 0 modulo 256) rather than a plain return with no value. Also, you should check that the file was opened; I had to use an alternative name because there is no such file as /etc/rc.conf on my machine - MacOS X.

This works for me:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <regex.h>

/* Extended regex applied to every input line (newline already removed). */
#define tofind    "[a-z A-Z] $"

/*
 * Print every line of a file that matches the extended regex `tofind`.
 *
 * argv[1], when given, names the file to scan; otherwise /etc/rc.conf is read.
 * Each matching line is written to stdout wrapped in "<<" and ">>".
 *
 * Returns EXIT_SUCCESS once the whole file has been scanned, or EXIT_FAILURE
 * (with a message on stderr) when the regex does not compile or the file
 * cannot be opened.
 */
int main(int argc, char **argv)
{
    FILE *fp;
    char line[1024];
    regex_t re;
    //this file has this line "DAEMONS=(sysklogd network sshd !netfs !crond)"
    const char *filename = "/etc/rc.conf";

    if (argc > 1)
        filename = argv[1];

    if (regcomp(&re, tofind, REG_EXTENDED) != 0)
    {
        fprintf(stderr, "Failed to compile regex '%s'\n", tofind);
        return EXIT_FAILURE;
    }

    fp = fopen(filename, "r");
    if (fp == NULL)
    {
        /* Save errno first: later library calls are allowed to overwrite it. */
        int errnum = errno;
        fprintf(stderr, "Failed to open file %s (%d: %s)\n",
                filename, errnum, strerror(errnum));
        regfree(&re);
        return EXIT_FAILURE;
    }

    while (fgets(line, sizeof line, fp) != NULL)
    {
        /*
         * fgets() keeps the newline.  Cut the string at the first '\n' if
         * there is one; unlike line[strlen(line)-1] this is safe for an empty
         * string and does not eat the last character of a final line that
         * has no trailing newline.
         */
        line[strcspn(line, "\n")] = '\0';
        if (regexec(&re, line, 0, NULL, 0) == 0)
            printf("<<%s>>\n", line);
    }

    fclose(fp);
    regfree(&re);
    return EXIT_SUCCESS;
}

If you need help writing regular expressions instead of help writing C code that uses them, then we need to design the regex to match the line you show.

^DAEMONS=\([^)]*\) *$

This will match the line as long as it is written as shown. If you can have spaces between the 'S' and the '=' or between the '=' and the '(', then you need appropriate modifications. I've allowed for trailing blanks - people are often sloppy; but if they use trailing tabs, then the line won't be selected.

Once you've found the line, you have to split it into pieces. You might elect to use the 'capturing' brackets facility, or simply use strchr() to find the open bracket, and then a suitable technique for separating the daemon names - I'd avoid strtok() and probably use strspn() or strcspn() to find the words.


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <regex.h>

/*
 * Extended regex for the DAEMONS line: literal "DAEMONS=(", a captured run of
 * non-')' characters (sub-match 1), a literal ")", then optional trailing
 * blanks or tabs up to the end of the line.
 */
#define tofind    "^DAEMONS=\\(([^)]*)\\)[ \t]*$"

/*
 * Find lines of the form DAEMONS=(name name ...) in a file and print the
 * individual names.
 *
 * argv[1], when given, names the file to scan; otherwise /etc/rc.conf is read.
 * For each matching line the program prints the raw line, the whole match,
 * the text between the parentheses, and then one "Name:" line per
 * blank- or tab-separated word inside the parentheses.
 *
 * Returns EXIT_SUCCESS once the whole file has been scanned, or EXIT_FAILURE
 * (with a message on stderr) when the regex does not compile or the file
 * cannot be opened.
 */
int main(int argc, char **argv)
{
    FILE *fp;
    char line[1024];
    regex_t re;
    regmatch_t rm[2];   /* rm[0]: whole match, rm[1]: text inside the parentheses */
    //this file has this line "DAEMONS=(sysklogd network sshd !netfs !crond)"
    const char *filename = "/etc/rc.conf";

    if (argc > 1)
        filename = argv[1];

    if (regcomp(&re, tofind, REG_EXTENDED) != 0)
    {
        fprintf(stderr, "Failed to compile regex '%s'\n", tofind);
        return EXIT_FAILURE;
    }

    fp = fopen(filename, "r");
    if (fp == NULL)
    {
        /* Save errno first: later library calls are allowed to overwrite it. */
        int errnum = errno;
        fprintf(stderr, "Failed to open file %s (%d: %s)\n", filename, errnum, strerror(errnum));
        regfree(&re);
        return EXIT_FAILURE;
    }

    while (fgets(line, sizeof line, fp) != NULL)
    {
        /*
         * fgets() keeps the newline.  Cut the string at the first '\n' if
         * there is one; unlike line[strlen(line)-1] this is safe for an empty
         * string and does not eat the last character of a final line that
         * has no trailing newline.
         */
        line[strcspn(line, "\n")] = '\0';

        if (regexec(&re, line, 2, rm, 0) == 0)
        {
            printf("<<%s>>\n", line);
            printf("Line: <<%.*s>>\n", (int)(rm[0].rm_eo - rm[0].rm_so), line + rm[0].rm_so);
            printf("Text: <<%.*s>>\n", (int)(rm[1].rm_eo - rm[1].rm_so), line + rm[1].rm_so);

            const char *src = line + rm[1].rm_so;
            const char *end = line + rm[1].rm_eo;

            /*
             * Skip blanks right after '(' so they do not produce an empty
             * name.  The capture is followed by ')', which is not a
             * separator, so this cannot step past `end`.
             */
            src += strspn(src, " \t");
            while (src < end)
            {
                size_t len = strcspn(src, " \t");
                /* The last word runs into the ")" that follows the capture; clamp it. */
                if (len > (size_t)(end - src))
                    len = (size_t)(end - src);
                printf("Name: <<%.*s>>\n", (int)len, src);
                src += len;
                src += strspn(src, " \t");
            }
        }
    }

    fclose(fp);
    regfree(&re);
    return EXIT_SUCCESS;
}

A good deal of debugging code in there - but it won't take you long to produce the answer you request. I get:

<<DAEMONS=(sysklogd network sshd !netfs !crond)>>
Line: <<DAEMONS=(sysklogd network sshd !netfs !crond)>>
Text: <<sysklogd network sshd !netfs !crond>>
Name: <<sysklogd>>
Name: <<network>>
Name: <<sshd>>
Name: <<!netfs>>
Name: <<!crond>>

Beware: when you want a backslash in a regex, you have to write two backslashes in the C source code.

Upvotes: 6

Related Questions