Klas. S
Klas. S

Reputation: 692

Get line of regex match in C

I have written this code, which finds every occurrence of the pattern "match" in the string str and prints it.

#include <regex.h>
#include <string.h>
#include <stdio.h>
/*
 * Prints every occurrence of the pattern "match" found in a fixed
 * multi-line string, one occurrence per output line.
 *
 * Returns 0 on success and 1 if the pattern cannot be compiled.
 */
int main(int argc, const char *argv[]) {
    (void)argc;
    (void)argv;

    /* The text is only read, so a string literal is enough: no copy to free. */
    const char *text = "aaaaaaa match aaaaaaaaaaaaaaaaaaaa\n"
                       "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n"
                       "cc match ccccccccccccccccccccccccc";

    regex_t regex;
    int rc = regcomp(&regex, "match", REG_EXTENDED);
    if (rc != 0) {
        char msg[128];
        regerror(rc, &regex, msg, sizeof msg);
        fprintf(stderr, "regcomp: %s\n", msg);
        return 1;
    }

    regmatch_t match;
    const char *cursor = text;
    int flags = 0;

    /* Compare against 0 so that a regexec error is not treated as a match. */
    while (regexec(&regex, cursor, 1, &match, flags) == 0) {
        int len = (int)(match.rm_eo - match.rm_so);

        /* Print exactly the matched bytes instead of writing a '\0' into the text. */
        printf("%.*s\n", len, cursor + match.rm_so);

        if (len == 0) {
            /* Empty match: step one character forward, but never past the terminator. */
            if (cursor[match.rm_eo] == '\0') {
                break;
            }
            cursor += match.rm_eo + 1;
        } else {
            /* Resume right after the match so an adjacent match is not skipped. */
            cursor += match.rm_eo;
        }

        /* From now on the cursor is no longer at the beginning of the text. */
        flags = REG_NOTBOL;
    }

    regfree(&regex);
    return 0;
}

My problem is that I need to find on which line the match starts. Preferably this should work with multi-line matches, but single line is fine right now. Does regex have some hidden feature which I can use to solve this?

Upvotes: 1

Views: 758

Answers (2)

Klas. S
Klas. S

Reputation: 692

In this code I save the start offset of every match in a linked list, and afterwards walk through the string once, counting newlines, to find the line on which each match starts (the line numbers it prints are zero-based). It seems to work well for the most part. If anyone knows of a better solution, please let me know.

#include <regex.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>

/* Node of a singly linked list that records where matches begin. */
typedef struct match_s {
    int beg;               /* offset at which the match begins */
    struct match_s *next;  /* following node, or NULL for the last node */
} match_t;

/*
 * Finds every occurrence of the pattern "match" in a fixed multi-line string
 * and prints the zero-based number of the line on which each one starts.
 *
 * Pass 1 stores the start offset of every match in a singly linked list.
 * Pass 2 walks the text once, counting newlines, and reports the line of
 * each stored offset while releasing the list nodes.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if an allocation or the pattern
 * compilation fails.
 */
int main(int argc, const char *argv[]) {
    (void)argc;
    (void)argv;

    char *text = strdup("aaaaaaa match aaaaaaaaaaaaaaaaaaaa\n"
                        "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n"
                        "cc match ccccccccccccccccccccccccc");
    if (text == NULL) {
        perror("strdup");
        return EXIT_FAILURE;
    }

    regex_t regex;
    int rc = regcomp(&regex, "match", REG_EXTENDED);
    if (rc != 0) {
        char msg[128];
        regerror(rc, &regex, msg, sizeof msg);
        fprintf(stderr, "regcomp: %s\n", msg);
        free(text);
        return EXIT_FAILURE;
    }

    match_t *head = NULL;
    match_t *tail = NULL;
    int status = EXIT_SUCCESS;
    int base = 0;   /* offset of the current search position within text */
    int flags = 0;
    regmatch_t found;

    /* Compare against 0 so that a regexec error is not treated as a match. */
    while (regexec(&regex, text + base, 1, &found, flags) == 0) {
        match_t *node = malloc(sizeof *node);
        if (node == NULL) {
            perror("malloc");
            status = EXIT_FAILURE;
            break;
        }
        /* regexec offsets are relative to the search position; make them absolute. */
        node->beg = base + (int)found.rm_so;
        node->next = NULL;
        if (tail == NULL) {
            head = node;
        } else {
            tail->next = node;
        }
        tail = node;

        int step = (int)found.rm_eo;
        if (found.rm_eo == found.rm_so) {
            /* Empty match: step one character forward, but never past the terminator. */
            if (text[base + step] == '\0') {
                break;
            }
            step++;
        }
        /* Resume right after the match so an adjacent match is not skipped. */
        base += step;
        flags = REG_NOTBOL;
    }
    regfree(&regex);

    int line = 0;
    for (int i = 0; head != NULL; i++) {
        /* Test the offset before the newline so a match starting on '\n' is reported. */
        if (head->beg == i) {
            printf("Match on line: %d\n", line);
            match_t *next = head->next;
            free(head);
            head = next;
        }
        if (text[i] == '\0') {
            break;
        }
        if (text[i] == '\n') {
            line++;
        }
    }

    /* Defensive: release any node that was not consumed above. */
    while (head != NULL) {
        match_t *next = head->next;
        free(head);
        head = next;
    }

    /* Free the pointer returned by strdup, which is never advanced. */
    free(text);
    return status;
}

Upvotes: 1

RoadRunner
RoadRunner

Reputation: 26325

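You can parse the lines with strtok() to split the string at every \n. Be aware that strtok() treats a run of consecutive delimiters as a single one, so empty lines are skipped and do not count towards the line number.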

Additionally, a struct can be used to store each line:

/* One line of the input text together with its line number. */
typedef struct {
    char *str;      /* text of the line */
    size_t lineno;  /* number of this line within the text */
} line_t;

Then once you know how many \n exist in the string, you can create an array of structs:

/* One slot per line: the text has one more line than it has '\n' characters. */
line_t *lines = malloc((numlines + 1) * sizeof *lines);

Each line will be stored like this:

Line 1: "aaaaaaa match aaaaaaaaaaaaaaaaaaaa"
Line 2: "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
Line 3: "cc match ccccccccccccccccccccccccc";

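Then you can use strtok() again to split each line at the spaces, and strcmp() to compare every word with the pattern. Note that this only finds the pattern when it appears as a whole space-separated word; it is not a regular-expression search.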

Here is some example code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One line of the input text together with its line number. */
typedef struct {
    char *str;      /* heap-allocated copy of the line's text, without the '\n' */
    size_t lineno;  /* 1-based number of this line */
} line_t;

/*
 * Prints the 1-based line number of every line of a fixed multi-line string
 * that contains the word "match" as a whole space-separated word.
 *
 * The text is first split into an array of line_t (one heap copy per line),
 * then each line is split at spaces and every word is compared with the key.
 *
 * Exits with EXIT_FAILURE if an allocation fails; returns 0 otherwise.
 */
int main(void) {
    char str[] = "aaaaaaa match aaaaaaaaaaaaaaaaaaaa\n"
                 "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n"
                 "cc match ccccccccccccccccccccccccc";
    const char *key = "match";
    const char *delim2 = " ";
    size_t numlines = 0, count = 0;

    for (size_t i = 0; str[i]; i++) {
        if (str[i] == '\n') {
            numlines++;
        }
    }

    /* The text has exactly one more line than it has '\n' characters. */
    line_t *lines = malloc((numlines + 1) * sizeof *lines);
    if (!lines) {
        fprintf(stderr, "Cannot allocate %zu members\n", numlines + 1);
        exit(EXIT_FAILURE);
    }

    /*
     * Split at each '\n' by hand. strtok() would merge consecutive newlines
     * and silently drop empty lines, making every later line number too low.
     */
    const char *start = str;
    while (start != NULL) {
        const char *newline = strchr(start, '\n');
        size_t len = newline ? (size_t)(newline - start) : strlen(start);

        lines[count].str = malloc(len + 1);
        if (!lines[count].str) {
            fprintf(stderr, "Cannot allocate %zu bytes\n", len + 1);
            exit(EXIT_FAILURE);
        }
        memcpy(lines[count].str, start, len);
        lines[count].str[len] = '\0';
        lines[count].lineno = count + 1;
        count++;

        start = newline ? newline + 1 : NULL;
    }

    for (size_t i = 0; i < count; i++) {
        /* strtok() modifies its argument; it works on the private copy of the line. */
        char *word = strtok(lines[i].str, delim2);
        while (word != NULL) {
            if (strcmp(word, key) == 0) {
                printf("pattern '%s' found on line %zu\n", key, lines[i].lineno);
            }
            word = strtok(NULL, delim2);
        }
        free(lines[i].str);
        lines[i].str = NULL;
    }

    free(lines);
    lines = NULL;

    return 0;
}

Note: This code uses dynamic memory allocation with malloc() and releases every allocation with free() once it is no longer needed. If you would like me to explain these in more detail, let me know.

Upvotes: 2

Related Questions