Reputation: 528
I'm trying to work with regex and c here to search for a given pattern with an unknown number of groups. So far, what I have is:
It is a destructive search (similar to strtok). What I want it to do is take in a string and chop out the groups given in a regex pattern,
for example, for a date formatted as yyyymmdd:
([[:digit:]]{4})([[:digit:]]{2})([[:digit:]]{2})
If it finds that pattern in the string, extract those pieces, separate them with some identifiable character (maybe \0), and store them in a buffer.
Once it is done with that and all groups have been extracted, replace the original string with the contents of the buffer. Finally free up the memory taken by the buffer.
/*
 * Destructively replace the contents of `string` with the text captured by
 * the first parenthesised group of the POSIX extended regex `pattern`.
 *
 * string  - writable, NUL-terminated buffer; it is searched and then
 *           overwritten in place. The capture is never longer than the
 *           input, so the buffer is neither grown nor reallocated and the
 *           caller's pointer stays valid.
 * pattern - POSIX extended regular expression.
 *
 * Returns EXIT_SUCCESS when the string was rewritten, when nothing matched,
 * or when the pattern has no (participating) first group; in the latter
 * cases `string` is left untouched. Returns EXIT_FAILURE when the pattern
 * does not compile or the group array cannot be allocated.
 */
int search(char *string, char *pattern)
{
    regex_t re;
    regmatch_t *groups;
    size_t n_groups;
    int rc = EXIT_SUCCESS;

    if (regcomp(&re, pattern, REG_EXTENDED) != 0)
    {
        /* `re` is not valid after a failed regcomp, so no regfree here. */
        fprintf(stderr, "Could not compile regex\n");
        return EXIT_FAILURE;
    }

    /* Slot 0 is the whole match; slots 1..re_nsub are the capture groups. */
    n_groups = re.re_nsub + 1;
    groups = malloc(n_groups * sizeof *groups);
    if (groups == NULL)
    {
        fprintf(stderr, "Error allocating regex groups\n");
        rc = EXIT_FAILURE;
        goto out;
    }

    if (regexec(&re, string, n_groups, groups, 0) != 0)
    {
        fprintf(stderr, "No matches found\n");
        goto out;
    }

    /*
     * Only touch the string when group 1 exists and took part in the match
     * (rm_so is -1 for a group that did not participate).
     */
    if (n_groups > 1 && groups[1].rm_so >= 0)
    {
        size_t length = (size_t)(groups[1].rm_eo - groups[1].rm_so);

        /* Source and destination overlap, so memmove rather than memcpy. */
        memmove(string, string + groups[1].rm_so, length);
        string[length] = '\0';
    }

out:
    free(groups);
    regfree(&re);
    return rc;
}
Right now, it finds the number of groups correctly and appears to allocate the correct amount of memory for the buffer. Then it replaces the contents of the original string. How do I get it to grab the other groups and separate them?
Thank you in advance.
Upvotes: 0
Views: 257
Reputation: 528
I seem to have something that is working. I don't know if it is "good" c, but it does work.
/*
 * Match `string` against the POSIX extended regex `pattern` and build a new
 * string holding the text of every capture group, joined with ','.
 *
 * string  - NUL-terminated text to search; it is not modified.
 * pattern - POSIX extended regular expression.
 *
 * Returns:
 *   - a newly malloc'd, NUL-terminated string on a successful match; the
 *     caller must free() it. A group that did not take part in the match
 *     contributes an empty field; a pattern with no groups yields "".
 *   - `string` itself (nothing allocated) when the pattern does not compile,
 *     the group array cannot be allocated, or nothing matches.
 *   - NULL when the result buffer cannot be allocated.
 * Callers can tell the cases apart by comparing the result with `string`.
 */
char* search(char *string, char *pattern)
{
    regex_t compiled;
    regmatch_t *groups;
    size_t n_groups;
    size_t group;
    size_t total_length = 0;
    char *new_string = NULL;
    char *cursor;

    if(regcomp(&compiled, pattern, REG_EXTENDED) != 0)
    {
        /* `compiled` is not valid after a failed regcomp: nothing to free. */
        fprintf(stderr, "Could not compile regex\n");
        return string;
    }

    /* Slot 0 is the whole match; slots 1..re_nsub are the capture groups. */
    n_groups = compiled.re_nsub + 1;
    groups = malloc(n_groups * sizeof *groups);
    if(groups == NULL)
    {
        fprintf(stderr, "Error allocating regex groups\n");
        new_string = string;
        goto out;
    }

    if(regexec(&compiled, string, n_groups, groups, 0) != 0)
    {
        fprintf(stderr, "No matches found\n");
        new_string = string;
        goto out;
    }

    /* First pass: size of all group texts plus one ',' between fields. */
    for(group = 1; group < n_groups; group++)
    {
        if(group > 1)
        {
            total_length += 1;
        }
        if(groups[group].rm_so >= 0)
        {
            total_length += (size_t)(groups[group].rm_eo - groups[group].rm_so);
        }
    }

    new_string = malloc(total_length + 1);
    if(new_string == NULL)
    {
        fprintf(stderr, "Error allocating %zu bytes\n", total_length + 1);
        goto out;
    }

    /* Second pass: copy each group straight into the result buffer. */
    cursor = new_string;
    for(group = 1; group < n_groups; group++)
    {
        if(group > 1)
        {
            *cursor++ = ',';
        }
        /* rm_so is -1 for a group that did not participate in the match. */
        if(groups[group].rm_so >= 0)
        {
            size_t length = (size_t)(groups[group].rm_eo - groups[group].rm_so);

            memcpy(cursor, string + groups[group].rm_so, length);
            cursor += length;
        }
    }
    *cursor = '\0';

out:
    free(groups);
    regfree(&compiled);
    return new_string;
}
I wrote a little helper function "append_string"
/*
 * Build a freshly allocated string consisting of `string`, then `sep`,
 * then `string2`. None of the inputs is modified or freed.
 *
 * Returns the new NUL-terminated string (the caller frees it), or NULL
 * after printing a message to stderr when the allocation fails.
 */
char* append_string(char* string, char* string2, char* sep)
{
    size_t head_len = strlen(string);
    size_t tail_len = strlen(string2);
    size_t sep_len = strlen(sep);
    char *joined = (char*)malloc(head_len + tail_len + sep_len + 1);

    if(joined == NULL)
    {
        fprintf(stderr, "Error appending strings" );
        return NULL;
    }

    /* Lay the three pieces end to end; the last copy carries the NUL. */
    memcpy(joined, string, head_len);
    memcpy(joined + head_len, sep, sep_len);
    memcpy(joined + head_len + sep_len, string2, tail_len + 1);

    return joined;
}
Upvotes: 1