Reputation: 2067
I want to check whether a string contains |c, but some non-ASCII characters are causing trouble: the following code prints Match. Is there a way to ignore non-ASCII characters?
#include <sys/types.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
/*
 * Compile a POSIX extended regular expression and run it once against a
 * fixed byte string, printing "Match" or "No match" to stdout.
 *
 * Returns 0 after a match / no-match result. Exits with status 1 (after
 * printing a diagnostic to stderr) if the pattern cannot be compiled or if
 * regexec() reports an error other than REG_NOMATCH.
 */
int main(int argc, char *argv[]){
    regex_t regex;
    int reti;
    char msgbuf[100];

    /* Command-line arguments are not used by this program. */
    (void)argc;
    (void)argv;

    /* Compile regular expression.
     * NOTE(review): the pattern literal is kept exactly as posted; how the
     * backslash in it is written is the subject of the question. */
    reti = regcomp(&regex, "\|c", REG_EXTENDED);
    if (reti) {
        fprintf(stderr, "Could not compile regex\n");
        exit(1);
    }

    /* Execute regular expression; no sub-match information is requested. */
    reti = regexec(&regex, "<81>U¼T_<84>Ùe/^P^Rï+߶ë", 0, NULL, 0);
    if (!reti) {
        puts("Match");
    }
    else if (reti == REG_NOMATCH) {
        puts("No match");
    }
    else {
        /* Translate the error code to text while the compiled regex is
         * still valid, then release it before exiting. */
        regerror(reti, &regex, msgbuf, sizeof(msgbuf));
        fprintf(stderr, "Regex match failed: %s\n", msgbuf);
        regfree(&regex);
        exit(1);
    }

    /* Free the compiled regular expression. */
    regfree(&regex);
    return 0;
}
Upvotes: 1
Views: 169
Reputation: 287835
Non-ASCII characters in the input aren't the problem. Instead, "\|c" is a wrong way of writing "|c" (which matches everything). Use two backslashes (one to escape the backslash in the string itself):
reti = regcomp(&regex, "\\|c", REG_EXTENDED);
Upvotes: 4