Reputation: 63
I need to check the string to see it matches any of the prefixes. The number of prefixes to be compared will increase in the future. So I have concern on the performance of code like below. What are the options to make it run faster when there are lots of strings need to be checked?
int checkString(const char *name)
{
if(!name) return 0;
if(strncmp(name, "AE_", 3) == 0 ) return 1;
if(strncmp(name, "AEDZ_", 5) == 0 ) return 1;
if(strncmp(name, "EDPZ_", 5) == 0 ) return 1;
if(strncmp(name, "EFAN_", 5) == 0 ) return 1;
if(strncmp(name, "E_GCA", 5 ) == 0 ) return 1;
if(strncmp(name, "EFFAN_", 6) == 0 ) return 1;
if(strncmp(name, "EPDPZ_", 6) == 0 ) return 1;
if(strncmp(name, "EDDPZ_", 6) == 0 ) return 1;
if(strncmp(name, "ECADF_", 6) == 0 ) return 1;
if(strncmp(name, "EPCEA_", 6) == 0 ) return 1;
if(strncmp(name, "CFEXXX_", 7) == 0 ) return 1;
if(strncmp(name, "IFEXX_", 7) == 0 ) return 1;
if(strncmp(name, "EINFFAN_", 8) == 0 ) return 1;
if(strncmp(name, "NXXEFAN_", 8) == 0 ) return 1;
if(strncmp(name, "ENAEAZY_", 8) == 0 ) return 1;
if(strncmp(name, "EYYYYYY_", 8) == 0 ) return 1;
if(strncmp(name, "ENEOENUE_", 9) == 0 ) return 1;
/*
more strncmp to be added.
*/
return 0;
}
Upvotes: 5
Views: 243
Reputation: 648
If the prefixes don't change and are only added and you declare a constant for the quantity of prefixes you have, you could loop using strstr
:
#include "stdio.h"
#include "string.h"
#define N_STRINGS 17
int checkString(const char *name);
const char *subStrings[N_STRINGS];
int main() {
subStrings[0] = "AE_";
subStrings[1] = "AEDZ_";
subStrings[2] = "EDPZ_";
subStrings[3] = "EFAN_";
subStrings[4] = "E_GCA";
subStrings[5] = "EFFAN_";
subStrings[6] = "EPDPZ_";
subStrings[7] = "EDDPZ_";
subStrings[8] = "ECADF_";
subStrings[9] = "EPCEA_";;
subStrings[10] = "CFEXXX_";
subStrings[11] = "IFEXX_";
subStrings[12] = "EINFFAN_";
subStrings[13] = "NXXEFAN_";
subStrings[14] = "ENAEAZY_";
subStrings[15] = "EYYYYYY_";
subStrings[16] = "ENEOENUE_";
//run for a random string
printf("%d\n", checkString("AEDZ_value"));
return 1;
}
int checkString(const char *name)
{
int i;
if(!name) return -1;
for (i = 0; i < N_STRINGS; i++) {
if (strstr(name, subStrings[i]) != 0) {
return i;
}
}
return -1;
}
the function checkString
would return the index of the prefix.
There are probably though many more effecient implementations for this case.
Upvotes: -2
Reputation: 153407
What are the options to make it run faster when there are lots of strings need to be checked?
If the n
prefixes were sorted, then at most log2(n)
compares would be needed. Code could use bsearch()
.
#include <stdio.h>
#include <stdlib.h>
const char *prefix[] = {"AE_", "AEDZ_", "CFEXXX_", "ECADF_", "EDDPZ_",
"EDPZ_", "EFAN_", "EFFAN_", "EINFFAN_", "ENAEAZY_", "ENEOENUE_", "EPCEA_",
"EPDPZ_", "EYYYYYY_", "E_GCA", "IFEXX_", "NXXEFAN_"};
int cmp(const void *key, const void *element) {
const char *k = key;
const char *e = *(const char **) element;
size_t elen = strlen(e);
printf("strncmp(%s,%s,%zu)\n", k,e,elen);
return strncmp(k, e, elen);
}
void test(const char *key) {
printf("Search for <%s>\n", key);
size_t n = sizeof prefix/sizeof prefix[0];
const char **s = bsearch(key, prefix, n, sizeof prefix[0], cmp);
if (s) {
printf("Found <%s>\n", *s);
} else {
printf("Not Found\n");
}
}
int main() {
test("E_GC");
test("E_GCA");
test("E_GCA_");
}
Output
Search for <E_GC>
strncmp(E_GC,EINFFAN_,8)
strncmp(E_GC,EYYYYYY_,8)
strncmp(E_GC,IFEXX_,6)
strncmp(E_GC,E_GCA,5)
Not Found
Search for <E_GCA>
strncmp(E_GCA,EINFFAN_,8)
strncmp(E_GCA,EYYYYYY_,8)
strncmp(E_GCA,IFEXX_,6)
strncmp(E_GCA,E_GCA,5)
Found <E_GCA>
Search for <E_GCA_>
strncmp(E_GCA_,EINFFAN_,8)
strncmp(E_GCA_,EYYYYYY_,8)
strncmp(E_GCA_,IFEXX_,6)
strncmp(E_GCA_,E_GCA,5)
Found <E_GCA>
Upvotes: 2
Reputation: 215221
One-time, ahead-of-time setup:
regex_t re;
regcomp(&re, "^(AE_|AEDZ|_EDPZ_|EFAN_|E_GCA|" /*...*/ ")", REG_EXTENDED);
To check:
return regexec(&re, name, 0, 0, 0) == 0;
On any good regex implementation, the regcomp
will compile the regex to a DFA that executes in a number of steps bounded by the length of the longest prefix.
Upvotes: 3