Wayne Li
Wayne Li

Reputation: 63

How to optimize below multiple strncmp?

I need to check the string to see it matches any of the prefixes. The number of prefixes to be compared will increase in the future. So I have concern on the performance of code like below. What are the options to make it run faster when there are lots of strings need to be checked?

int checkString(const char *name)
{
    if(!name) return 0;

    if(strncmp(name, "AE_", 3) == 0 ) return 1;                                                                              
    if(strncmp(name, "AEDZ_", 5) == 0 ) return 1;                                                                            
    if(strncmp(name, "EDPZ_", 5) == 0 ) return 1;                                                                            
    if(strncmp(name, "EFAN_", 5) == 0 ) return 1;                                                                            
    if(strncmp(name, "E_GCA", 5 ) == 0 ) return 1;                                                                           
    if(strncmp(name, "EFFAN_", 6) == 0 ) return 1;                                                                           
    if(strncmp(name, "EPDPZ_", 6) == 0 ) return 1;                                                                           
    if(strncmp(name, "EDDPZ_", 6) == 0 ) return 1;                                                                           
    if(strncmp(name, "ECADF_", 6) == 0 ) return 1;                                                                           
    if(strncmp(name, "EPCEA_", 6) == 0 ) return 1;                                                                           
    if(strncmp(name, "CFEXXX_", 7) == 0 ) return 1;                                                                          
    if(strncmp(name, "IFEXX_", 7) == 0 ) return 1;                                                                           
    if(strncmp(name, "EINFFAN_", 8) == 0 ) return 1;                                                                         
    if(strncmp(name, "NXXEFAN_", 8) == 0 ) return 1;                                                                         
    if(strncmp(name, "ENAEAZY_", 8) == 0 ) return 1;                                                                         
    if(strncmp(name, "EYYYYYY_", 8) == 0 ) return 1;                                                                         
    if(strncmp(name, "ENEOENUE_", 9) == 0 ) return 1;                                                                        
    /*
    more strncmp to be added.
    */

    return 0;
}       

Upvotes: 5

Views: 243

Answers (3)

Alexander James Pane
Alexander James Pane

Reputation: 648

If the prefixes don't change and are only added and you declare a constant for the quantity of prefixes you have, you could loop using strstr:

#include "stdio.h"
#include "string.h"

#define N_STRINGS 17

int checkString(const char *name);

const char *subStrings[N_STRINGS];

int main() {

    subStrings[0] = "AE_";
    subStrings[1] = "AEDZ_";
    subStrings[2] = "EDPZ_";
    subStrings[3] = "EFAN_";
    subStrings[4] = "E_GCA";
    subStrings[5] = "EFFAN_";
    subStrings[6] = "EPDPZ_";
    subStrings[7] = "EDDPZ_";
    subStrings[8] = "ECADF_";
    subStrings[9] = "EPCEA_";;
    subStrings[10] = "CFEXXX_";
    subStrings[11] = "IFEXX_";
    subStrings[12] = "EINFFAN_";
    subStrings[13] = "NXXEFAN_";
    subStrings[14] = "ENAEAZY_";
    subStrings[15] = "EYYYYYY_";
    subStrings[16] = "ENEOENUE_";

    //run for a random string
    printf("%d\n", checkString("AEDZ_value"));

    return 1;
}

int checkString(const char *name)
{
    int i;

    if(!name) return -1;

    for (i = 0; i < N_STRINGS; i++) {
        if (strstr(name, subStrings[i]) != 0) {
            return i;
        }
    }                                                                 

    return -1;
}

the function checkString would return the index of the prefix.

There are probably though many more effecient implementations for this case.

Upvotes: -2

chux
chux

Reputation: 153407

What are the options to make it run faster when there are lots of strings need to be checked?

If the n prefixes were sorted, then at most log2(n) compares would be needed. Code could use bsearch().

#include <stdio.h>
#include <stdlib.h>

const char *prefix[] = {"AE_", "AEDZ_", "CFEXXX_", "ECADF_", "EDDPZ_",
    "EDPZ_", "EFAN_", "EFFAN_", "EINFFAN_", "ENAEAZY_", "ENEOENUE_", "EPCEA_",
    "EPDPZ_", "EYYYYYY_", "E_GCA",  "IFEXX_", "NXXEFAN_"};

int cmp(const void *key, const void *element) {
  const char *k = key;
  const char *e = *(const char **) element;
  size_t elen = strlen(e);
  printf("strncmp(%s,%s,%zu)\n", k,e,elen);
  return strncmp(k, e, elen);
}

void test(const char *key) {
  printf("Search for <%s>\n", key);
  size_t n = sizeof prefix/sizeof prefix[0];
  const char **s = bsearch(key, prefix, n, sizeof prefix[0], cmp);
  if (s) {
    printf("Found <%s>\n", *s);
  } else {
    printf("Not Found\n");
  }
}

int main() {
  test("E_GC");
  test("E_GCA");
  test("E_GCA_");
}

Output

Search for <E_GC>
strncmp(E_GC,EINFFAN_,8)
strncmp(E_GC,EYYYYYY_,8)
strncmp(E_GC,IFEXX_,6)
strncmp(E_GC,E_GCA,5)
Not Found
Search for <E_GCA>
strncmp(E_GCA,EINFFAN_,8)
strncmp(E_GCA,EYYYYYY_,8)
strncmp(E_GCA,IFEXX_,6)
strncmp(E_GCA,E_GCA,5)
Found <E_GCA>
Search for <E_GCA_>
strncmp(E_GCA_,EINFFAN_,8)
strncmp(E_GCA_,EYYYYYY_,8)
strncmp(E_GCA_,IFEXX_,6)
strncmp(E_GCA_,E_GCA,5)
Found <E_GCA>

Upvotes: 2

R.. GitHub STOP HELPING ICE
R.. GitHub STOP HELPING ICE

Reputation: 215221

One-time, ahead-of-time setup:

regex_t re;
regcomp(&re, "^(AE_|AEDZ|_EDPZ_|EFAN_|E_GCA|" /*...*/ ")", REG_EXTENDED);

To check:

return regexec(&re, name, 0, 0, 0) == 0;

On any good regex implementation, the regcomp will compile the regex to a DFA that executes in a number of steps bounded by the length of the longest prefix.

Upvotes: 3

Related Questions