Vinayak Deshmukh
Vinayak Deshmukh

Reputation: 379

Case insensitive version of strstr()

In order to solve a problem from K&R C book, I am trying to write a function strstrci() which is the case insensitive version of strstr()

char * strstrci (char *s, char *p)
{
    int i, j;
    for (i = 0; *(s + i) != '\0'; i++)
    {
        if (*(s + i) == *p || *(s + i) == *p + 32 || *(s + i) == *p - 32)
        {
            for (j = 1; *(s + i + j) == *(p + j) || *(s + i + j) == *(p + j) + 32 || *(s + i + j) == *(p + j) - 32; j++)
            {
                if (*(p + j) == '\0')
                    return (s + i);
            }
        }
    }
    return NULL;
}

The function seems to work well for all cases except when the pattern is the last part of the input string. For example: string : On a wall he sat pattern: sat In such a case it returns a NULL. Please point out the mistake

Upvotes: 1

Views: 154

Answers (2)

chqrlie
chqrlie

Reputation: 145307

There are multiple problems:

  • The arguments should be typed const char *.
  • Empty strings should be handled in a way consistent with strstr: an empty string pointed to by p should be found at the beginning of s, even if s points to an empty string.
  • The offset 32 is ASCII specific, which may be OK, but it is not correct for all characters. Your function will erroneously match @, [, \, ] with respectively `, {, |, } and ^, among other pairs that you indiscriminately consider equivalent.
  • The index variables i and j should have type size_t to allow for arbitrary long strings.
  • The inner loop only tests for *(p + j) == '\0' if the characters match, hence the function only matches the needle at the end of the haystack string.
  • Using the pointer syntax *(p + j) == '\0' is cumbersome and less readable than the equivalent array syntax p[j] == '\0'.

Here is a modified version using the standard header <ctype.h>:

#include <ctype.h>

char *strstrci(const char *s, const char *p) {
    if (*p == '\0')
        return (char *)s;
    for (; *s; s++) {
        if (tolower((unsigned char)*s) == tolower((unsigned char)*p)) {
            size_t i;
            for (i = 1;; i++) {
                if (p[i] == '\0')
                    return (char *)s;
                if (tolower((unsigned char)s[i]) != tolower((unsigned char)p[i]))
                    break;
            }
        }
    }
    return NULL;
}

Upvotes: 3

0___________
0___________

Reputation: 68034

It is very hard to read your function.

  • Use indexes instead of pointer arithmetic. It is the same but it is much easier for humans.
  • Use standard functions to change case.
  • Use size_t for indexes and sizes.
  • Try to be const correct.
  • Use meaningful variable names.

Generally, it was easier for me to write it instead of analyzing:

char *stristr(const char *haystack, const char *needle)
{
    char *result = NULL;

    if (haystack && needle) // Ensure neither input is NULL
    {
        if (*needle == '\0') // If the needle is empty
        {
            result = (char *)haystack;
        }
        else
        {
            const char *h = haystack;

            while (*h) 
            {
                const char *h_sub = h;
                const char *n_sub = needle;

                while (*n_sub && *h_sub && tolower((unsigned char)*h_sub) == tolower((unsigned char)*n_sub))
                {
                    ++h_sub;
                    ++n_sub;
                }

                if (*n_sub == '\0')
                {
                    result = (char *)h;
                    break;
                }

                ++h; 
            }
        }
    }

    return result;
}

or using indexes:

char *strstrci(const char *haystack, const char *needle)
{
    if (*needle == '\0') // If the needle is empty
    {
        return (char *)haystack; 
    }

    for (; *haystack; haystack++) 
    {
        if (tolower((unsigned char)*haystack) == tolower((unsigned char)*needle))
        {
            size_t needleIndex = 1; 

            while (needle[needleIndex] != '\0' && haystack[needleIndex] != '\0' &&
                   tolower((unsigned char)haystack[needleIndex]) == tolower((unsigned char)needle[needleIndex]))
            {
                needleIndex++;
            }

            if (needle[needleIndex] == '\0')
            {
                return (char *)haystack;
            }
        }
    }

    return NULL; // No match found
}

Tests:

void runTests()
{
    char *result;

    
    result = stristr("On a wall he sat", "sat");
    assert(result != NULL && strcmp(result, "sat") == 0);

    result = stristr("Hello, World!", "world");
    assert(result != NULL && strcmp(result, "World!") == 0);

    result = stristr("Programming is fun", "fun");
    assert(result != NULL && strcmp(result, "fun") == 0);

    result = stristr("Some text", "");
    assert(result == "Some text");

    result = stristr("", "anything");
    assert(result == NULL);

    result = stristr("", "");
    assert(result != NULL && strcmp(result, "") == 0);

    result = stristr("Case insensitive search", "missing");
    assert(result == NULL);

    result = stristr("CaseInsensitive", "caseinsensitive");
    assert(result != NULL && strcmp(result, "CaseInsensitive") == 0);

    result = stristr("CaseInsensitive Search", "INSENSITIVE");
    assert(result != NULL && strcmp(result, "Insensitive Search") == 0);

    result = stristr("Short", "This is a long needle");
    assert(result == NULL);

    result = stristr("A@B#C!D$E%", "@b#c");
    assert(result != NULL && strcmp(result, "@B#C!D$E%") == 0);

    printf("All tests passed!\n");
}

int main()
{
    runTests();
    return 0;
}

https://godbolt.org/z/cb63Y8vW9

Upvotes: 2

Related Questions