Reputation:
I notcied that memmem
is not available in MSVC for Windows, so I tried to write something for it. I have the following code:
void *memmem(const void *haystack_start, size_t haystack_len, const void *needle_start, size_t needle_len)
{
const unsigned char *haystack = (const unsigned char *)haystack_start;
const unsigned char *needle = (const unsigned char *)needle_start;
const unsigned char *h = NULL;
const unsigned char *n = NULL;
size_t x = needle_len;
/* The first occurrence of the empty string is deemed to occur at
the beginning of the string. */
if (needle_len == 0) {
return (void *)haystack_start;
}
/* Sanity check, otherwise the loop might search through the whole
memory. */
if (haystack_len < needle_len) {
return NULL;
}
for (; *haystack && haystack_len--; haystack++) {
x = needle_len;
n = needle;
h = haystack;
if (haystack_len < needle_len)
break;
if ((*haystack != *needle) || (*haystack + needle_len != *needle + needle_len))
continue;
for (; x; h++, n++) {
x--;
if (*h != *n)
break;
if (x == 0)
return (void *)haystack;
}
}
return NULL;
}
But, I do not think it works correctly. If I try something like this:
static const char haystack[24] = {
0x4e, 0x65, 0x76, 0x65, 0x72, 0x20, 0x67, 0x6f,
0x6e, 0x6e, 0x61, 0x20, 0x67, 0x69, 0x76, 0x65,
0x20, 0x79, 0x6f, 0x75, 0x20, 0x75, 0x70, 0x2c,
};
static const char needle[8] = {
0x20, 0x79, 0x6f, 0x75, 0x20, 0x75, 0x70, 0x2c
};
char *res = memmem(haystack, sizeof(haystack), needle, sizeof(needle));
printf("%s", res);
The result is null. Any ideas where the problem might be?
Upvotes: 4
Views: 3599
Reputation: 141165
I think you are overcomplicating this.
void *memmem(const void *haystack, size_t haystack_len,
const void * const needle, const size_t needle_len)
{
if (haystack == NULL) return NULL; // or assert(haystack != NULL);
if (haystack_len == 0) return NULL;
if (needle == NULL) return NULL; // or assert(needle != NULL);
if (needle_len == 0) return NULL;
for (const char *h = haystack;
haystack_len >= needle_len;
++h, --haystack_len) {
if (!memcmp(h, needle, needle_len)) {
return h;
}
}
return NULL;
}
Until haystack_len is greater or equal to needle_len, you should memory compare needle with current position in haystack. If it's true, return haystack.
const void *
pointer const unsigned char *haystack = (const unsigned char *)haystack_start;
is just const unsigned char *haystack = haystack_start;
(*haystack != *needle) || (*haystack + needle_len != *needle + needle_len))
is just the same think. It becomes obvious, once you use []
operator rather then *
: haystack[0] != needle[0] || haystack[0] + needle_len != needle[0] + needle_len
. Even if you meant ... != needle[needle_len]
this is out-of-bound access of needle.for (; *haystack && haystack_len--; haystack++) {
if (haystack_len < needle_len)
break;
Why not:
for (; *haystack && haystack_len < needle_len; haystack_len--, haystack++)
?
And the expression *haystack
is just invalid, you are not checking null-terminated string like in case of strstr
. haystack points to any bytes in memory and may have zero as values. The haystack_len
keeps the length of haystack.
Upvotes: 7
Reputation: 707
The accepted answer is pretty good, but there are easy optimizations that can be made. For example, windows does provide a memchr function, and it is probably written in assembly language and is much more efficient than manually looping one character at a time from C and calling memcmp at every character. I didn't perform any benchmarks, but almost certainly this is not ideal.
Here's a solution the makes use of this optimization. We do the primary search with memchr to find the first byte of the needle, then do a memcmp to check the rest. Since there are 256 possible values for a byte, on average this will reduce the number of calls to memcmp by a factor of 255.
void *os_memmem(const void *src,int srclen,const void *trg,int trglen)
{
unsigned char *csrc = (unsigned char *)src;
unsigned char *ctrg = (unsigned char *)trg;
unsigned char *tptr,*cptr;
int searchlen,ndx=0;
/* add some initial error checking if you want */
while (ndx<=srclen) {
cptr = &csrc[ndx];
if ((searchlen = srclen-ndx-trglen+1) <= 0) {
return NULL;
} /* if */
if ((tptr = memchr(cptr,*ctrg,searchlen)) == NULL) {
return NULL;
} /* if */
if (memcmp(tptr,ctrg,trglen) == 0) {
return tptr;
} /* if */
ndx += tptr-cptr+1;
} /* while */
return NULL;
}
Upvotes: 1
Reputation: 34408
for (; *haystack && haystack_len--; haystack++) {
Here you're decrementing haystack_len at the start of the loop. This means that when you reach your match haystack_len = 7 but needle_len = 8. Therefore you fail the f (haystack_len < needle_len)
check and continue
. As Kamil notes in the comments you probably don't want a nul check on haystack either. So I'd suggest
for (; haystack_len > 0; ++haystack, --haystack_len) {
Upvotes: 0
Reputation: 108938
The following statements are the same
char needle[] = "a test";
char needle[7] = { 'a', ' ', 't', 'e', 's', 't', '\0' };
Your haystack
does not have a '\0'
at the same place as needle
.
Try with
// needle is NOT a string
char needle[6] = "a test"; // needle is NOT a string
Upvotes: 0