rookie
rookie

Reputation: 1208

Search for byte pattern in a buffered data stream

I want to search for a pattern of bytes that I receive in chunks (serially) as and when such data is available. For example, a pattern of bytes 0xbbffbbffbb. There is no guarantee that this pattern will be received in full, so doing a simple strnstrn may not be the solution. What algorithm can I use to look for this pattern? My approach is to look for the first byte (in this case 0xbb), then ensure I have 4 more bytes and then compare it with the string. Though it fails if there is some garbage data after two bytes, say 0xbbff01[bbffbbffbb].

My code(sorry if shabby) looks like this:

char* pattern_search(char* buff, size_t *bytes_read)
{
    char* ptr = buff;
    uint16_t remaining_length = *bytes_read;

    while(1) {

        // look for one byte in the stream
        char* pattern_start = memmem((void*)ptr, remaining_length, 0xbb, 1);

        if (pattern_start == NULL) {
            // printf("nothing found\n");
            return NULL;
        }

        int pos = pattern_start - ptr;
        remaining_length = remaining_length - pos;
        ptr = pattern_start;

        // see if you have 5 bytes to compare, if not get more
        remaining_length += get_additional_bytes();

        // compare 5 bytes for pattern
        pattern_start = memmem((void*)ptr, remaining_length, start_flag, PATTERN_LEN);
        if (pattern_start == NULL) {
            // move one step and continue search
            ptr++;
            remaining_length--;
            // move these bytes back to beginning of the buffer
            memcpy(buff, ptr, remaining_length);
            ptr = buff;
            *bytes_read = remaining_length;
            if (remaining_length > 0) {
                continue;
            } else {
                return NULL;
            }
        } else {
            // found!
            printf("pattern found!\n");
            ptr = pattern_start;
            break;
        }
    }

    return ptr;
}

Upvotes: 0

Views: 639

Answers (1)

Stephan Schlecht
Stephan Schlecht

Reputation: 27106

One can certainly find many different solutions here. One could be:

  • specify the pattern as an unsigned char array
  • calling of an 'input_received' function with received data blocks and a pointer to a callback function, which is called whenever the pattern is found

It could look like this:

#include <stdio.h>

static unsigned const char PATTERN[] = {0xbb, 0xff, 0xbb, 0xff, 0xbb};

static void found(size_t pos) {
    printf("pattern found at index %zu\n", pos);
}

static void input_received(const unsigned char *const data,
                           int n,
                           void (*callback)(size_t)) {
    static int match_count;
    static size_t position;

    for (int i = 0; i < n; i++, position++) {
        if (data[i] == PATTERN[match_count]) {
            match_count++;
        } else {
            match_count = data[i] == PATTERN[0] ? 1 : 0;
        }
        if (match_count == sizeof PATTERN) {
            (*callback)(position - sizeof PATTERN + 1);
            match_count = 0;
        }
    }
}

int main(void) {

    unsigned char input[] = {0xff, 0x01, 0x02, 0xff, 0x00,
                             0xbb, 0xff, 0xbb, 0xff, 0xbb,
                             0xbb, 0xff, 0xbb, 0xff, 0xbb};

    input_received(input, 2, found);
    input_received(&input[2], 3, found);
    input_received(&input[5], 2, found);
    input_received(&input[7], 2, found);
    input_received(&input[9], 5, found);
    input_received(&input[14], 1, found);

    return 0;
}

Test

This would then output the following in the debug console:

pattern found at index 5
pattern found at index 10

Upvotes: 1

Related Questions