pkthapa
pkthapa

Reputation: 1071

Reading formatted string data using sscanf

I have the following code:

// Splits a '|'-delimited record into six fields with a single sscanf() call
// and prints the fields separated by spaces.
int main(int argc, char* argv[])
{
    char tempBuf[100] = {"|BOD|01|02|100|ID000001|EOD|"};
    // Field buffers are left uninitialized; they hold valid strings only if
    // the corresponding conversion below succeeds.
    char startSentinel[10], endSentinel[10], s1[10], s2[10], s3[10], s4[10];
    // Each %[^|] conversion stores a run of characters that are not '|'; the
    // literal '|' characters in the format consume the delimiters. The
    // conversions carry no field width, and the return value of sscanf()
    // (the number of fields actually stored) is not checked.
    sscanf((char *)tempBuf, "|%[^|]|%[^|]|%[^|]|%[^|]|%[^|]|%[^|]|", startSentinel, s1, s2, s3, s4, endSentinel);

    cout<<startSentinel<<" "<<s1<<" "<<s2<<" "<<s3<<" "<<s4<<" "<<endSentinel;
    return 0;
}

The above code prints correct data. Output: BOD 01 02 100 ID000001 EOD


If I change the buffer initialization to the following:

char tempBuf[100] = {"|BOD|||100|ID000001|EOD|"};

Output: Garbage output.


If I give spaces as shown:

char tempBuf[100] = {"|BOD| | |100|ID000001|EOD|"}; //Inserted space.

Output is correct:

BOD     100 ID000001 EOD

Can anyone tell me why? And how can I get the correct output without inserting spaces?

Upvotes: 0

Views: 710

Answers (1)

ad absurdum
ad absurdum

Reputation: 21355

Note that there is no need for the cast (char *)tempBuf in the call to sscanf(), since array names decay to pointers in function calls (and in most expressions). The trouble is that the scanset [^|] matches one or more characters which are not '|'; when no such match occurs, the match fails and sscanf() returns.

The easiest solution would be to use the BSD (including macOS) and Linux function strsep() to parse the input string into tokens. Since this function modifies the input string, you may want to use strdup() (POSIX) to make a duplicate of the original string to work with. strdup() uses malloc() to allocate memory for the duplicate string, so this will need to be freed after use. Also, a feature test macro may be needed to enable these functions.

Note that strsep() returns an empty string when two delimiters are found next to each other. In the code below, the first character is assumed to be a delimiter, and it is skipped over. Some changes would need to be made to handle less strictly formatted input.

#define _DEFAULT_SOURCE

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Capacity of every field buffer, including the terminating NUL. */
enum { FIELD_SIZE = 10 };

/*
 * Extract the next token from *cursor with strsep() and copy it into dst as
 * a NUL-terminated string. An empty token (two adjacent delimiters) is valid
 * and yields an empty string.
 *
 * Returns 0 on success, or -1 if there is no token left or the token does
 * not fit in dst_size bytes.
 */
static int next_field(char **cursor, const char *delims, char *dst, size_t dst_size)
{
    char *token = strsep(cursor, delims);

    if (token == NULL) {
        return -1;
    }

    /* snprintf() always terminates dst; strncpy() does not when the source
     * fills the whole buffer. */
    int written = snprintf(dst, dst_size, "%s", token);

    return (written < 0 || (size_t)written >= dst_size) ? -1 : 0;
}

/*
 * Split a '|'-delimited record into six fields and print them separated by
 * spaces. Exits with EXIT_FAILURE if memory cannot be allocated or the
 * record has a missing or over-long field.
 */
int main(void)
{
    char tempBuf[100] = {"|BOD|||100|ID000001|EOD|"};
    char startSentinel[FIELD_SIZE], endSentinel[FIELD_SIZE];
    char s1[FIELD_SIZE], s2[FIELD_SIZE], s3[FIELD_SIZE], s4[FIELD_SIZE];
    char *fields[] = { startSentinel, s1, s2, s3, s4, endSentinel };
    const char *delims = "|";

    /* strsep() modifies its input, so work on a heap copy. */
    char *string = strdup(tempBuf);

    if (string == NULL) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }

    char *next = string + 1;            // skip first delimiter
    int status = EXIT_SUCCESS;

    for (size_t i = 0; i < sizeof fields / sizeof fields[0]; i++) {
        if (next_field(&next, delims, fields[i], FIELD_SIZE) != 0) {
            fprintf(stderr, "malformed record: field %zu is missing or too long\n", i + 1);
            status = EXIT_FAILURE;
            break;
        }
    }

    if (status == EXIT_SUCCESS) {
        printf("%s %s %s %s %s %s\n", startSentinel, s1, s2, s3, s4, endSentinel);
    }

    free(string);

    return status;
}

If the strsep() and strdup() functions are not available, they can be simply implemented, leaving the fundamental code above unchanged:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char * my_strdup(const char *);
char * my_strsep(char **, const char *);

/* Capacity of every field buffer, including the terminating NUL. */
enum { FIELD_SIZE = 10 };

/*
 * Extract the next token from *cursor with my_strsep() and copy it into dst
 * as a NUL-terminated string. An empty token (two adjacent delimiters) is
 * valid and yields an empty string.
 *
 * Returns 0 on success, or -1 if my_strsep() yields no token or the token
 * does not fit in dst_size bytes.
 */
static int next_field(char **cursor, const char *delims, char *dst, size_t dst_size)
{
    char *token = my_strsep(cursor, delims);

    if (token == NULL) {
        return -1;
    }

    /* snprintf() always terminates dst; strncpy() does not when the source
     * fills the whole buffer. */
    int written = snprintf(dst, dst_size, "%s", token);

    return (written < 0 || (size_t)written >= dst_size) ? -1 : 0;
}

/*
 * Split a '|'-delimited record into six fields and print them separated by
 * spaces. Exits with EXIT_FAILURE if memory cannot be allocated or the
 * record has a missing or over-long field.
 */
int main(void)
{
    char tempBuf[100] = {"|BOD|||100|ID000001|EOD|"};
    char startSentinel[FIELD_SIZE], endSentinel[FIELD_SIZE];
    char s1[FIELD_SIZE], s2[FIELD_SIZE], s3[FIELD_SIZE], s4[FIELD_SIZE];
    char *fields[] = { startSentinel, s1, s2, s3, s4, endSentinel };
    const char *delims = "|";

    /* my_strsep() modifies its input, so work on a heap copy. */
    char *string = my_strdup(tempBuf);

    if (string == NULL) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }

    char *next = string + 1;            // skip first delimiter
    int status = EXIT_SUCCESS;

    for (size_t i = 0; i < sizeof fields / sizeof fields[0]; i++) {
        if (next_field(&next, delims, fields[i], FIELD_SIZE) != 0) {
            fprintf(stderr, "malformed record: field %zu is missing or too long\n", i + 1);
            status = EXIT_FAILURE;
            break;
        }
    }

    if (status == EXIT_SUCCESS) {
        printf("%s %s %s %s %s %s\n", startSentinel, s1, s2, s3, s4, endSentinel);
    }

    free(string);

    return status;
}

/*
 * Return a newly malloc()ed copy of the NUL-terminated string str, or NULL
 * if the allocation fails. The caller owns the result and must free() it.
 */
char * my_strdup(const char *str)
{
    size_t size = strlen(str) + 1;      /* room for the terminating NUL */
    char *copy = malloc(size);

    if (copy == NULL) {
        return NULL;
    }

    /* Copies the characters and the terminator in one step. */
    memcpy(copy, str, size);

    return copy;
}

/*
 * Portable stand-in for strsep().
 *
 * Returns the token that starts at *strp, terminated at the first character
 * found in delims, and advances *strp to just past that delimiter. Adjacent
 * delimiters produce an empty token. When no delimiter remains, the rest of
 * the string is returned as the final token and *strp is set to NULL; a
 * further call then returns NULL.
 *
 * The string that *strp points into is modified: each delimiter found is
 * overwritten with '\0'.
 */
char * my_strsep(char **strp, const char *delims)
{
    char *tok = *strp;

    if (tok == NULL) {
        return NULL;
    }

    char *end = strpbrk(tok, delims);

    if (end) {
        *end = '\0';
        *strp = end + 1;
    } else {
        /* Last token: hand back the remainder and mark the input exhausted. */
        *strp = NULL;
    }

    return tok;
}

Both of the above programs give output:

BOD   100 ID000001 EOD

Upvotes: 2

Related Questions