Tim
Tim

Reputation: 2163

Parsing words from string in C

I'm trying to extract the words from a string in C and I'm getting unexpected results. I have print statements that seem to confirm my control flow, but printing the final array returns nothing.

In the code below I'm trying to parse input, which is the string "Some stuff \t to parse &". I want to turn that string into params[0] = "Some"; params[1] = "stuff"; params[2] = "to"; params[3] = "parse"; params[4] = "&";

#include <stdio.h>
#include <ctype.h>
#include <string.h>
/* Number of words parse() records before it stops. */
#define MAX_PARAMS 10
/* Size in bytes of the scratch buffer parse() uses to build a single word. */
#define MAX_WORDS 100

/* Splits input on whitespace and records the words in params. */
void parse(void);
/* Text to be split into words. */
char* input = "Some stuff  \t to parse &";
/* Word slots filled by parse(); MAX_PARAMS+1 entries, all initially NULL. */
char* params[MAX_PARAMS+1] = {NULL};

/*
 * Runs parse() and then passes every slot of params to puts().
 *
 * NOTE(review): the loop visits all MAX_PARAMS+1 slots, including ones that
 * are still NULL, and every slot parse() filled holds the address of parse()'s
 * local buffer, which no longer exists by the time it is printed here.
 */
int main(int argc, char *argv[]) {
    parse();
    // Prints nothing!
    for (int i = 0; i <= MAX_PARAMS; i++) {
        puts(params[i]);
    }
}

/*
 * Walks input one character at a time, collecting non-whitespace characters
 * in a local buffer and recording that buffer in params whenever whitespace
 * ends a word. Stops early once MAX_PARAMS words have been recorded.
 *
 * NOTE(review): every params entry is set to the address of the same local
 * array `buffer`, which is zeroed after each word and ceases to exist when
 * this function returns, so no entry keeps its word.
 */
void parse(void) {
    char buffer[MAX_WORDS];
    memset(buffer, 0, MAX_WORDS);
    // param: next free slot in params; buff: characters held in buffer so far.
    int param = 0;
    int buff = 0;
    int i = 0;
    for (; i < strlen(input); i++) {
        // Whitespace following at least one collected character ends a word.
        if (isspace(input[i]) && *buffer) {
            printf("Buffer '%s' being added to params[%d]\n", buffer, param);
            // Stores the address of buffer itself, not a copy of its contents.
            params[param++] = buffer;
            // Clear the buffer so the next word starts from empty.
            memset(buffer, 0, MAX_WORDS);
            buff = 0;
            if (param == MAX_PARAMS) {
                break;
            }
        }
        else if (!isspace(input[i])) {
            buffer[buff++] = input[i];
            printf("%c added to buffer\n", input[i]);
        }
    }

    // Record a final word that was not followed by whitespace.
    if (param < MAX_PARAMS && strcmp(buffer, "")) {
        params[param] = buffer;
        printf("Buffer '%s' being added to params[%d]\n", buffer, param);
    }
}

Upvotes: 3

Views: 3730

Answers (2)

RoadRunner
RoadRunner

Reputation: 26335

You need to store a copy of the buffer's contents for each word; otherwise every entry in params points at the same buffer, and each new word overwrites the previous one.

You can do two things to solve this:

  1. Use malloc to allocate enough memory for each word, copy the buffer into it, and store that pointer in your array.
  2. Use strdup, which allocates a new null-terminated copy of the string and returns a pointer to it (effectively malloc plus strcpy in one call).

With either of these methods, you must free each copy once you are done with it in order to prevent a memory leak.

In this example I used malloc(), but it can just as easily be done with strdup().

Note: This is a character by character brute force approach. As others have said, using strtok() would make life easier, however, if you are looking for the first approach, something like this would be what you want.

This is the code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* Bound on the number of stored words used by parse()'s main loop. */
#define MAX_PARAMS 10
/* Size in bytes of the scratch buffer parse() uses to build a single word. */
#define MAX_WORDS 100

/* Splits input into words, stores copies in params, then prints and frees them. */
void parse(char *input, char *params[]);
/* Stores a malloc'd copy of buffer at params[*currparam] and increments *currparam. */
void add_param(char *params[], char buffer[], int *currparam);
/* Prints, frees and NULLs the first n entries of params. */
void print_and_free_params(char *params[], int n);

/*
 * Program entry point: hands a fixed sample sentence and an empty,
 * NULL-initialised params array to parse().
 * The command-line arguments are accepted but not used.
 */
int
main(int argc, char *argv[]) {
    char *params[MAX_PARAMS + 1] = { NULL };
    char input[] = "Some stuff to parse &";

    (void)argc;
    (void)argv;

    parse(input, params);
    return 0;
}

/*
 * Splits input into whitespace-separated words, stores a heap copy of each
 * word in params (via add_param), then prints and releases them all with
 * print_and_free_params().
 *
 * input  - NUL-terminated string to split; it is only read.
 * params - array with room for at least MAX_PARAMS entries; every entry
 *          written here has been freed and reset to NULL on return.
 *
 * At most MAX_PARAMS words are stored. Characters of a word beyond the first
 * MAX_WORDS - 1 are dropped so the scratch buffer cannot overflow.
 */
void
parse(char *input, char *params[]) {
    size_t i;
    int param = 0, buff_count = 0;
    char buffer[MAX_WORDS];

    /* Start with an empty word so the buffer is never read uninitialised. */
    buffer[0] = '\0';

    for (i = 0; input[i] != '\0' && param < MAX_PARAMS; i++) {
        /* <ctype.h> functions require a value representable as unsigned char. */
        if (isspace((unsigned char)input[i])) {
            /* Only a pending, non-empty word ends here; testing the count
             * (rather than stale buffer contents) keeps runs of whitespace
             * from storing the previous word again. */
            if (buff_count > 0) {
                add_param(params, buffer, &param);
                buff_count = 0;
                buffer[0] = '\0';
            }
        } else if (buff_count < MAX_WORDS - 1) {
            buffer[buff_count++] = input[i];
            buffer[buff_count] = '\0';
        }
    }

    /* Flush a word that runs up to the end of the input. */
    if (buff_count > 0 && param < MAX_PARAMS) {
        add_param(params, buffer, &param);
    }

    print_and_free_params(params, param);
}

/*
 * Stores a freshly malloc'd copy of buffer in params[*currparam] and then
 * advances *currparam by one.
 *
 * params    - array receiving the pointer to the new copy.
 * buffer    - NUL-terminated string to duplicate.
 * currparam - in/out index of the slot to fill; incremented on success.
 *
 * If the allocation fails, a message is written to stderr and the program
 * exits with EXIT_FAILURE. The stored copy comes from malloc().
 */
void
add_param(char *params[], char buffer[], int *currparam) {
    const int slot = *currparam;
    const size_t bytes = strlen(buffer) + 1;    /* text plus terminator */

    params[slot] = malloc(bytes);
    if (!params[slot]) {
        fprintf(stderr, "%s\n", "Error mallocing space for string");
        exit(EXIT_FAILURE);
    }

    strcpy(params[slot], buffer);
    *currparam = slot + 1;
}

/*
 * Prints the first n entries of params, one "params[i] = text" line each,
 * releasing every entry with free() and resetting its slot to NULL as it goes.
 *
 * params - array whose first n entries are passed to printf("%s") and free().
 * n      - number of entries to process; nothing happens when n <= 0.
 */
void
print_and_free_params(char *params[], int n) {
    int idx = 0;

    while (idx < n) {
        char **slot = &params[idx];

        printf("params[%d] = %s\n", idx, *slot);
        free(*slot);
        *slot = NULL;
        idx++;
    }
}

Program output here: Demo

Upvotes: 1

tdao
tdao

Reputation: 17713

There is at least one mistake in your code: the pointers in your params array all point to the local variable buffer, whose lifetime ends when parse() returns. Using them after that is undefined behavior.

I'd be surprised if you couldn't use the standard function strtok for this purpose.

It is as simple as:

char input[] = "Some stuff  \t to parse &";  // <- you need to change to array because `strtok` change the string in place

char * pch = strtok (input," \t");
while (pch != NULL)
{
    printf ("%s\n",pch);
    pch = strtok (NULL, " \t");
}

Functional code:

/* Tokenizes input and stores the token pointers in params. */
void parse(void);
/* Text to tokenize; parse() hands this array to strtok(). */
char input[] = "Some stuff  \t to parse &";
/* Token pointers filled by parse(); MAX_PARAMS+1 entries, all initially NULL. */
char* params[MAX_PARAMS+1] = {NULL};

/*
 * Runs parse() and then prints the stored words one per line, stopping at
 * the first NULL slot or once all MAX_PARAMS+1 slots have been visited.
 */
int main(int argc, char *argv[]) {
    parse();

    int i = 0;
    while (i <= MAX_PARAMS && params[i] != NULL) {
        puts(params[i]);
        i++;
    }
}

void parse(void) {
    int i = 0;
    char * pch = strtok (input," \t");
    params[i++] = pch;
    while (pch != NULL)
    {
        pch = strtok (NULL, " \t");
        params[i++] = pch;
    }        
}

Upvotes: 3

Related Questions