daveg
daveg

Reputation: 1191

In C, how can one dynamically build an array of strings in a function and return to caller

RHEL6

I'm trying to implement a perl split funciton in a C subroutine which dynamically builds the array of strings. My attempt fails with a segfault. But it does not fail if I comment out the printf statement in the for loop (perhaps implying that the segfault is in where its getting built as opposed to how)

Here it is...

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int split(char *s, char **arr);


void main(int argc, char* argv[])
{
  int x;
  int arrsz;
  char str[]="aaa:bbb:ccc"; 
  char **arr;


  arrsz=split(str,arr);


  for(x=0;x<arrsz;x++) {
    printf("%s\n",arr[x]);
  }

  exit(0);
}

/***********************************/
int split(char *str, char **arr) {

  int arrsz=0;
  char delim[2] = ":";
  char *tok;

  arr = malloc(sizeof(char **));
  arr[0] = malloc(1);
  arr[0] = '\0';

  tok = strtok(str,delim);
  while(tok != NULL) {
    arrsz++;
    arr = (char **)realloc(arr,(arrsz*sizeof(char *))+1);
    arr[arrsz-1] = malloc((sizeof(char)*strlen(tok))+1);
    strcpy(arr[arrsz-1],tok);
    arr[arrsz]=malloc(1);
    arr[arrsz]='\0';

    tok = strtok(NULL,delim);
  }

  return(arrsz);
}

I think the problem is in how I'm passing "arr" to the split function or how it's being received and used in the function. I say this because if I move the body of the function to main, it works there.

I tried dealing with arr inside the functions as it it was a (char ***), but that didn't work.

Can a C expert out there set me straight ?

Upvotes: 0

Views: 99

Answers (5)

chux
chux

Reputation: 153303

OP's code does not return the allocated memory assigned to arr

int split(char *str, char **arr) {
  ...
  // Memory allocated and assigned to local `arr`
  // Yet `arr` is not returned.
  // Calling code never sees the result of this assignment.
  arr = malloc(sizeof(char **));
  ...
  return(arrsz);
}

Instead, I took a whole new approach to mimic split /PATTERN/,EXPR.

I really wanted to avoid all the ** and *** programming.

IMO, a split() should not change the expression so directly using strtok() is out. A common implementation of strtok() effectively does a strspn() and strcspsn(), so coding those directly avoids the strtok().

The below returns a string list type. Various other function signatures could be used, this return type seemed natural for OP's goal. Another solution might return a NULL terminated array of char * pointers.

When memory allocations fails, it is detected and then code calls TBD_Code();. Unclear how OP wants to handle that. Code could print a message and exit or attempt some recovery.

#include <stdlib.h>
#include <string.h>

typedef struct {
  size_t n;
  char **strings;
} string_list;

string_list split(const char *pattern, const char *expr) {
  string_list list = { 0, NULL };
  size_t length;

  // Find length of initial matching characters
  while ((length = strspn(expr, pattern)), expr[length]) {
    // Skip leading characters from `expr` that match the pattern
    expr += length;
    // Find length of characters NOT from the pattern
    length = strcspn(expr, pattern);
    // Allocate for 1 more pointer
    void *tmp = realloc(list.strings, sizeof *(list.strings) * (list.n + 1));
    if (tmp == NULL) TBD_Code();
    list.strings = tmp;
    //Allocate for the token and save it
    list.strings[list.n] = malloc(length + 1u);
    if (list.strings[list.n] == 0) TBD_Code();
    memcpy(list.strings[list.n], expr, length);
    list.strings[list.n][length] = '\0';
    // Advance
    list.n++;
    expr += length;
  }
  return list;
}

void string_list_free(string_list list) {
  if (list.strings) {
    for (size_t i = 0; i < list.n; i++) {
      free(list.strings[i]);
    }
    free(list.strings);
  }
}

Test code

#include <stdio.h>

void print_string_list(string_list list) {
  for (size_t i = 0; i < list.n; i++) {
    printf("%zu: <%s>\n", i, list.strings[i]);
  }
  string_list_free(list);
}

int main(void) {
  print_string_list(split(":", "aaa:bbb:ccc"));
  print_string_list(split(":b", "aaa:bbb:ccc"));
  print_string_list(split("a:", "aaa:bbb:ccc"));
  print_string_list(split(":c", "aaa:bbb:ccc"));
}

Output

0: <aaa>
1: <bbb>
2: <ccc>
0: <aaa>
1: <ccc>
0: <bbb>
1: <ccc>
0: <aaa>
1: <bbb>

Upvotes: 0

Craig Estey
Craig Estey

Reputation: 33601

There are a few bugs. I've annotated and [partially] fixed bugs. It will still segfault. I added a refactored version that will work correctly.

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int split(char *s, char **arr);

void main(int argc, char* argv[])
{
  int x;
  int arrsz;
  char str[]="aaa:bbb:ccc";
  char **arr;

#if 1
#endif

  arrsz=split(str,arr);

  for(x=0;x<arrsz;x++) {
    printf("%s\n",arr[x]);
  }

  exit(0);
}

/***********************************/
int split(char *str, char **arr) {

  int arrsz=0;
  char delim[2] = ":";
  char *tok;

  // NOTE/BUG: this function only changes arr within the function and does
  // _not_ propagate it to the caller

  arr = malloc(sizeof(char **));

  // NOTE/BUG: this is replaced in the loop and leaks memory
#if 0
  arr[0] = malloc(1);
  arr[0] = '\0';
#endif

  tok = strtok(str,delim);
  while(tok != NULL) {
    arrsz++;

    // NOTE/BUG: this is incorrect -- it only adds a byte instead of another
    // pointer (i.e. it doesn't allocate enough)
#if 0
    arr = (char **)realloc(arr,(arrsz*sizeof(char *))+1);
#else
    arr = (char **)realloc(arr,sizeof(char *) * (arrsz + 1));
#endif

#if 0
    arr[arrsz-1] = malloc((sizeof(char)*strlen(tok))+1);
    strcpy(arr[arrsz-1],tok);
#else
    arr[arrsz-1] = strdup(tok);
#endif

    // NOTE/BUG: this is wrong and leaks memory
#if 0
    arr[arrsz]=malloc(1);
    arr[arrsz]='\0';
#endif

    tok = strtok(NULL,delim);
  }

#if 1
  arr[arrsz] = NULL;
#endif

  return(arrsz);
}

But, as written, your function doesn't update caller's value of arr.

To fix your function, split would need arr to be defined as a "three star" pointer (e.g. char ***arr) which is considered cumbersome and very bad practice.

So, a better/simpler solution is to refactor the function and pass back arr as return (e.g. char **split(char *str,int *sizrtn):

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char **split(char *s, int *arsiz);

int main(int argc, char* argv[])
{
  int x;
  int arrsz;
  char str[]="aaa:bbb:ccc";
  char **arr;

  arrsz = 0;
  arr = split(str,&arrsz);

  for(x=0;x<arrsz;x++) {
    printf("%s\n",arr[x]);
  }

  return 0;
}

/***********************************/
char **split(char *str, int *sizrtn)
{
  int arrsz=0;
  const char *delim = ":";
  char *tok;
  char **arr = NULL;

  tok = strtok(str,delim);
  while (tok != NULL) {
    arrsz++;
    arr = realloc(arr,sizeof(char *) * (arrsz + 1));

    arr[arrsz - 1] = strdup(tok);

    tok = strtok(NULL,delim);
  }

  if (arr == NULL)
    arr = malloc(sizeof(*arr));

  arr[arrsz] = NULL;

  *sizrtn = arrsz;

  return arr;
}

Upvotes: 2

user3629249
user3629249

Reputation: 16540

the following proposed code:

  1. cleanly compiles
  2. performs the desired functionality
  3. properly checks for errors from system functions
  4. eliminates any need to use a *** parameter -- google three star programer as to why that is bad
  5. does not include header files those contents are not used

and now, the proposed code:

//#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char ** split(char *str, size_t *arrsz);


int main( void )
{
    size_t x;
    size_t arrsz;
    char str[]="aaa:bbb:ccc";

    char **arr=split(str,&arrsz);


    for(x=0;x<arrsz;x++)
    {
        printf("%s\n",arr[x]);
    }

    exit(0);
}


/***********************************/
char ** split(char *str, size_t *arrsz)
{
    char **arr = NULL;
    size_t count = 0;

    char delim[2] = ":";
    char *tok;

    tok = strtok(str,delim);
    while(tok != NULL)
    {
        count++;
        char **temp = realloc(arr,(count*sizeof(char *)));
        if( !temp )
        {
            perror( "malloc failed" );
            // perform cleanup and
            free( arr );
            exit( EXIT_FAILURE );
        }

        arr = temp;

        arr[count-1] = strdup( tok );
        if( !arr[count-1] )
        {
            perror( "strdup failed" );
            // perform cleanup and
            free( arr );
            exit( EXIT_FAILURE );
        }

        tok = strtok(NULL,delim);
    }

    *arrsz = count;
    return( arr );
}

Upvotes: 0

giuseppe-dandrea
giuseppe-dandrea

Reputation: 106

The main error is that you should pass a pointer to the strings list to the split function, not the strings list itself, so you should use an ***arr:

int split(char *str, char ***arr);

And you should use & to pass the pointer in main:

...
arrsz=split(str,&arr);
...

In the function you could use a double pointer to avoid confusion and at the end assign that pointer to the parameter:

int split(char *str, char ***arrreturn) {
     char **arr;    //Use this strings list to add the strings

     ...

     *arreturn = arr;
     return(arrsz);
}

-You should not call realloc anytime you need to insert a string, but you could oversize it and increment its dimension if you need.

-I cannot see the need of assign '\0' at the end of the list if you have a variable with the length

-You can use strdup instead of malloc-strcpy funcs:

char *first = "ciao";
char *str = malloc(strlen(first) * sizeof(char));
strcpy(str, first);

Is equal to:

char *first = "ciao";
char *str = strdup(first);

I corrected your code:

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int split(char *str, char ***arrreturn);


void main(int argc, char *argv[]) {
    int x;
    int arrsz;
    char str[] = "aaa:bbb:ccc";
    char **arr;


    arrsz = split(str, &arr);


    for (x = 0; x < arrsz; x++) {
        printf("%s\n", arr[x]);
    }

    exit(0);
}

/***********************************/
int split(char *str, char ***arrreturn) {

    int arrsz = 1;
    int len = 0;
    char delim[2] = ":";
    char *tok;
    char **arr;

    arr = malloc(sizeof(char **));

    tok = strtok(str, delim);
    while (tok != NULL) {
        len++;
        if (len >= arrsz) {
            arrsz *= 2;
            arr = realloc(arr, arrsz * sizeof(char **));
        }
        arr[len - 1] = strdup(tok);
        tok = strtok(NULL, delim);
    }
    *arrreturn = arr;
    return (len);
}

Upvotes: 2

Clifford
Clifford

Reputation: 93446

To modify an object in the caller's scope you must pass a pointer to the object - so you need one more level of indirection. There is also at least one semantic error in the implementation - assigning '\0' to the pointer returned by malloc(), will both invalidate the pointer and cause a memory leak.

Change split() prototype to:

int split( char* s, char*** arr ) ;

Then call it thus:

arrsz = split( str, &arr ) ; 

And change the implementation:

int split( char* str, char*** arr ) 
{
  int arrsz = 0 ;
  char delim[2] = ":" ;
  char* tok ;

  *arr = malloc(sizeof(char**));
  *arr[0] = malloc(1);
  **arr[0] = '\0';         // <<< This is fixed too

  tok = strtok( str, delim ) ;
  while( tok != NULL ) 
  {
    arrsz++;
    *arr = (char **)realloc(*arr,(arrsz*sizeof(char *))+1);
    *arr[arrsz-1] = malloc((sizeof(char)*strlen(tok))+1);
    strcpy(*arr[arrsz-1],tok);
    *arr[arrsz]=malloc(1);
    *arr[arrsz]='\0';

    tok = strtok(NULL,delim);
  }

  return(arrsz);
}

There may be other errors I have not spotted, but that is fundamental. Best from hereon debugged using a debugger rather then Q&A.

Upvotes: 0

Related Questions