Reputation: 1191
RHEL6
I'm trying to implement a perl split funciton in a C subroutine which dynamically builds the array of strings. My attempt fails with a segfault. But it does not fail if I comment out the printf statement in the for loop (perhaps implying that the segfault is in where its getting built as opposed to how)
Here it is...
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int split(char *s, char **arr);
void main(int argc, char* argv[])
{
int x;
int arrsz;
char str[]="aaa:bbb:ccc";
char **arr;
arrsz=split(str,arr);
for(x=0;x<arrsz;x++) {
printf("%s\n",arr[x]);
}
exit(0);
}
/***********************************/
int split(char *str, char **arr) {
int arrsz=0;
char delim[2] = ":";
char *tok;
arr = malloc(sizeof(char **));
arr[0] = malloc(1);
arr[0] = '\0';
tok = strtok(str,delim);
while(tok != NULL) {
arrsz++;
arr = (char **)realloc(arr,(arrsz*sizeof(char *))+1);
arr[arrsz-1] = malloc((sizeof(char)*strlen(tok))+1);
strcpy(arr[arrsz-1],tok);
arr[arrsz]=malloc(1);
arr[arrsz]='\0';
tok = strtok(NULL,delim);
}
return(arrsz);
}
I think the problem is in how I'm passing "arr" to the split function or how it's being received and used in the function. I say this because if I move the body of the function to main, it works there.
I tried dealing with arr inside the functions as it it was a (char ***), but that didn't work.
Can a C expert out there set me straight ?
Upvotes: 0
Views: 99
Reputation: 153303
OP's code does not return the allocated memory assigned to arr
int split(char *str, char **arr) {
...
// Memory allocated and assigned to local `arr`
// Yet `arr` is not returned.
// Calling code never sees the result of this assignment.
arr = malloc(sizeof(char **));
...
return(arrsz);
}
Instead, I took a whole new approach to mimic split /PATTERN/,EXPR
.
I really wanted to avoid all the **
and ***
programming.
IMO, a split()
should not change the expression so directly using strtok()
is out. A common implementation of strtok()
effectively does a strspn()
and strcspsn()
, so coding those directly avoids the strtok()
.
The below returns a string list type. Various other function signatures could be used, this return type seemed natural for OP's goal. Another solution might return a NULL
terminated array of char *
pointers.
When memory allocations fails, it is detected and then code calls TBD_Code();
. Unclear how OP wants to handle that. Code could print a message and exit or attempt some recovery.
#include <stdlib.h>
#include <string.h>
typedef struct {
size_t n;
char **strings;
} string_list;
string_list split(const char *pattern, const char *expr) {
string_list list = { 0, NULL };
size_t length;
// Find length of initial matching characters
while ((length = strspn(expr, pattern)), expr[length]) {
// Skip leading characters from `expr` that match the pattern
expr += length;
// Find length of characters NOT from the pattern
length = strcspn(expr, pattern);
// Allocate for 1 more pointer
void *tmp = realloc(list.strings, sizeof *(list.strings) * (list.n + 1));
if (tmp == NULL) TBD_Code();
list.strings = tmp;
//Allocate for the token and save it
list.strings[list.n] = malloc(length + 1u);
if (list.strings[list.n] == 0) TBD_Code();
memcpy(list.strings[list.n], expr, length);
list.strings[list.n][length] = '\0';
// Advance
list.n++;
expr += length;
}
return list;
}
void string_list_free(string_list list) {
if (list.strings) {
for (size_t i = 0; i < list.n; i++) {
free(list.strings[i]);
}
free(list.strings);
}
}
Test code
#include <stdio.h>
void print_string_list(string_list list) {
for (size_t i = 0; i < list.n; i++) {
printf("%zu: <%s>\n", i, list.strings[i]);
}
string_list_free(list);
}
int main(void) {
print_string_list(split(":", "aaa:bbb:ccc"));
print_string_list(split(":b", "aaa:bbb:ccc"));
print_string_list(split("a:", "aaa:bbb:ccc"));
print_string_list(split(":c", "aaa:bbb:ccc"));
}
Output
0: <aaa>
1: <bbb>
2: <ccc>
0: <aaa>
1: <ccc>
0: <bbb>
1: <ccc>
0: <aaa>
1: <bbb>
Upvotes: 0
Reputation: 33601
There are a few bugs. I've annotated and [partially] fixed bugs. It will still segfault. I added a refactored version that will work correctly.
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int split(char *s, char **arr);
void main(int argc, char* argv[])
{
int x;
int arrsz;
char str[]="aaa:bbb:ccc";
char **arr;
#if 1
#endif
arrsz=split(str,arr);
for(x=0;x<arrsz;x++) {
printf("%s\n",arr[x]);
}
exit(0);
}
/***********************************/
int split(char *str, char **arr) {
int arrsz=0;
char delim[2] = ":";
char *tok;
// NOTE/BUG: this function only changes arr within the function and does
// _not_ propagate it to the caller
arr = malloc(sizeof(char **));
// NOTE/BUG: this is replaced in the loop and leaks memory
#if 0
arr[0] = malloc(1);
arr[0] = '\0';
#endif
tok = strtok(str,delim);
while(tok != NULL) {
arrsz++;
// NOTE/BUG: this is incorrect -- it only adds a byte instead of another
// pointer (i.e. it doesn't allocate enough)
#if 0
arr = (char **)realloc(arr,(arrsz*sizeof(char *))+1);
#else
arr = (char **)realloc(arr,sizeof(char *) * (arrsz + 1));
#endif
#if 0
arr[arrsz-1] = malloc((sizeof(char)*strlen(tok))+1);
strcpy(arr[arrsz-1],tok);
#else
arr[arrsz-1] = strdup(tok);
#endif
// NOTE/BUG: this is wrong and leaks memory
#if 0
arr[arrsz]=malloc(1);
arr[arrsz]='\0';
#endif
tok = strtok(NULL,delim);
}
#if 1
arr[arrsz] = NULL;
#endif
return(arrsz);
}
But, as written, your function doesn't update caller's value of arr
.
To fix your function, split
would need arr
to be defined as a "three star" pointer (e.g. char ***arr
) which is considered cumbersome and very bad practice.
So, a better/simpler solution is to refactor the function and pass back arr
as return (e.g. char **split(char *str,int *sizrtn)
:
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char **split(char *s, int *arsiz);
int main(int argc, char* argv[])
{
int x;
int arrsz;
char str[]="aaa:bbb:ccc";
char **arr;
arrsz = 0;
arr = split(str,&arrsz);
for(x=0;x<arrsz;x++) {
printf("%s\n",arr[x]);
}
return 0;
}
/***********************************/
char **split(char *str, int *sizrtn)
{
int arrsz=0;
const char *delim = ":";
char *tok;
char **arr = NULL;
tok = strtok(str,delim);
while (tok != NULL) {
arrsz++;
arr = realloc(arr,sizeof(char *) * (arrsz + 1));
arr[arrsz - 1] = strdup(tok);
tok = strtok(NULL,delim);
}
if (arr == NULL)
arr = malloc(sizeof(*arr));
arr[arrsz] = NULL;
*sizrtn = arrsz;
return arr;
}
Upvotes: 2
Reputation: 16540
the following proposed code:
and now, the proposed code:
//#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char ** split(char *str, size_t *arrsz);
int main( void )
{
size_t x;
size_t arrsz;
char str[]="aaa:bbb:ccc";
char **arr=split(str,&arrsz);
for(x=0;x<arrsz;x++)
{
printf("%s\n",arr[x]);
}
exit(0);
}
/***********************************/
char ** split(char *str, size_t *arrsz)
{
char **arr = NULL;
size_t count = 0;
char delim[2] = ":";
char *tok;
tok = strtok(str,delim);
while(tok != NULL)
{
count++;
char **temp = realloc(arr,(count*sizeof(char *)));
if( !temp )
{
perror( "malloc failed" );
// perform cleanup and
free( arr );
exit( EXIT_FAILURE );
}
arr = temp;
arr[count-1] = strdup( tok );
if( !arr[count-1] )
{
perror( "strdup failed" );
// perform cleanup and
free( arr );
exit( EXIT_FAILURE );
}
tok = strtok(NULL,delim);
}
*arrsz = count;
return( arr );
}
Upvotes: 0
Reputation: 106
The main error is that you should pass a pointer to the strings list to the split function, not the strings list itself, so you should use an ***arr:
int split(char *str, char ***arr);
And you should use & to pass the pointer in main:
...
arrsz=split(str,&arr);
...
In the function you could use a double pointer to avoid confusion and at the end assign that pointer to the parameter:
int split(char *str, char ***arrreturn) {
char **arr; //Use this strings list to add the strings
...
*arreturn = arr;
return(arrsz);
}
-You should not call realloc anytime you need to insert a string, but you could oversize it and increment its dimension if you need.
-I cannot see the need of assign '\0' at the end of the list if you have a variable with the length
-You can use strdup instead of malloc-strcpy funcs:
char *first = "ciao";
char *str = malloc(strlen(first) * sizeof(char));
strcpy(str, first);
Is equal to:
char *first = "ciao";
char *str = strdup(first);
I corrected your code:
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int split(char *str, char ***arrreturn);
void main(int argc, char *argv[]) {
int x;
int arrsz;
char str[] = "aaa:bbb:ccc";
char **arr;
arrsz = split(str, &arr);
for (x = 0; x < arrsz; x++) {
printf("%s\n", arr[x]);
}
exit(0);
}
/***********************************/
int split(char *str, char ***arrreturn) {
int arrsz = 1;
int len = 0;
char delim[2] = ":";
char *tok;
char **arr;
arr = malloc(sizeof(char **));
tok = strtok(str, delim);
while (tok != NULL) {
len++;
if (len >= arrsz) {
arrsz *= 2;
arr = realloc(arr, arrsz * sizeof(char **));
}
arr[len - 1] = strdup(tok);
tok = strtok(NULL, delim);
}
*arrreturn = arr;
return (len);
}
Upvotes: 2
Reputation: 93446
To modify an object in the caller's scope you must pass a pointer to the object - so you need one more level of indirection. There is also at least one semantic error in the implementation - assigning '\0' to the pointer returned by malloc()
, will both invalidate the pointer and cause a memory leak.
Change split()
prototype to:
int split( char* s, char*** arr ) ;
Then call it thus:
arrsz = split( str, &arr ) ;
And change the implementation:
int split( char* str, char*** arr )
{
int arrsz = 0 ;
char delim[2] = ":" ;
char* tok ;
*arr = malloc(sizeof(char**));
*arr[0] = malloc(1);
**arr[0] = '\0'; // <<< This is fixed too
tok = strtok( str, delim ) ;
while( tok != NULL )
{
arrsz++;
*arr = (char **)realloc(*arr,(arrsz*sizeof(char *))+1);
*arr[arrsz-1] = malloc((sizeof(char)*strlen(tok))+1);
strcpy(*arr[arrsz-1],tok);
*arr[arrsz]=malloc(1);
*arr[arrsz]='\0';
tok = strtok(NULL,delim);
}
return(arrsz);
}
There may be other errors I have not spotted, but that is fundamental. Best from hereon debugged using a debugger rather then Q&A.
Upvotes: 0