Reputation: 321
I made a simple string vector in C. But the test code is crash and I don't know which part is wrong.
These are my codes.
// vec.h
#include <stdlib.h>
#define VEC_CAPACITY_MULTIPLE 4
typedef struct my_vec {
size_t length;
size_t capacity;
char **strings;
} my_vec;
my_vec* my_vec_new();
void my_vec_push(my_vec *vec, const char *str);
const char* my_vec_get(my_vec *vec, size_t index);
void my_vec_free(my_vec *vec);
and implementations.
// vec.c
#include "vec.h"
#include <string.h>
my_vec* my_vec_new()
{
my_vec *vec = malloc(sizeof(my_vec));
vec->length = 0;
vec->capacity = VEC_CAPACITY_MULTIPLE;
vec->strings = malloc(sizeof(char*) * vec->capacity);
return vec;
}
void my_vec_push(my_vec *vec, const char *str)
{
vec->strings[vec->length] = malloc(sizeof(char) * strlen(str) + 1);
strcpy(vec->strings[vec->length], str);
vec->length++;
if (vec->length == vec->capacity) {
char **new_strings = malloc(
sizeof(char*)
*
vec->capacity + VEC_CAPACITY_MULTIPLE
);
for (size_t i = 0; i < vec->length; ++i) {
new_strings[i] = malloc(sizeof(char) * strlen(str) + 1);
strcpy(new_strings[i], vec->strings[i]);
free(vec->strings[i]);
}
free(vec->strings);
vec->strings = new_strings;
vec->capacity += VEC_CAPACITY_MULTIPLE;
}
}
const char* my_vec_get(my_vec *vec, size_t index)
{
return vec->strings[index];
}
void my_vec_free(my_vec *vec)
{
for (size_t i = 0; i < vec->length; ++i) {
free(vec->strings[i]);
}
free(vec);
}
and the test code.
// test_vec.c
#include <stdio.h>
#include "vec.h"
int main()
{
my_vec *vec = my_vec_new();
my_vec_push(vec, "Hello");
my_vec_push(vec, ",");
my_vec_push(vec, "world");
my_vec_push(vec, "!");
my_vec_push(vec, "foo");
my_vec_push(vec, "bar");
my_vec_push(vec, "baz");
printf("vec capacity: %ld\n", vec->capacity);
printf("vec length: %ld\n", vec->length);
for (size_t i = 0; i < vec->length; ++i) {
printf("%s\n", my_vec_get(vec, i));
}
return 0;
}
but out is something like,
vec capacity: 8
vec length: 7
���ojU
,
world
!
foo
bar
baz
What happend to first string "Hello"? I have put printf in the reallocate part in my_vec_push
and no problems. Only occurs my_vec_get
function. But this function do simply returning pointer to the given index.
Upvotes: 0
Views: 77
Reputation: 386386
At line 31, you allocate strlen(str) + 1
bytes. (sizeof(char)
is guaranteed to be 1
.)
new_strings[i] = malloc(sizeof(char) * strlen(str) + 1);
At line 32, you copy strlen(vec->strings[i]) + 1
bytes.
strcpy(new_strings[i], vec->strings[i]);
Mismatch!
You could use the following:
new_strings[i] = malloc(strlen(vec->strings[i]) + 1);
strcpy(new_strings[i], vec->strings[i]);
free(vec->strings[i]);
You could also use the following:
new_strings[i] = strdup(vec->strings[i]);
free(vec->strings[i]);
But why make a copy of the string at all? You could simply copy the pointer!
new_strings[i] = vec->strings[i];
This leaves you with the following:
for (size_t i = 0; i < vec->length; ++i) {
new_strings[i] = vec->strings[i];
}
That loop could easily be written as follows:
memmove(new_strings, vec->strings, vec->length * sizeof(*new_strings));
But you're not out of the woods yet! The following is also incorrect:
char **new_strings = malloc(
sizeof(char*)
*
vec->capacity + VEC_CAPACITY_MULTIPLE
);
This is because
sizeof(char*) * vec->capacity + VEC_CAPACITY_MULTIPLE
means
( sizeof(char*) * vec->capacity ) + VEC_CAPACITY_MULTIPLE
but you want
sizeof(char*) * ( vec->capacity + VEC_CAPACITY_MULTIPLE )
But, why not use realloc
instead of malloc
+ memmove
?
// Returns 0 and sets errno on error.
int my_vec_push(my_vec *vec, const char *str)
{
if (vec->length == vec->capacity) {
size_t new_capacity = vec->capacity + VEC_CAPACITY_MULTIPLE;
char **new_strings = realloc(vec->strings, sizeof(char*) * new_capacity);
if (!new_strings)
return 0;
vec->strings = new_strings;
vec->capacity = new_capacity;
}
vec->strings[vec->length] = strdup(str);
if (!vec->strings[vec->length])
return 0;
++vec->length;
return 1;
}
It makes more sense to expand the buffer only when needed, so I moved the check.
I've also added error checking.
Tip: You could have pinpointed the error using -fsanitize=address
.
$ gcc -Wall -Wextra -pedantic -fsanitize=address -g main.c vec.c -o a && ./a
==2751==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x602000000092 at pc 0x7f04406a63a6 bp 0x7fffd1a2ada0 sp 0x7fffd1a2a548
WRITE of size 6 at 0x602000000092 thread T0
#0 0x7f04406a63a5 (/usr/lib/x86_64-linux-gnu/libasan.so.4+0x663a5)
#1 0x7f0441a01230 in my_vec_push /.../vec.c:32 <--------
#2 0x7f0441a00dcb in main /.../main.c:13
#3 0x7f0440261b96 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x21b96)
#4 0x7f0441a00c89 in _start (/.../a+0xc89)
[...]
Upvotes: 4
Reputation: 23870
Two mistakes:
One:
In the copying loop, you allocate string buffers large enough to hold the string that was passed as an argument, not the string that you're going to write there. Replace this:
new_strings[i] = malloc(sizeof(char) * strlen(str) + 1);
With this:
new_strings[i] = malloc(sizeof(char) * strlen(vec->strings[i]) + 1);
Two:
This allocates a buffer of the wrong size:
char **new_strings = malloc(
sizeof(char*)
*
vec->capacity + VEC_CAPACITY_MULTIPLE
);
The expression there is equivalent to
(sizeof(char*) * vec->capacity) + VEC_CAPACITY_MULTIPLE
But you want:
sizeof(char*) * (vec->capacity + VEC_CAPACITY_MULTIPLE)
And as noted by ikegami, you do a lot of things that are unnecessary and/or inefficient. For example, your entire capacity extension code can be shortened to:
if (vec->length == vec->capacity) {
vec->strings = realloc(vec->strings, sizeof(char*) * (vec->capacity + VEC_CAPACITY_MULTIPLE));
vec->capacity += VEC_CAPACITY_MULTIPLE;
}
You are, however, very much advised to check the return value of malloc
(and realloc
, if you use my code) against NULL
.
Upvotes: 1