Reputation: 195
I'm playing around with the "fat-pointer" idea of the string. Basically I have header structure holding capacity and length information. I allocate it with preset length of characters then return the pointer to the first character. When I want header info I subtract 'sizeof' header.
All functions are working properly the way I expect them to except for the resize function:
typedef uint8_t* utf8;
/*
* Resize string
*/
bool string_resize( utf8 *str, size_t room ) {
utf8* p = str;
struct string_header *hdr = (string_header_t *) (*p - sizeof(string_header_t));
size_t cap = hdr->capacity;
size_t len = hdr->length;
/* Backup the current capacity if the process fails */
size_t bck = cap;
if ( len + room <= cap ) {
//printf("::hit\n");
return true;
}
cap = len + room;
if ( cap < MAX_PREALLOC ) {
cap *= 2;
} else {
cap += MAX_PREALLOC;
}
hdr->capacity = cap;
void * new = realloc( hdr, sizeof(string_header_t) + cap + 1 );
if ( new == NULL ) {
hdr->capacity = bck;
return false;
}
*str = (utf8) new + sizeof(string_header_t);
/* Remove garbage if there is any after the string content */
memset( *str+len, 0, cap-len + 1 );
return true;
}
Valgrind returns the error that I read in memory not allocated by malloc (always happens when trying to access the new parts of the string).
As You see I use (without typedef) uint8_t** so I should be passing correct pointer to pointer to the function and then changing it.
Any help greatly appreciated.
[update 1] Additional functions for the context of string manipulation:
typedef struct string_header {
size_t capacity;
size_t length;
} string_header_t;
/*
* Allocate the string with the prefered length.
*/
utf8 string_alloc( size_t len ) {
struct string_header *hdr = calloc(1, sizeof(string_header_t) + sizeof(uint8_t) * len);
assert( hdr );
hdr->capacity = len;
hdr->length = 0;
return ((utf8) hdr) + sizeof(string_header_t);
}
/*
* Allocate the new string with the initial default capacity.
*/
utf8 string_new() {
return string_alloc( INITIAL_CAPACITY );
}
/*
* Delete the string.
*/
void string_dealloc( utf8 self ) {
if ( self == NULL )
return;
string_header_t *hdr = (string_header_t *) (self - sizeof(string_header_t));
free(hdr);
}
static inline void string_push( utf8 s, char c ) {
string_header_t* hdr = (string_header_t *) (s - sizeof(string_header_t));
//*(s + hdr->length++) = (uint8_t) c;
size_t len = hdr->length++;
s[len] = c;
}
bool string_append_char( utf8 str, char c ) {
if ( string_resize(&str, 1) != ARDP_SUCCESS )
return ARDP_FAILURE;
string_push( str, c );
return ARDP_SUCCESS;
}
bool string_append_utf8( utf8 s, int cp ) {
if ( cp < 0 or cp > 0x10ffff ) {
return false;
}
else if ( cp < 0x80 ) {
return string_append_char(s, cp & 0x7F);
}
else if ( cp < 0x800 ) {
if ( string_resize( &s, 2 ) isnt ARDP_SUCCESS )
return false;
string_push( s, 0xC0 | ((cp >> 6) & 0x1F) );
string_push( s, 0x80 | (cp & 0x3F) );
}
else if ( cp < 0x10000 ) {
if ( string_resize( &s, 3 ) isnt ARDP_SUCCESS )
return false;
string_push( s, 0xE0 | ((cp >> 12) & 0xF) );
string_push( s, 0x80 | ((cp >> 6) & 0x3F) );
string_push( s, 0x80 | (cp & 0x3F) );
}
else {
if ( string_resize( &s, 4 ) isnt ARDP_SUCCESS )
return false;
string_push( s, 0xF0 | ((cp >> 18) & 0x7) );
string_push( s, 0x80 | ((cp >> 12) & 0x3F) );
string_push( s, 0x80 | ((cp >> 6) & 0x3F) );
string_push( s, 0x80 | (cp & 0x3F) );
}
return true;
}
bool string_finish( utf8 str ) {
if ( string_resize(&str, 1) )
return false;
string_header_t *hdr = (string_header_t *) (str - sizeof(string_header_t));
*(str + hdr->length) = '\0';
return true;
}
[update 2] Valgrind logs (all of them are almost same as this):
==96370== Invalid read of size 8
==96370== at 0x100011201: string_append_char (string.c:68)
==96370== by 0x100000AE7: test_string (example.c:84)
==96370== by 0x100000BEA: main (example.c:106)
==96370== Address 0x100aac6d0 is 0 bytes inside a block of size 24 free'd
==96370== at 0x1000098B8: realloc (in /usr/local/Cellar/valgrind/HEAD/lib/valgrind/vgpreload_memcheck-amd64-darwin.so)
==96370== by 0x100011243: string_append_char (string.c:92)
==96370== by 0x100000ADA: test_string (example.c:83)
==96370== by 0x100000BEA: main (example.c:106)
==96370== Block was alloc'd at
==96370== at 0x100009551: calloc (in /usr/local/Cellar/valgrind/HEAD/lib/valgrind/vgpreload_memcheck-amd64-darwin.so)
==96370== by 0x1000110F2: string_new (string.c:38)
==96370== by 0x100000A5A: test_string (example.c:72)
==96370== by 0x100000BEA: main (example.c:106)
==96370== Invalid write of size 8
==96370== at 0x100011274: string_append_char (string.h:44)
==96370== by 0x100000AE7: test_string (example.c:84)
==96370== by 0x100000BEA: main (example.c:106)
==96370== Address 0x100aac6d8 is 8 bytes inside a block of size 24 free'd
==96370== at 0x1000098B8: realloc (in /usr/local/Cellar/valgrind/HEAD/lib/valgrind/vgpreload_memcheck-amd64-darwin.so)
==96370== by 0x100011243: string_append_char (string.c:92)
==96370== by 0x100000ADA: test_string (example.c:83)
==96370== by 0x100000BEA: main (example.c:106)
==96370== Block was alloc'd at
==96370== at 0x100009551: calloc (in /usr/local/Cellar/valgrind/HEAD/lib/valgrind/vgpreload_memcheck-amd64-darwin.so)
==96370== by 0x1000110F2: string_new (string.c:38)
==96370== by 0x100000A5A: test_string (example.c:72)
==96370== by 0x100000BEA: main (example.c:106)
[update 3] Some example code:
void test_string(void) {
utf8 str = string_new();
string_debug( str );
string_append_char( str, 'h');
string_append_char( str, 't');
string_append_char( str, 't');
string_append_char( str, 'p');
string_append_char( str, ':');
string_append_char( str, '/');
string_append_char( str, '/');
string_append_char( str, 'g');
string_append_char( str, 'o');
string_append_char( str, 'o');
string_append_char( str, 'g');
string_append_char( str, 'l');
string_append_char( str, 'e');
string_append_char( str, '.');
string_append_char( str, 'c');
string_append_char( str, 'o');
string_append_char( str, 'm');
string_append_char( str, '/');
string_append_char( str, '?');
string_append_char( str, 's');
string_append_char( str, '=');
string_append_char( str, 'f');
string_append_char( str, 'i');
string_append_char( str, 's');
string_append_char( str, 'h');
//string_finish(str);
printf("String %s", str);
string_dealloc(str);
}
Upvotes: 1
Views: 447
Reputation: 241691
You're trying to use a character string pointer as a proxy for a string structure. But the string structure might get reallocated, and thus the address of the string might change. In order for the caller to (for example) string_append_char
to be aware of the change, they would have to have some mechanism to receive the new value of the string pointer. But they don't; they pass in a uint8_t*
and get back a bool
. If the append caused reallocation, the new address will be lost once string_append_char
returns.
You could do that by passing a handle (i.e. an uint8_t**
) instead of a simple uint8_t*
. But in many ways that defeats the point of the interface. At the very least, you'll end up with some calls using &str
and others str
, which will make your code fragile and hard to read.
Really, you might as well just use a string
structure directly, and include an inline function to extract a C-string pointer, similar to the C++
interface. The extra level of indirection might seem a little inefficient, but it has proven to be a lot easier to program with.
Upvotes: 2
Reputation: 33601
Here's a "smart" string suite I wrote for another SO answer a while back. It may help a bit:
// javapgmr/xstr -- "smart" string "class" for C
typedef struct {
size_t xstr_maxlen; // maximum space in string buffer
char *xstr_lhs; // pointer to start of string
char *xstr_rhs; // pointer to current string append
} xstr_t;
// xstrinit -- reset string buffer
void
xstrinit(xstr_t *xstr)
{
memset(xstr,0,sizeof(xstr));
}
// xstragain -- reset string buffer
void
xstragain(xstr_t xstr)
{
xstr->xstr_rhs = xstr->xstr_lhs;
}
// xstrgrow -- grow string buffer
void
xstrgrow(xstr_t *xstr,size_t needlen)
{
size_t curlen;
size_t newlen;
char *lhs;
lhs = xstr->xstr_lhs;
// get amount we're currently using
curlen = xstr->xstr_rhs - lhs;
// get amount we'll need after adding the whatever
newlen = curlen + needlen + 1;
// allocate more if we need it
if ((newlen + 1) >= xstr->xstr_maxlen) {
// allocate what we'll need plus a bit more so we're not called on
// each add operation
xstr->xstr_maxlen = newlen + 100;
// get more memory
lhs = realloc(lhs,xstr->xstr_maxlen);
xstr->xstr_lhs = lhs;
// adjust the append pointer
xstr->xstr_rhs = lhs + curlen;
}
}
// xstraddchar -- add character to string
void
xstraddchar(xstr_t *xstr,int chr)
{
// get more space in string buffer if we need it
xstrgrow(xstr,1);
// add the character
*xstr->xstr_rhs++ = chr;
// maintain the sentinel/EOS as we go along
*xstr->xstr_rhs = 0;
}
// xstraddstr -- add string to string
void
xstraddstr(xstr_t *xstr,const char *str)
{
size_t len;
len = strlen(str);
// get more space in string buffer if we need it
xstrgrow(xstr,len);
// add the string
memcpy(xstr->xstr_rhs,str,len);
xstr->xstr_rhs += len;
// maintain the sentinel/EOS as we go along
*xstr->xstr_rhs = 0;
}
// xstrcstr -- get the "c string" value
char *
xstrcstr(xstr_t *xstr,int chr)
{
return xstr->xstr_lhs;
}
// xstrfree -- release string buffer data
void
xstrfree(xstr_t *xstr)
{
char *lhs;
lhs = xstr->xstr_lhs;
if (lhs != NULL)
free(lhs);
xstrinit(xstr);
}
Upvotes: 0