jr.root.cs
jr.root.cs

Reputation: 195

Realloc failing in string resize function

I'm playing around with the "fat-pointer" idea of the string. Basically I have header structure holding capacity and length information. I allocate it with preset length of characters then return the pointer to the first character. When I want header info I subtract 'sizeof' header.

All functions are working properly the way I expect them to except for the resize function:

typedef uint8_t* utf8;

/*
 * Resize string
 */
bool string_resize( utf8 *str, size_t room ) {
    utf8* p = str;

    struct string_header *hdr = (string_header_t *) (*p - sizeof(string_header_t));

    size_t cap = hdr->capacity;
    size_t len = hdr->length;

    /* Backup the current capacity if the process fails */
    size_t bck = cap;

    if ( len + room <= cap ) {
        //printf("::hit\n");
        return true;
    }

    cap = len + room;

    if ( cap < MAX_PREALLOC ) {
        cap *= 2;
    } else {
        cap += MAX_PREALLOC;
    }

    hdr->capacity = cap;

    void * new = realloc( hdr, sizeof(string_header_t) + cap + 1 );

    if ( new == NULL ) {
        hdr->capacity = bck;
        return false;
    }

    *str = (utf8) new + sizeof(string_header_t);
    /* Remove garbage if there is any  after the string content */
    memset( *str+len, 0, cap-len + 1 );
    return true;
}

Valgrind returns the error that I read in memory not allocated by malloc (always happens when trying to access the new parts of the string).

As You see I use (without typedef) uint8_t** so I should be passing correct pointer to pointer to the function and then changing it.

Any help greatly appreciated.

[update 1] Additional functions for the context of string manipulation:

typedef struct string_header {
     size_t capacity;
     size_t length;
} string_header_t;

/*
 * Allocate the string with the prefered length.
 */
utf8 string_alloc( size_t len ) {
    struct string_header *hdr = calloc(1, sizeof(string_header_t) + sizeof(uint8_t) * len);
    assert( hdr );
    hdr->capacity = len;
    hdr->length   = 0;
    return ((utf8) hdr) + sizeof(string_header_t);
}

/*
 * Allocate the new string with the initial default capacity.
 */
utf8 string_new() {
    return string_alloc( INITIAL_CAPACITY );
}

/*
 * Delete the string.
 */
void string_dealloc( utf8 self ) {
    if ( self == NULL )
        return;
    string_header_t *hdr = (string_header_t *) (self - sizeof(string_header_t));
    free(hdr);
}

static inline void string_push( utf8 s, char c ) {
    string_header_t* hdr = (string_header_t *) (s - sizeof(string_header_t));
    //*(s + hdr->length++) = (uint8_t) c;
    size_t len = hdr->length++;

    s[len] = c;
}

bool string_append_char( utf8 str, char c ) {
    if ( string_resize(&str, 1) != ARDP_SUCCESS )
        return ARDP_FAILURE;

    string_push( str, c );
    return ARDP_SUCCESS;
}

bool string_append_utf8( utf8 s, int cp ) {
    if ( cp < 0 or cp > 0x10ffff ) {
        return false;
    }
    else if ( cp < 0x80 ) {
        return string_append_char(s, cp & 0x7F);
    }
    else if ( cp < 0x800 ) {
        if ( string_resize( &s, 2 ) isnt ARDP_SUCCESS )
            return false;
        string_push( s, 0xC0 | ((cp >> 6) & 0x1F) );
        string_push( s, 0x80 | (cp & 0x3F) );
    }
    else if ( cp < 0x10000 ) {
        if ( string_resize( &s, 3 ) isnt ARDP_SUCCESS )
            return false;
        string_push( s, 0xE0 | ((cp >> 12) & 0xF) );
        string_push( s, 0x80 | ((cp >> 6)  & 0x3F) );
        string_push( s, 0x80 |  (cp & 0x3F) );
    }
    else {
        if ( string_resize( &s, 4 ) isnt ARDP_SUCCESS )
            return false;
        string_push( s, 0xF0 | ((cp >> 18) & 0x7) );
        string_push( s, 0x80 | ((cp >> 12) & 0x3F) );
        string_push( s, 0x80 | ((cp >> 6)  & 0x3F) );
        string_push( s, 0x80 |  (cp & 0x3F) );
    }
    return true;
}

bool string_finish( utf8 str ) {
    if ( string_resize(&str, 1) )
        return false;

    string_header_t *hdr = (string_header_t *) (str - sizeof(string_header_t));
    *(str + hdr->length) = '\0';
     return true;
}

[update 2] Valgrind logs (all of them are almost same as this):

==96370== Invalid read of size 8
==96370==    at 0x100011201: string_append_char (string.c:68)
==96370==    by 0x100000AE7: test_string (example.c:84)  
==96370==    by 0x100000BEA: main (example.c:106)
==96370==  Address 0x100aac6d0 is 0 bytes inside a block of size 24 free'd
==96370==    at 0x1000098B8: realloc (in /usr/local/Cellar/valgrind/HEAD/lib/valgrind/vgpreload_memcheck-amd64-darwin.so)
==96370==    by 0x100011243: string_append_char (string.c:92)
==96370==    by 0x100000ADA: test_string (example.c:83)
==96370==    by 0x100000BEA: main (example.c:106)
==96370==  Block was alloc'd at
==96370==    at 0x100009551: calloc (in /usr/local/Cellar/valgrind/HEAD/lib/valgrind/vgpreload_memcheck-amd64-darwin.so)
==96370==    by 0x1000110F2: string_new (string.c:38)
==96370==    by 0x100000A5A: test_string (example.c:72)
==96370==    by 0x100000BEA: main (example.c:106)

==96370== Invalid write of size 8
==96370==    at 0x100011274: string_append_char (string.h:44)
==96370==    by 0x100000AE7: test_string (example.c:84)
==96370==    by 0x100000BEA: main (example.c:106)
==96370==  Address 0x100aac6d8 is 8 bytes inside a block of size 24 free'd
==96370==    at 0x1000098B8: realloc (in /usr/local/Cellar/valgrind/HEAD/lib/valgrind/vgpreload_memcheck-amd64-darwin.so)
==96370==    by 0x100011243: string_append_char (string.c:92)
==96370==    by 0x100000ADA: test_string (example.c:83)
==96370==    by 0x100000BEA: main (example.c:106)
==96370==  Block was alloc'd at
==96370==    at 0x100009551: calloc (in /usr/local/Cellar/valgrind/HEAD/lib/valgrind/vgpreload_memcheck-amd64-darwin.so)
==96370==    by 0x1000110F2: string_new (string.c:38)
==96370==    by 0x100000A5A: test_string (example.c:72)
==96370==    by 0x100000BEA: main (example.c:106)

[update 3] Some example code:

void test_string(void) {
    utf8 str = string_new();

    string_debug( str );
    string_append_char( str, 'h');
    string_append_char( str, 't');
    string_append_char( str, 't');
    string_append_char( str, 'p'); 
    string_append_char( str, ':');
    string_append_char( str, '/');
    string_append_char( str, '/');
    string_append_char( str, 'g');
    string_append_char( str, 'o');
    string_append_char( str, 'o');
    string_append_char( str, 'g');
    string_append_char( str, 'l');
    string_append_char( str, 'e');
    string_append_char( str, '.');
    string_append_char( str, 'c');
    string_append_char( str, 'o');
    string_append_char( str, 'm');
    string_append_char( str, '/');
    string_append_char( str, '?');
    string_append_char( str, 's');
    string_append_char( str, '=');
    string_append_char( str, 'f');
    string_append_char( str, 'i');
    string_append_char( str, 's');
    string_append_char( str, 'h');

    //string_finish(str);

    printf("String %s", str);

    string_dealloc(str);
}

Upvotes: 1

Views: 447

Answers (2)

rici
rici

Reputation: 241691

You're trying to use a character string pointer as a proxy for a string structure. But the string structure might get reallocated, and thus the address of the string might change. In order for the caller to (for example) string_append_char to be aware of the change, they would have to have some mechanism to receive the new value of the string pointer. But they don't; they pass in a uint8_t* and get back a bool. If the append caused reallocation, the new address will be lost once string_append_char returns.

You could do that by passing a handle (i.e. an uint8_t**) instead of a simple uint8_t*. But in many ways that defeats the point of the interface. At the very least, you'll end up with some calls using &str and others str, which will make your code fragile and hard to read.

Really, you might as well just use a string structure directly, and include an inline function to extract a C-string pointer, similar to the C++ interface. The extra level of indirection might seem a little inefficient, but it has proven to be a lot easier to program with.

Upvotes: 2

Craig Estey
Craig Estey

Reputation: 33601

Here's a "smart" string suite I wrote for another SO answer a while back. It may help a bit:

// javapgmr/xstr -- "smart" string "class" for C

typedef struct {
    size_t xstr_maxlen;                 // maximum space in string buffer
    char *xstr_lhs;                     // pointer to start of string
    char *xstr_rhs;                     // pointer to current string append
} xstr_t;

// xstrinit -- reset string buffer
void
xstrinit(xstr_t *xstr)
{

    memset(xstr,0,sizeof(xstr));
}

// xstragain -- reset string buffer
void
xstragain(xstr_t xstr)
{

    xstr->xstr_rhs = xstr->xstr_lhs;
}

// xstrgrow -- grow string buffer
void
xstrgrow(xstr_t *xstr,size_t needlen)
{
    size_t curlen;
    size_t newlen;
    char *lhs;

    lhs = xstr->xstr_lhs;

    // get amount we're currently using
    curlen = xstr->xstr_rhs - lhs;

    // get amount we'll need after adding the whatever
    newlen = curlen + needlen + 1;

    // allocate more if we need it
    if ((newlen + 1) >= xstr->xstr_maxlen) {
        // allocate what we'll need plus a bit more so we're not called on
        // each add operation
        xstr->xstr_maxlen = newlen + 100;

        // get more memory
        lhs = realloc(lhs,xstr->xstr_maxlen);
        xstr->xstr_lhs = lhs;

        // adjust the append pointer
        xstr->xstr_rhs = lhs + curlen;
    }
}

// xstraddchar -- add character to string
void
xstraddchar(xstr_t *xstr,int chr)
{

    // get more space in string buffer if we need it
    xstrgrow(xstr,1);

    // add the character
    *xstr->xstr_rhs++ = chr;

    // maintain the sentinel/EOS as we go along
    *xstr->xstr_rhs = 0;
}

// xstraddstr -- add string to string
void
xstraddstr(xstr_t *xstr,const char *str)
{
    size_t len;

    len = strlen(str);

    // get more space in string buffer if we need it
    xstrgrow(xstr,len);

    // add the string
    memcpy(xstr->xstr_rhs,str,len);
    xstr->xstr_rhs += len;

    // maintain the sentinel/EOS as we go along
    *xstr->xstr_rhs = 0;
}

// xstrcstr -- get the "c string" value
char *
xstrcstr(xstr_t *xstr,int chr)
{

    return xstr->xstr_lhs;
}

// xstrfree -- release string buffer data
void
xstrfree(xstr_t *xstr)
{
    char *lhs;

    lhs = xstr->xstr_lhs;
    if (lhs != NULL)
        free(lhs);

    xstrinit(xstr);
}

Upvotes: 0

Related Questions