Matthew Hoggan
Matthew Hoggan

Reputation: 7604

HOW TO: Deep Copy of Linked List

I am having some issues with a program I am writing in C, and I have surpassed my knowledge. In summary, I need to deep copy a link list from one list to another. The lists have malloc'd data in them and I need to preserve all the data without having pointers pointing at the same information.

I have only posted the code that I think is relevant. If I am missing any important contextual information please let me know.

Here is the code:

matrix.h

typedef struct matrix {
        char *name;
        int R;
        int C;
        int dim;
        void (*concat_matrices)( struct matrix *A, struct matrix *B, struct matrix *ret );
} Matrix;

void concat_matrices( Matrix *A, Matrix *B, Matrix *ret ) {
        int L1 = strlen( A->name );
        int L2 = strlen( B->name );
        int len = L1 + L2;
        char *Ap = (char*)malloc(L1*sizeof(char)); strcpy(Ap,A->name);
        char *Bp = (char*)malloc(L2*sizeof(char )); strcpy(Bp,B->name);
        char *c = (char*)malloc(sizeof(char)*(len + 2));
        c[0] = '('; strcat(c, Ap); strcat(c, Bp); c[len+1] = ')';
        ret->name = (char*)malloc(sizeof(char)*(len + 2));
        strcpy(ret->name, c);
        ret->R = A->R; ret->C = B->C;
        ret->dim = ret->R*ret->C;
        free(Ap); free(Bp); free(c);
}

matrix_list.h

typedef struct node {
        Matrix *M;
        struct node *next;
        struct node *prev;
} Node;

typedef struct matrix_list {
        Node *head;
        Node *tail;
        int size;
        void (*append)( struct matrix_list *list, Matrix *M );
        void (*print)( struct matrix_list *list );
        void (*reverse_print)( struct matrix_list *list );
        void (*delete)( struct matrix_list *list, const char *name );
        void (*delete_tail)( struct matrix_list *list );
        void (*delete_head)( struct matrix_list *list );
        void (*release)( struct matrix_list *list );
        void (*clone_list)( struct matrix_list *from, struct matrix_list *to );
} MatrixList;

...

void clone_list( MatrixList *from, MatrixList *to ) {
        if( from->head == NULL ) {
                to = NULL;
        } else {
                Node *tmp = from->head;
                while( tmp != NULL ) {
                        Matrix *m_copy = (Matrix*)malloc(sizeof(Matrix*));
                        char *c_copy = (char*)malloc(sizeof(char*)*strlen(tmp->M->name));

                        strcpy(c_copy,tmp->M->name);
                        m_copy->name=c_copy;
                        m_copy->R=tmp->M->R;
                        m_copy->C=tmp->M->C;
                        m_copy->concat_matrices = concat_matrices;

                        to->append( to,m_copy );
                        tmp = tmp->next;
                }
        }
}

main.c

chain->print(chain);

MatrixList *chain_copy = (MatrixList*)malloc(sizeof(MatrixList));
set_list_functions( chain_copy );
chain->clone_list(chain, chain_copy);
chain_copy->print(chain_copy);

The problem arises when I try and print out the clone. Because I am malloc'ing in the clone function I understand the data goes out of scope. How can I do this copy so after the function is called clone has its own version of the data?


UPDATE:

First I would like to thank you all for taking your time to answer my question, and for teaching me more about C. I have only been coding for about 3 years. And I have a lot to learn. The updated source with 0 errors from Valgrind can be found at.

http://matthewh.me/Scripts/c++/matrix_chain/ for anyone else trying to figure out things like me. User = guest Password = guest. The clone_list function now looks like this.

void clone_list( MatrixList *from, MatrixList *to ) {
        if( from->head == NULL ) {
                to = NULL;
        } else {
                Node *tmp = from->head;
                while( tmp != NULL ) {
                        Matrix *m_copy = (Matrix*)malloc(sizeof(Matrix));
                        m_copy->name = (char*)malloc(strlen(tmp->M->name) + 1);

                        sprintf( m_copy->name, "%s", tmp->M->name );
                        m_copy->R=tmp->M->R;
                        m_copy->C=tmp->M->C;
                        m_copy->concat_matrices = concat_matrices;

                        to->append( to,m_copy );
                        tmp = tmp->next;
                }
        }
}

If anyone else sees anything else wrong and would like to add additional pointers please feel free to do so.

Upvotes: 3

Views: 4295

Answers (2)

Jonathan Leffler
Jonathan Leffler

Reputation: 755064

You haven't allowed for the null that terminates strings, so you have classic buffer overflows.

You also do not need to copy the names three times. Your current code is:

int L1 = strlen( A->name );
int L2 = strlen( B->name );
int len = L1 + L2;
char *Ap = (char*)malloc(L1*sizeof(char)); strcpy(Ap,A->name);
char *Bp = (char*)malloc(L2*sizeof(char )); strcpy(Bp,B->name);
char *c = (char*)malloc(sizeof(char)*(len + 2));
c[0] = '('; strcat(c, Ap); strcat(c, Bp); c[len+1] = ')';
ret->name = (char*)malloc(sizeof(char)*(len + 2));
strcpy(ret->name, c);
ret->R = A->R; ret->C = B->C;
ret->dim = ret->R*ret->C;
free(Ap); free(Bp); free(c);

This should be:

int   L1 = strlen(A->name);
int   L2 = strlen(B->name);

ret->name = (char *)malloc(L1 + L2 + sizeof("()"));  // That adds 3
sprintf(ret->name, "(%s%s)", A->name, B->name);

ret->R   = A->R;
ret->C   = B->C;
ret->dim = ret->R * ret->C;

This eliminates Ap, Bp and c altogether, and avoids the buffer overflow problems. I'm not sure I'd slam the two names together like you do, but that's your choice.


Apparently, this isn't sufficient of a problem...there are other issues too.

void clone_list( MatrixList *from, MatrixList *to ) {
    if (from->head == NULL) {
        to = NULL;
    } else {
        Node *tmp = from->head;
        while( tmp != NULL ) {
            Matrix *m_copy = (Matrix*)malloc(sizeof(Matrix*));
            char *c_copy = (char*)malloc(sizeof(char*)*strlen(tmp->M->name));

            strcpy(c_copy,tmp->M->name);
            m_copy->name=c_copy;
            m_copy->R=tmp->M->R;
            m_copy->C=tmp->M->C;
            m_copy->concat_matrices = concat_matrices;

            to->append( to,m_copy );
            tmp = tmp->next;
        }
    }
}

The first assignment zeroes the local pointer; it doesn't do a thing to the MatrixList passed in as the target. This should presumably be:

if (from->head == 0)
{
    *to = *from;
}

This does a wholesale structure copy, but sets the head and tail to null, and the function pointers are all fine - they can be shared. Assuming that the size in from was correctly zero, it will be correct in to too. (Again, this is probably not the code you are exercising.)

The next problem is with the memory allocation:

Matrix *m_copy = (Matrix*)malloc(sizeof(Matrix*));

This allocates one pointer's worth of memory, not one Matrix's worth. Use either of these:

Matrix *m_copy = (Matrix *)malloc(sizeof(*m_copy));
Matrix *m_copy = (Matrix *)malloc(sizeof(Matrix));

That is a major source of your trouble (and one which valgrind will find easily).


When the +1 gets forgotten once, it gets forgotten many times, but this time it doesn't cause problems unless the name is the empty string because you multiply by 4 or 8 (32-bit or 64-bit) because of the sizeof(char *) instead of the intended sizeof(char).

char *c_copy = (char*)malloc(sizeof(char*)*strlen(tmp->M->name));
strcpy(c_copy,tmp->M->name);

This should probably be:

m_copy->name = (char *)malloc(strlen(tmp->M->name) + 1);
strcpy(m_copy->name, tmp->M->name);

I'd probably use a name like old instead of tmp. I am also remiss in not reminding previously that you should religiously check every return from every memory allocation. Or use a set of cover functions for the memory allocation routines which do the check for you (often called xmalloc() or emalloc(), etc.).

The code below that does not seem to copy the dim member across, which is a bug if you ever depend on it.

I'm not entirely happy that you seem to rely on the to list being appropriately initializes before calling clone_list(). In particular, the head, tail and size members are not zeroed here, and the function pointers are not set. I'd be happier to see something like:

*to = *from;  // Copy function pointers
to->head = 0;
to->tail = 0;
to->size = 0;
Node *old = from->head;
for (Node *old = from->head; old != NULL; old = old->next)
{
    Matrix *m_copy = clone_matrix(old->M);
    to->append(to, m_copy);
}

Or even:

matrixlist_initialize(to);
Node *old = from->head;
for (Node *old = from->head; old != NULL; old = old->next)
{
    Matrix *m_copy = clone_matrix(old->M);
    to->append(to, m_copy);
}

The clone_matrix() function looks like:

Matrix *clone_matrix(const Matrix *old)
{
    Matrix *m_copy = (Matrix*)malloc(sizeof(*m_copy));

    m_copy->name = (char*)malloc(strlen(old->name)+1);

    strcpy(m_copy->name, old->name);
    m_copy->R   = old->R;
    m_copy->C   = old->C;
    m_copy->dim = old->dim;
    m_copy->concat_matrices = concat_matrices;
    return(m_copy);
}

I downloaded a version your code and it now seems to work, more or less. You should be compiling with at least -Wall as a compiler option; I refuse to compile with anything less and usually use -Wextra too. I make too many simple-to-spot mistakes not to make full use of the compiler, and while you are learning, you will too. (I've only been coding in C for just over a quarter century; the compiler still catches typos and other silly mistakes, but I seldom have big problems once the code is compiling.)

When I turned on -Wall, there was a problem with the (unused) perm() function because it doesn't return a value even though it says it will, and there was a problem because the correct definition for main() with arguments is int main(int argc, char **argv) and you were missing one of the stars. Other than that, it seems to be working OK now - you can continue with your development.

There is a function in POSIX called strdup() which duplicates a string reliably. It is useful and avoids the off-by-one mistakes.

You should review the use of headers. They are for declarations, primarily. If you explicitly use inline (your code doesn't yet), it can be appropriate to include inline functions in a header, but otherwise, function bodies should not be in headers. They should be in source files (.c suffix). Each header should contain the minimum necessary information for the code that uses the functionality provided by the source to use. It should not include extraneous headers, but it should include all necessary headers. In matrix.h, you include <stdio.h> which is unnecessary. And if you removed the code, neither <string.h> nor <stdlib.h> would be needed either.

Upvotes: 6

sarnold
sarnold

Reputation: 104110

    int L1 = strlen( A->name );
    int L2 = strlen( B->name );
    int len = L1 + L2;
    char *Ap = (char*)malloc(L1*sizeof(char)); strcpy(Ap,A->name);
    char *Bp = (char*)malloc(L2*sizeof(char )); strcpy(Bp,B->name);

I'm willing to bet that your strcpy(3) calls here have written outside the bounds of the memory you've allocated: strlen(3) does NOT account for the '\0' terminator at the end of C strings, so your *Ap and *Bp are allocated one-byte-too-short.

Because this is so common it is an easy pattern to find bugs:

int len = strlen(s);
char *new = malloc(len); /* FAIL */

If I don't see a + 1 in the malloc(3) call, it's almost certainly a bug. :) I prefer to see:

int len = strlen(s);
char *new = malloc(len + 1);

rather than:

int len = strlen(s) + 1;
char *new = malloc(len);

If you follow the former, I think it is far easier to spot the wrong cases when you forget the + 1. I think the latter one is too easy to overlook the wrong ones amidst a sea of correct ones.

Similarly, your c and ret->name have been allocated too short.

But, I think there is a far easier answer:

void concat_matrices( Matrix *A, Matrix *B, Matrix *ret ) {
        int len = strlen(A->name) + strlen(B->name) + 2 + 1; /* parens + NUL */
        ret->name = malloc(len);
        sprintf(ret->name, "(%s%s)", A->name, B->name);
        ret->R = A->R; ret->C = B->C;
        ret->dim = ret->R*ret->C;
}

Use sprintf(3) to build the string in one fell swoop. You only need to allocate one string, and that's the one you intend to keep, which will reduce memory fragmentation due to frequent allocate / deallocate cycles. But the most important reason for the re-write is I think this new version is easier to read. Note I broke my rule about the + 1 here -- clarity is improved by adding together all the memory required in one line.

Update

Your clone_list() function suffers from the same problem:

Matrix *m_copy = (Matrix*)malloc(sizeof(Matrix*));
char *c_copy = (char*)malloc(sizeof(char*)*strlen(tmp->M->name));

strcpy(c_copy,tmp->M->name);

Your m_copy looks fine, because it is a binary object and the size is known exactly. But c_copy is allocated one byte too short -- it is not long enough to contain the '\0' byte that is copied into place with the strcpy(3) call immediately following. This routine too is scribbling over unallocated memory.

Upvotes: 2

Related Questions