Christoph
Christoph

Reputation: 169623

Debugging mergesort

I need to sort a doubly-linked list. According to the almighty wikipedia, mergesort is the way to go for that.

The recursive algorithm works reasonably well, but as I'm writing a general-purpose implementation, performance might be an issue.

Porting the iterative version for arrays will kill performance as rescanning the list to divide it into sublists is slow; for anyone interested - here's the code:

static void sort(struct linked_list *list,
    int (*cmp)(const void *, const void *))
{
    size_t slice_size = 1;
    for(; slice_size < list->size; slice_size *= 2)
    {
        struct node *tail = list->first;
        while(tail)
        {
            struct node *head = tail;

            size_t count = slice_size;
            while(tail && count--) // performance killer
                tail = tail->next;

            count = slice_size;
            while(head != tail && tail && count)
            {
                if(cmp(head->data, tail->data) <= 0)
                    head = head->next;
                else
                {
                    struct node *node = tail;
                    tail = tail->next;
                    remove_node(node, list);
                    insert_before(node, list, head);
                    --count;
                }
            }

            while(tail && count--) // performance killer
                tail = tail->next;
        }
    }
}

But there's another iterative version using a stack-based approach:

struct slice
{
    struct node *head;
    size_t size;
};

static void sort(struct linked_list *list,
    int (*cmp)(const void *, const void *))
{
    if(list->size < 2) return;

    struct slice stack[32];
    size_t top = -1;
    struct node *current = list->first;

    for(; current; current = current->next)
    {
        stack[++top] = (struct slice){ current, 1 };
        for(; top && stack[top-1].size <= stack[top].size; --top)
            merge_down(list, cmp, stack + top);
    }

    for(; top; --top)
        merge_down(list, cmp, stack + top);
}

This will push size 1 lists onto the stack and merges down as long as the top list is of greater or equal size than its predecessor.

Unfortunately, there's a bug somewhere as for some input lists, merge_down() will fail a sanity check:

static void merge_down(struct linked_list *list,
    int (*cmp)(const void *, const void *), struct slice *top)
{
    struct node *right = top->head;
    size_t count = top->size;

    --top;

    struct node *left = top->head;
    top->size += count;

{
    // sanity check: count nodes in right list
    int i = count;
    struct node *node = right;
    for(; i--; node = node->next) if(!node)
    {
        puts("too few right nodes");
        exit(0);
    }
}

    // determine merged list's head
    if(cmp(left->data, right->data) <= 0)
    {
        top->head = left;
        left = left->next;
    }
    else
    {
        top->head = right;
        struct node *node = right;
        right = right->next;
        remove_node(node, list);
        insert_before(node, list, left);
        --count;
    }

    while(left != right && count)
    {
        if(cmp(left->data, right->data) <= 0)
            left = left->next;
        else
        {
            struct node *node = right;
            right = right->next;
            remove_node(node, list);
            insert_before(node, list, left);
            --count;
        }
    }
}

The linked list implementation might be relevant as well:

struct node
{
    struct node *prev;
    struct node *next;
    long long data[]; // use `long long` for alignment
};

struct linked_list
{
    struct _list _list; // ignore
    size_t size;
    struct node *first;
    struct node *last;
};

static void insert_before(struct node *node, struct linked_list *list,
    struct node *ref_node)
{
    if(ref_node)
    {
        node->next = ref_node;
        node->prev = ref_node->prev;
        if(ref_node->prev) ref_node->prev->next = node;
        else list->first = node;
        ref_node->prev = node;
    }
    else // empty list
    {
        node->next = NULL;
        node->prev = NULL;
        list->first = node;
        list->last = node;
    }
    ++list->size;
}

static void remove_node(struct node *node, struct linked_list *list)
{
    if(node->prev) node->prev->next = node->next;
    else list->first = node->next;
    if(node->next) node->next->prev = node->prev;
    else list->last = node->prev;
    --list->size;
}

What am I missing here?

Upvotes: 0

Views: 694

Answers (5)

Christoph
Christoph

Reputation: 169623

I found the error myself:

for(; current; current = current->next)
{
    stack[++top] = (struct slice){ current, 1 };
    for(; top && stack[top-1].size <= stack[top].size; --top)
        merge_down(list, cmp, stack + top);
}

Here, the next value of current gets determined after the call to merge_down(), which might move the node around, ie current->next will no longer point to the correct node.

Rearranging fixes the problem:

while(current)
{
    stack[++top] = (struct slice){ current, 1 };
    current = current->next;
    for(; top && stack[top-1].size <= stack[top].size; --top)
        merge_down(list, cmp, stack + top);
}

Thanks to pmg for the effort: I added some votes for that.

Upvotes: 1

Christoph
Christoph

Reputation: 169623

As kriss asked for it, here's the recursive version (a standard mergesort using the merge function from the other exaples):

static struct node *merge(struct linked_list *list,
    int (*cmp)(const void *, const void *),
    struct node *left, struct node *right, size_t right_count)
{
    struct node *head;
    if(cmp(left->data, right->data) <= 0)
    {
        head = left;
        left = left->next;
    }
    else
    {
        head = right;
        struct node *node = right;
        right = right->next;
        remove_node(node, list);
        insert_before(node, list, left);
        --right_count;
    }

    while(left != right && right_count)
    {
        if(cmp(left->data, right->data) <= 0)
            left = left->next;
        else
        {
            struct node *node = right;
            right = right->next;
            remove_node(node, list);
            insert_before(node, list, left);
            --right_count;
        }
    }

    return head;
}

static struct node *mergesort(struct linked_list *list,
    int (*cmp)(const void *, const void *), struct node *head, size_t size)
{
    if(size < 2) return head;
    size_t left_count = size / 2;
    size_t right_count = size - left_count;

    struct node *tail = head;
    size_t count = left_count;
    while(count--) tail = tail->next;

    return merge(list, cmp,
        mergesort(list, cmp, head, left_count),
        mergesort(list, cmp, tail, right_count),
        right_count);
}

static void sort(struct linked_list *list,
    int (*cmp)(const void *, const void *))
{
    mergesort(list, cmp, list->first, list->size);
}

Upvotes: 0

pmg
pmg

Reputation: 108938

stack-based approach

/* ... */
    struct slice stack[32];
    size_t top = -1;
    struct node *current = list->first;

    for(; current; current = current->next)
    {
        stack[++top] = (struct slice){ current, 1 };
        for(; top && stack[top-1].size <= stack[top].size; --top)
        /*    ^^^    */
            merge_down(list, cmp, stack + top);
    }
/* ... */

top is always going to be 0 the first time through the loop, right?
The merge_down() function is never going to get called. I didn't try the code, but that doesn't look right.


Edit
32 elements for the stack is not enough... When the list has more than 32 elements in order (maybe after a few passes) you write beyond the end of stack.

Upvotes: 0

pmg
pmg

Reputation: 108938

I've now run your code and got it to working after I commented out the line indicated below.

static void merge_down(struct linked_list *list,
    int (*cmp)(const void *, const void *), struct slice *top)
{
    struct node *right = top->head;
    size_t count = top->size;

    --top;

    struct node *left = top->head;
    top->size += count; /* possible bug? */
 /* ^^^^^^^^^^^^^^^^^^^ */

Does that work for you too?

Upvotes: 1

pmg
pmg

Reputation: 108938

Do you ever need to copy a node to the end of the list?
What's the insert_before() call then?

insert_before(node, list, NULL);

That would mess up list->first and node->prev.

Upvotes: 1

Related Questions