The Internet
The Internet

Reputation: 8103

Trouble while implementing Heap Sort in C

I'm implementing the classic heap sort algorithm in C. I've been staring at this code for hours and still cannot figure out what is wrong for the life of me. The input 3 1 2 7 4 0 2 works well and produces a correct heap, but when I add an 8 on the end (and increment the size by one). It doesn't produce a heap anymore. Any ideas? I think its just a silly off by one error. For reference http://en.wikipedia.org/wiki/Heapsort

#include <ctype.h>
#include <stdio.h>
#include <limits.h>
#include <stdlib.h>
#include <errno.h>
#include <math.h>
#include <string.h>

#define MAX_ARR 1024

void build_heap(int * fn, int len);
void heapify(int *f, int n, int size);
void verify_relations(int *f, int n);
void print_nums (int n[], int len);
void swap(int * a, int * b);
void heap_sort(int * a, int len);

int main(int argc, char **argv){
    /* input works -- 3 1 2 7 4 0 2 */
    /* input broken -- 3 1 2 7 4 0 2 8 */
    int nums[100] = { 3, 1, 2, 7, 4, 0, 2, 8 };

    int len = 8;

    build_heap(nums, len);
    verify_relations(nums, len);
    print_nums(nums, len);
    return 0;
}


void heap_sort(int nums[], int len){
    int i;
    build_heap(nums, len);
    for (i = len; i > 0; --i)
    {
        swap(&nums[1], &nums[i]);
        heapify(nums, 1, len);
    }

}

void build_heap(int * nums, int len) {
    int size = len, i;
    for (i = size; i >= 0; i--){
        heapify(nums, i, size);
    }
}

void heapify(int f[], int n, int size){

    int left = 2 * n,
    right = 2 * n + 1,
    largest = 0;

    if (left < size && f[left] >= f[n])
        largest = left;
    else
        largest = n;

    if (right < size && f[right] >= f[largest])
        largest = right;

    if (largest != n) {
        swap(&f[n], &f[largest]);
        heapify(&n, largest, size);
    }
}

void swap(int * a, int * b){
    int temp = *a;
    *a = *b;
    *b = temp;
}

void verify_relations(int a[], int n){

    if (a[0] >= a[1] && a[0] >= a[2])
        printf("True - '%d >= %d && %d >= %d' \n", a[0], a[1], a[0], a[2]);
    else
        printf("False\n");

    if (a[1] >= a[3] && a[1] >= a[4])
        printf("True - '%d >= %d && %d >= %d' \n", a[1], a[3], a[1], a[4]);
    else
        printf("False\n");

    if (a[2] >= a[4] && a[2] >= a[5])
        printf("True - '%d >= %d && %d >= %d' \n", a[2], a[4], a[3], a[5]);
    else
        printf("False\n");
}


void print_nums (int n[], int len) {
    int c;
    for (c = 0; c < len; c++){
        printf("%d ", n[c]);
    }
}

Upvotes: 1

Views: 621

Answers (2)

Jonathan Leffler
Jonathan Leffler

Reputation: 753970

Apart from the primary bug which alestanis identified, you should look to make your verify_relations() code more thorough, since it doesn't verify the whole heap.

In a heap where the array indexes are 1-based (instead of 0-based as in C), then for each element a[i], if the corresponding element a[i*2] exists, then a[i] <= a[i*2]; if the corresponding element a[i*2+1] exists, then a[i] <= a[i*2+1].

Since we're working in C with 0-based indexes, you have element 0 with children 1, 2; you have element 1 with children 3, 4; you have element 2 with children 5, 6; so element i has children 2*i+1 and 2*i+2.

So, your verify_relations() code could be based on one of the two functions below (verify_relations1() or verify_relations2()). The difference is that verify_relations1() reports on every error in the heap ordering, but verify_relations2() reports the first mismatch only. In both case, you get told the heap is invalid, of course, but sometimes the more thorough report might make the debugging easier. I've enclosed the code in a test harness, mainly because I like to be reasonably sure the code is correct before posting it here. That means I have 'tag' strings to identify what's being analyzed and what the errors are being found in. You might decide you don't need all the tags. You might also decide to make the functions report on stderr instead of stdout, or on a file stream that is passed to the validation function (so you can choose where the output goes at runtime).

#include <assert.h>
#include <stdio.h>

enum { HEAP_OK, HEAP_FAIL };

static int cmp_heap_elements(const int *heap, int node1, int node2, const char *tag)
{
     int rc = HEAP_OK;
     if (heap[node1] > heap[node2])
     {
         rc = HEAP_FAIL;
         printf("%s: heap relation does not hold between heap[%d] = %d and heap[%d] = %d\n",
                 tag, node1, heap[node1], node2, heap[node2]);
     }
     return rc;
}

/* Report on every violation */
static int verify_relations1(const int *heap, int size)
{
     int rc = HEAP_OK;
     for (int i = 0; i < size / 2; i++)
     {
         assert(2*i+1 < size);
         if (2*i+1 >= size)
             break;
         int rv = cmp_heap_elements(heap, i, i*2+1, "R1");
         if (rc == HEAP_OK)
             rc = rv;
         if (2*i+2 >= size)
             break;
         rv = cmp_heap_elements(heap, i, i*2+2, "R1");
         if (rc == HEAP_OK)
             rc = rv;
     }
     return(rc);
}

/* Report on first violation - leaving others undiagnosed */
static int verify_relations2(const int *heap, int size)
{
     for (int i = 0; i < size / 2; i++)
     {
         assert(2*i+1 < size);
         if (2*i+1 >= size)
             break;          // Or: return(HEAP_OK);
         if (cmp_heap_elements(heap, i, i*2+1, "R2") != HEAP_OK)
             return(HEAP_FAIL);
         if (2*i+2 >= size)
             break;          // Or: return(HEAP_OK);
         if (cmp_heap_elements(heap, i, i*2+1, "R2") != HEAP_OK)
             return(HEAP_FAIL);
     }
     return(HEAP_OK);
}

static void print_heap(const int *heap, int size, const char *tag)
{
    printf("%s:", tag);
    for (int i = 0; i < size; i++)
        printf(" %d", heap[i]);
    putchar('\n');
}

static void check_heap(int *heap, int size, const char *tag)
{
    print_heap(heap, size, tag);
    if (verify_relations1(heap, size) != HEAP_OK)
        printf("%s %d - FAIL\n", tag, size);
    if (verify_relations2(heap, size) != HEAP_OK)
        printf("%s %d - FAIL\n", tag, size);
}

int main(void)
{
    int heap1[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    int heap2[] = { 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 };
    int heap3[] = { 1, 2, 6, 4, 10, 3, 7, 8, 9, 5 };
    const int heap1_sz = sizeof(heap1) / sizeof(heap1[0]);
    const int heap2_sz = sizeof(heap2) / sizeof(heap2[0]);
    const int heap3_sz = sizeof(heap3) / sizeof(heap3[0]);
    assert(heap1_sz == heap2_sz && heap1_sz == heap3_sz);

    for (int size = 1; size <= heap1_sz; size++)
    {
        check_heap(heap1, size, "heap1");
        check_heap(heap2, size, "heap2");
        check_heap(heap3, size, "heap3");
    }
    return(0);
}

Upvotes: 1

alestanis
alestanis

Reputation: 21863

The line

heapify(&n, largest, size);

should be

heapify(f, largest, size);
        ^

Upvotes: 7

Related Questions