Welez
Welez

Reputation: 487

Merge Sort for multiple types

I'm trying to implement a Merge Sort algorithm that can work with integers, strings and char. Unfortunately, I found that it doesn't work properly with integers when the array length is odd.

For example: Input: 2 2 3 7 1 2 1. Output: 2 2 3 7 0 1 1.

Now I'm trying to find the mistake in my code. Here it is:

#include <stddef.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include "MergeSort.h"

int StrCmp(char *st1, char *st2) {
    char *str1 = st1, *str2 = st2;
    while (*str1 == *str2 && *str1 != 0)
        str1++, str2++;
    return *str1 - *str2;
}

int CompareInt(void *a, void *b) {
    return *(int *)a - *(int *)b;
}

int CompareChar(void *a, void *b) {
    return *(char *)a - *(char *)b;
}

int CompareStr(const void *a, const void *b) {
    return strcmp(*(char **)a, *(char **)b);
}

int merge(void *base, size_t num, size_t el_size, int (*compar)(const void*, const void*)) {
    size_t size = (size_t)num / 2;
    char *char_base = (char *)base;
    char *first = malloc(size * el_size);
    char *second = malloc((num - size) * el_size);

    for (int i = 0; i < size; i++)
        for (int j = 0; j < el_size; j++)
            first[i * el_size + j] = char_base[i * el_size + j];

    for (int i = 0; i < num - size; i++)
        for (int j = 0; j < el_size; j++)
            second[i * el_size + j] = char_base[size * el_size + i * el_size + j];

    size_t i = 0, j = 0, c = 0;
    while (i < size && j < num - size) {
        if (compar(&first[i * el_size], &second[j * el_size]) <= 0) {
            for (int k = 0; k < el_size; k++) 
                char_base[el_size * c + k] = first[i * el_size + k];
            i++;
        } else { 
            for (int k = 0; k < el_size; k++) 
                char_base[el_size * c + k] = second[j * el_size + k];
            j++;
        }
        c++;
    }

    if (i == size) { 
        while (j < num - size) {
            for (int k = 0; k < el_size; k++) 
                char_base[el_size * c + k] = second[j * el_size + k];
            j++;
            c++;
        }
    } else { 
        while (i < size) {
            for (int k = 0; k < el_size; k++) 
                char_base[el_size * c + k] = first[i * el_size + k];
            i++;
            c++;
        }
    }
    free(first);
    free(second);
}

int merge_sort(void *base, size_t num, size_t el_size, int (*compar)(const void*, const void*)) {
    if (num > 1) {
        size_t s = num / 2;
        char *char_base = base;
        merge_sort(char_base, s, el_size, compar);
        merge_sort(char_base + (num - s) * el_size, num - s, el_size, compar);
        merge(char_base, num, el_size, compar);
    }
}

int main() {
    int nums[] = { 2, 2, 3, 7, 1, 2, 1 };
    cmp_t cmp = CompareInt;
    merge_sort(nums, 7, sizeof(int), cmp);
    for (int i = 0; i < 7; i++)
        printf("%i ", nums[i]);
    return 0;
}

Upvotes: 0

Views: 952

Answers (1)

chqrlie
chqrlie

Reputation: 145317

The bug is in function merge_sort(): the second recursive call is done on the wrong base address:

merge_sort(char_base + (num - s) * el_size, num - s, el_size, compar);

fix is with

merge_sort(char_base + s * el_size, num - s, el_size, compar);

Note that there are other issues in your code:

  • the comparison functions have incorrect signatures, they should take const void * arguments.

  • both merge() and merge_sort() should be defined as void since they return no value.

  • CompareInt cannot handle large integer values whose difference exceeds INT_MAX, such as INT_MAX and INT_MIN. It should be written:

    int CompareInt(const void *a, const void *b) {
        int na = *(const int *)a;
        int nb = *(const int *)b;
        return (nb < na) - (na < nb);
    }
    
  • you should print a '\n' after the numbers.

You could also improve the implementation in various ways:

  • if you computed s as (n + 1) / 2, you could use less memory and have a simpler and faster implementation as you would not need the second array in the merge function.

  • using pointers, you would drastically reduce the number of multiplications.

Here is a simpler implementation with the same semantics:

void merge(void *base, size_t num, size_t el_size, size_t size,
           int (*compar)(const void*, const void*)) {
    size_t split = size * el_size;
    char *first = malloc(split);
    char *p1 = memcpy(first, base, split);
    char *dest = base, *p2 = dest + split;
    size_t i = 0, j = size;
    while (i < size) {
        if (j == num || compar(p1, p2) <= 0) {
            for (size_t k = 0; k < el_size; k++)
                *dest++ = *p1++;
            i++;
        } else {
            for (size_t k = 0; k < el_size; k++)
                *dest++ = *p2++;
            j++;
        }
    }
    free(first);
}

void merge_sort(void *base, size_t num, size_t el_size,
                int (*compar)(const void*, const void*)) {
    if (num > 1) {
        size_t s = (num + 1) / 2;
        char *char_base = base;
        merge_sort(char_base, s, el_size, compar);
        merge_sort(char_base + s * el_size, num - s, el_size, compar);
        merge(char_base, num, el_size, s, compar);
    }
}

Upvotes: 3

Related Questions