Reputation: 91
I've been learning & coding sorting algorithms for some time and recently I've coded merge sort in C, and I've also coded a sort_test function to test the function that I write. In the sort test function, I'm declaring an array and assigning random values to it, but when the array size gets to 1,000,000 the program crashes. Why is that happening?
sort_test.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "merge_sort.h"
#include "sort_test.h"
// test size
#define MIN 10
#define MAX 1000000
// int comparator
int cmpInt(const void *elem1,const void * elem2){
int e1 = *(int *)elem1; // i-1
int e2 = *(int *)elem2; // i
if(e2 < e1){
return -1;
} else if(e2 > e1){
return 1;
} else {
return 0;
}
}
// double comparator
int cmpDouble(const void *elem1,const void *elem2){
double e1 = *(double *)elem1;
double e2 = *(double *)elem2;
if(e2 < e1){
return -1;
} else if(e2 > e1){
return 1;
} else {
return 0;
}
}
void initSeed(){
srand(time(NULL));
}
void intSortTest(){
initSeed();
for(size_t i = MIN;i <= MAX;i *=10){
int arr[i];
for(size_t j = 0; j < i;j++){
arr[j] = rand();
}
// sorting the array
mergesort(arr,0,i);
// checking if sorted array hold the
// condition i[0] <= i[1] ... <= i[n].
for(size_t j = 1;j < i;j++){
int *e1 = &arr[j-1];
int *e2 = &arr[j];
assert(cmpInt(e2,e1) <= 0);
}
printf("INT TEST : %7d\tPASSED\n",i);
}
printf("\n");
}
void doubleSortTest(){
initSeed();
for(int i = MIN; i <= MAX; i *= 10){
double arr[i];
for(int j = 0 ; j < i;j++){
arr[j] = (double)(rand() % 100) + 1.0;
}
// perform sort
//insertion_sort(arr,sizeof (double),i,cmpDouble);
for(int j = 1; j < i;j++){
double *e1 = &arr[j-1];
double *e2 = &arr[j];
assert(cmpDouble(e2,e1) <= 0);
}
printf("Double Test : %5d\tPASSED\n",i);
}
printf("\n");
}
sort_test.h
#ifndef SORT_TEST_H
#define SORT_TEST_H
void initSeed();
void intSortTest();
void doubleSortTest();
int cmpDouble(const void *elem1,const void *elem2);
int cmpInt(const void *elem1,const void * elem2);
#endif // SORT_TEST_H
merge_sort.h
#ifndef MERGE_SORT_H
#define MERGE_SORT_H
void mergesort(int *arr,int start,int end);
void merge(int *arr,int start,int med,int end);
#endif // MERGE_SORT_H
merge_sort.c
#include <stdio.h>
#include "sort_test.h"
#include "merge_sort.h"
int main(){
intSortTest();
return 0;
}
void mergesort(int *arr,int start,int end){
if(start < end){
int median = (end + start) / 2;
mergesort(arr,start,median);
mergesort(arr,median+1,end);
merge(arr,start,median,end);
}
}
void merge(int *arr,int start,int median,int end){
int i = start; int j = median+1;
int copy[end+1];
int cIndex = 0;
while(i <= median && j <= end) {
if(arr[j] <= arr[i]){
copy[cIndex++] = arr[j++];
} else {
copy[cIndex++] = arr[i++];
}
}
while(i <= median){
copy[cIndex++] = arr[i++];
}
while(j <= end){
copy[cIndex++] = arr[j++];
}
for(int k = 0; k < cIndex; k++){
arr[start++] = copy[k];
}
}
Upvotes: 0
Views: 83
Reputation: 22134
It is because you are allocating the arrays on the stack. Try the following code instead.
void intSortTest(){
initSeed();
for(size_t i = MIN;i <= MAX;i *=10){
int *arr = malloc(i*sizeof(int)); // <-- changed this
for(size_t j = 0; j < i;j++){
arr[j] = rand();
}
// sorting the array
mergesort(arr,0,i);
// checking if sorted array hold the
// condition i[0] <= i[1] ... <= i[n].
for(size_t j = 1;j < i;j++){
int *e1 = &arr[j-1];
int *e2 = &arr[j];
assert(cmpInt(e2,e1) <= 0);
}
printf("INT TEST : %7d\tPASSED\n",i);
free(arr); // <-- added this
}
printf("\n");
}
EDIT
Also the merge algorithm is incorrect. More precisely, you have a problem with the value list boundaries.
When you define the start and end index of a value list, the values are in arr[start]
to arr[end-1]
, not arr[end]
. The number of values is then end-start
. With this convention, you have an empty list when start == end
.
As a consequence, the function mergesort
becomes:
void mergesort(int *arr,int start,int end){
if (start+1 >= end)
return; // a list with 0 or 1 values is already sorted
int median = (end + start) / 2;
mergesort(arr,start,median);
mergesort(arr,median,end);
merge(arr,start,median,end);
}
The merge function then become as follow:
void merge(int *arr,int start,int median,int end){
int i = start; int j = median;
int *copy = malloc((end-start)*sizeof(int)); // use malloc for huge arrays
int cIndex = 0;
while(i < median && j < end) { // not i <= median && j <= end
if(arr[j] <= arr[i]){
copy[cIndex++] = arr[j++];
} else {
copy[cIndex++] = arr[i++];
}
}
while(i < median){ // not i <= median
copy[cIndex++] = arr[i++];
}
while(j < end){ // not j <= median
copy[cIndex++] = arr[j++];
}
for(int k = 0; k < cIndex; k++){
arr[start++] = copy[k];
}
free(copy);
}
As you can see, there are only minor differences.
With this code, your program runs without error.
Upvotes: 2
Reputation: 753775
Now that the code is visible, it is fairly easy to see that you are indeed blowing the stack as I suggested in one of my many comments.
In merge()
, you have:
int copy[end+1];
as well as in intSortTest()
having:
int arr[i];
where i
reaches 1,000,000.
When end
is 1,000,000 — it is set from i
— you have an array of one million int
values being sorted, and a copy with another one million int
values (plus 1), so you attempt to place two million 4-byte int
values on the stack — and 8,000,000 bytes blows the stack limits. Since 800,000 bytes (the previous size) fits on the stack in both Unix and Windows, it isn't 100% clear which you are using. There isn't much margin for error on Unix/Linux; the limit is thoroughly blown on Windows because neither 4 MB array fits on the stack.
The recommended fix is to use dynamic memory allocation (malloc()
et al) instead of stack allocation — in both the sort test function and in the main merge()
code.
Upvotes: 0