dalvacoder
dalvacoder

Reputation: 57

Getting incorrect output when I implement merge sort with threads, can't figure out what's wrong

I've been at this problem for like 3 days and I've combed my entire code to try to figure out why I'm getting incorrect output. The purpose of this program is to do a merge sort using threads. The first part is simply sorting the elements in parallel into however many segments a user inputs. The only inputs tested will be 2, 5, and 10. And the array to be sorted will always be 50 int array of randomly generated numbers.My code works fine when the segments entered (denoted by the variable 'segments' at the top of main) is 2. However, when I change segments to 5 or 10, I don't get a sorted array at the end. I've tried debugging by using print statements (which I've commented out but you can still see) and there seems to be a problem during the first two merge iterations. For some reason the resulting of those merge iterations are not in order, and they contain duplicate numbers that don't exist in duplicate in the original array passed to it. My sorting method and merging methods work fine when I just pass arrays to them, and don't use threads but when I do use threads I get behavior that I can't explain. Below is my program in its entirety, to merge an array of 50 it should do the following:

My program: (you should mainly focus on my main function, sort is fine, and merge seems fine too, but i've included them anyways just in case)

#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <pthread.h>

/**
 * Struct to contain an array partition as well as the size of the    partition.
 * Use struct to pass multiple parameters to pthread_create
 */
struct array_struct{
int *partition;
int size;
};

/**
 * Struct that contains two arrays (should be sorted) to merge
 * Use struct to pass multiple parameters to pthread_create
*/
struct arrays_to_merge{
int *array1;
int *array2;
int size1;
int size2;
};

//comparison function to use with qsort, sorts in ascending order
int cmpfunc (const void * a, const void * b)
{
   return ( *(int*)a - *(int*)b );
}



/**
 * Method that takes a struct containing a pointer to the first int in          an array 
 * partition, as well as the partition size. Object must be type void, used with pthread_create
 * @param pointer to the partition object. Type void
  */
void *sort(void *object){

struct array_struct *structure;
structure = (struct array_struct *) object;

int *array = structure->partition;
int size = structure->size;

int *i, j = 0;
qsort(array, size, sizeof(int), cmpfunc);
printf("Sorted %d elements.\n", size);
}

void *merge(void * object){

struct arrays_to_merge *arrays_struct;
arrays_struct = (struct arrays_to_merge *) object;


int *array1 = arrays_struct->array1;
int *array2 = arrays_struct->array2;
int size1 = arrays_struct->size1;
int size2 = arrays_struct->size2;
int tempArray[size1 + size2];

int i = 0, j = 0, k = 0, duplicates = 0;

while (i < size1 && j < size2) {
    // printf("Merge number : %d  Comparing %d and %d\n", mergenumber, array1[i], array2[j]);
    if (array1[i] <= array2[j]) {
        // printf("Picking %d\n", array1[i]);
        tempArray[k] = array1[i];
        if (array1[i] == array2[j])
        {
            duplicates++;
        }
        i++;
        k++;
    }else {
         // printf("Merge number : %d Picking %d\n", mergenumber, array2[j]);
        tempArray[k] = array2[j];
        k++; 
        j++;
    }
}
while (i < size1) {
    // printf("Merge number : %d   left over Picking %d\n", mergenumber, array1[i]);
    tempArray[k] = array1[i];
    i++;
    k++;
}
while (j < size2) {
    // printf("Merge number : %d   left over Picking %d\n", mergenumber, array2[j]);
    tempArray[k] = array2[j];
    k++;
    j++;
}

array1 = arrays_struct->array1;

for(i = 0; i < size1 + size2; i++){
    array1[i] = tempArray[i];
}

printf("Merged %d and %d elements with %d duplicates\n", size1, size2, duplicates);


}




//return an array of size 50 with randomly generated integers
int *randomArray(){

srand(time(NULL));
static int array[50];
int i;
for (i = 0; i < 50; ++i){
    array[i] = rand() % 51;
}

return array;
}



int main(int argc, char const *argv[])
{

int segments = 10;//make equal to argv input after testing
pthread_t threads[segments];



int i, *numbers; //iterator i, and pointer to int array 'numbers'
numbers = randomArray(); //return an array of random ints and store in 'numbers'

struct array_struct array[segments];

for(i = 0; i < segments; i++){

    int *partition = numbers + (i * (50/segments));//obtain the first index of partition
    array[i].partition = partition;
    array[i].size = 50/segments;
    pthread_create(&threads[i], NULL, sort, (void *) &array[i]);

}

for(i = 0; i < segments; i++){
    pthread_join(threads[i], NULL);
}


int count = segments;

struct arrays_to_merge arrays[segments];    
int j;
int size = 50/ segments;

while(count > 1){

    for(i = 0, j = 0; i < count-1; j++, i += 2){
        int *partition = numbers + (i * (size));
        int *partition2 = numbers + (i+1 * (size));

        arrays[j].array1 = partition;
        arrays[j].array2 = partition2;
        arrays[j].size1 = size;
        arrays[j].size2 = size;
        pthread_create(&threads[j], NULL, merge, (void *) &arrays[j]);

    }
    for(i = 0; i < j; i++){
        pthread_join(threads[i], NULL);
    }

    size = size * 2;
    count = count/2;

}

if(segments != 2){//for segments = 2, no need for his
    int *partition = numbers;
    int *partition2 = numbers + (size);
    arrays[0].array1 = partition;
    arrays[0].array2 = partition2;
    arrays[0].size1 = size;
    arrays[0].size2 = 50 - size;
    pthread_create(&threads[0], NULL, merge, (void *) &arrays[0]);

    pthread_join(threads[0], NULL); 
}   


for(i = 0; i < 50; i++){
    printf("%d\n", numbers[i]);
}

pthread_exit(NULL);





return 0;
}

this is my output:

Sorted 5 elements.
Sorted 5 elements.
Sorted 5 elements.
Sorted 5 elements.
Sorted 5 elements.
Sorted 5 elements.
Sorted 5 elements.
Sorted 5 elements.
Sorted 5 elements.
Sorted 5 elements.
Merged 5 and 5 elements with 0 duplicates
Merged 5 and 5 elements with 0 duplicates
Merged 5 and 5 elements with 0 duplicates
Merged 5 and 5 elements with 0 duplicates
Merged 5 and 5 elements with 0 duplicates
Merged 10 and 10 elements with 3 duplicates
Merged 10 and 10 elements with 1 duplicates
Merged 20 and 20 elements with 7 duplicates
Merged 40 and 10 elements with 17 duplicates
0
6
9
11
12
13
13
14
15
17
19
23
25
25
25
26
26
28
28
28
28
30
32
32
32
34
39
41
41
44
44
44
44
44
50
50
9
15
50
9
15
19
26
50
50
9
15
11
14
50

Sorry for the long wall of text, I've tried resolving this on my own and after countless hairs pulled I can't figure it out. Please help me figure out what I'm doing wrong. I think my problem lies in either the way I'm joining threads, or my merge function but since I cant be sure, i just included the whole thing.

Upvotes: 3

Views: 209

Answers (1)

P.P
P.P

Reputation: 121397

It took a while but finally I got there :)

The problem is with this line:

    int *partition2 = numbers + (i+1 * (size));

which is equivalent to (due to operator precedence).

int *partition2 = numbers + (i + size);

and is not what you want.

It should be:

    int *partition2 = numbers + ((i+1) * (size));

Notice the additional brackets. Without which, the partition2 index is calculated incorrectly. Hence, merging with different parts of the array.

Upvotes: 5

Related Questions