Reputation: 11
I am trying to write a parallelized quicksort partition, but somehow, I am getting a segfault.
Here is how I'm doing it:
unsigned int NumList:: partition_par(vector<int>& keys, unsigned int low,
unsigned int high)
{
// Use the last element as the pivot
int pivot = keys[high];
unsigned int n = high - low + 1;
if(n == 1) {
return low;
}
int tmp[n-1];
unsigned int lt[n-1]; // lt array used to add elements less than the pivot
unsigned int gt[n-1]; // gt array used to add elements greater than the pivot
#pragma omp parallel for
for(unsigned int i = 0; i <= n-1; i++) {
tmp[i] = keys[low + i];
if(tmp[i] < pivot) {
lt[i] = 1;
}
else {
lt[i] = 0;
}
if(tmp[i] > pivot) {
gt[i] = 1;
}
else {
gt[i] = 0;
}
}
for(unsigned int i = 1; i <= n-1; i++) {
lt[i] = lt[i] + lt[i-1];
gt[i] = gt[i] + gt[i-1];
}
unsigned int k = low + lt[n-1]; // get position of the pivot
keys[k] = pivot;
#pragma omp parallel for
for(unsigned int i = 0; i <= n-1; i++) {
if(tmp[i] < pivot) {
keys[low + lt[i] - 1] = tmp[i];
}
else if(tmp[i] > pivot) {
keys[k + gt[i]] = tmp[i];
}
}
return k;
}
I'm not sure why I'm getting this segfault. I tried debugging it, but I still can't seem to find the problem. What needs to be fixed here?
Upvotes: 0
Views: 140
Reputation: 32742
Your tmp
, lt
and gt
arrays are not long enough. The last element you access in your loops is n-1
, so the arrays need to be of size n
, not n - 1
.
Using std::vector
(instead of the non-standard Variable Length Array) can avoid other problems (like a stack overflow if n
is too large), and could detect this problem if indexing using at
.
Upvotes: 1
Reputation: 11
I changed my code to look like this:
unsigned int NumList:: partition_par(vector<int>& keys, unsigned int low, unsigned int high)
{
// Use the last element as the pivot
int pivot = keys[high];
unsigned int n = high - low + 1;
if(n == 1) {
return low;
}
int* tmp = new int[n];
unsigned int* lt = new unsigned int[n]; // lt array used to add elements less than the pivot
unsigned int* gt = new unsigned int[n]; // gt array used to add elements greater than the pivot
unsigned int* prefix_lt = new unsigned int[n];
unsigned int* prefix_gt = new unsigned int[n];
// creates the lt and gt arrays
#pragma omp parallel for
for(unsigned int i = 0; i < n; i++) {
tmp[i] = keys[low + i];
if(tmp[i] < pivot) {
lt[i] = 1;
gt[i] = 0;
}
else if(tmp[i] > pivot) {
lt[i] = 0;
gt[i] = 1;
}
else {
lt[i] = 0;
gt[i] = 0;
}
}
prefix_lt[0] = lt[0];
prefix_gt[0] = gt[0];
// Uses prefix sum algorithm to get the proper positions of each element
for(unsigned int i = 1; i < n; i++) {
prefix_lt[i] = prefix_lt[i-1] + lt[i];
prefix_gt[i] = prefix_gt[i-1] + gt[i];
}
unsigned int k = low + prefix_lt[n-1]; // get position of the pivot
keys[k] = pivot;
// Copies each element to the correct position
#pragma omp parallel for
for(unsigned int i = 0; i < n; i++) {
if(tmp[i] < pivot) {
keys[low + prefix_lt[i] - 1] = tmp[i];
}
else if(tmp[i] > pivot) {
keys[k + prefix_gt[i]] = tmp[i];
}
}
return k;
}
However, this doesn't seem to sort the elements properly. It seems to be putting some of the elements in the wrong indices (i.e., puts some numbers one index behind of its desired index). What seems to be the problem here?
Upvotes: 0