bmbigo
bmbigo

Reputation: 3

CUDA C++ odd-even sort implementation

This is my code for odd-even sort. It compiles and runs without errors, but it does not seem to sort the array. Please help me. I am currently using CUDA 11.3 with Visual Studio 2019. My idea is to write an Even kernel and an Odd kernel and run them one after another, with each kernel's comparisons spread across threads. In other words, in the even phase thread k does if(arr[2k]>arr[2k+1]) swap(arr[2k],arr[2k+1]), and in the odd phase it does the same for the pair arr[2k+1], arr[2k+2].

#include <stdio.h>
#include<iostream>
#include<chrono>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"

using namespace std;
using namespace std::chrono;

// Even phase of an odd-even transposition sort.
//
// Thread t of the block looks at the pair (arr[2t], arr[2t + 1]) and swaps the
// two values when they are out of ascending order. Only threadIdx.x is used to
// pick the pair, so the kernel is meant to be launched with a single block.
//
//   arr  array of n ints that the kernel can read and write
//   n    number of elements in arr
__global__ void Even(int *arr, int n) {
    const int left = 2 * threadIdx.x;
    const int right = left + 1;

    // Threads whose pair would run past the end of the array have nothing to do.
    if (right >= n) {
        return;
    }

    const int first = arr[left];
    const int second = arr[right];
    if (first > second) {
        arr[left] = second;
        arr[right] = first;
    }
}

// Odd phase of an odd-even transposition sort.
//
// Thread t of the block looks at the pair (arr[2t + 1], arr[2t + 2]) and swaps
// the two values when they are out of ascending order. Only threadIdx.x is used
// to pick the pair, so the kernel is meant to be launched with a single block.
//
//   arr  array of n ints that the kernel can read and write
//   n    number of elements in arr
__global__ void Odd(int* arr, int n) {
    const int left = 2 * threadIdx.x + 1;
    const int right = left + 1;

    // Threads whose pair would run past the end of the array have nothing to do.
    if (right >= n) {
        return;
    }

    const int first = arr[left];
    const int second = arr[right];
    if (first > second) {
        arr[left] = second;
        arr[right] = first;
    }
}
        
// Number of elements to sort. This is a macro, so every later identifier `n`
// in the file is replaced by 10.
#define n 10
// Fills an array with random values, runs the Even/Odd kernels on it, and
// prints the array before and after together with the elapsed time.
int main(){
    int *a;
    int* ptr;
    // Size of n ints, in bytes.
    const int Size = sizeof(int) * n;

    // Device buffer. NOTE(review): the return values of the CUDA runtime calls
    // in this function are never checked.
    cudaMalloc((void**)&ptr, Size);

    // Host buffer. NOTE(review): this requests n * Size bytes, i.e. n times more
    // than the Size bytes that are filled and copied below.
    a = (int*)malloc(n * Size);

    srand(time(NULL));
    
    // Random values in the range [0, n).
    for(int i =0 ;i<n;i++){
        a[i] = rand()%n;
    }


    // Print the unsorted input.
    for (int i = 0; i < n; i++) {
       std:: cout << a[i] << " ";
    }
    std::cout << endl;

    // Copy the input into the device buffer.
    cudaMemcpy(ptr, a, Size, cudaMemcpyHostToDevice);

    auto starttime = high_resolution_clock::now();
 

    // n / 2 iterations, each running one even and one odd phase.
    // NOTE(review): both launches receive `a`, the host pointer returned by
    // malloc, not `ptr`, the device buffer that was filled above and is copied
    // back below. This is the likely reason the printed result is not sorted.
    for (int i = 0; i < n / 2; i++) {
       Even<<<1,n >>>(a, n);
        Odd<<<1,n >>>(a, n);

    }

    // Copy the device buffer back over the host array.
    cudaMemcpy( a, ptr, Size, cudaMemcpyDeviceToHost);

    auto stoptime = high_resolution_clock::now();
    auto duration = duration_cast<microseconds>(stoptime-starttime);

    // NOTE(review): the value is a microsecond count but the label says "ms".
    std::cout<<" time : " <<duration.count()<<"ms"<<endl;

    // Print the array after the kernels have run.
    for (int i = 0; i < n; i++) {
        std::cout << a[i] << " ";
    }
    std::cout << endl;
    free(a);
    cudaFree(ptr);
 
    return 0;

}

Upvotes: 0

Views: 298

Answers (1)

Dillon
Dillon

Reputation: 275

I suspect there are two problems.

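First, you are overwriting the first value in the array every time you run Odd(). You should remove the line arr[0] = 0; to fix this problem. (The question's code as shown above does not contain that line, so this point may refer to a different revision of the question.)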

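Second, you are passing the host pointer a instead of the device pointer ptr to the kernels. The kernels run on the GPU and must work on the buffer allocated with cudaMalloc, which is also the buffer you copy back with cudaMemcpy afterwards. You should pass ptr instead.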

With these (untested) edits, the code looks like this:

#include <stdio.h>
#include <chrono>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <vector>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"

using namespace std;
using namespace std::chrono;

// Even phase of an odd-even transposition sort.
//
// Each thread handles one pair (arr[2t], arr[2t + 1]), where t is the thread's
// global index in a 1D grid, and swaps the pair when it is out of ascending
// order. Pairs handled by different threads never overlap, so no
// synchronization is needed inside one launch.
//
// Launch layout: 1D grid with at least n / 2 threads in total; any number of
// blocks works, including the single-block <<<1, n>>> launch. Uses no shared
// memory.
//
//   arr  device-accessible array of n ints, sorted in place phase by phase
//   n    number of elements in arr
__global__ void Even(int *arr, int n) {
    // Use the global index rather than threadIdx.x alone, so that a multi-block
    // launch covers distinct pairs instead of every block redoing the first
    // blockDim.x pairs. 64-bit arithmetic keeps 2 * t from overflowing.
    const long long t =
        static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
    const long long left = 2 * t;

    // Skip threads whose right-hand neighbour would be past the end.
    if (left + 1 < n) {
        if (arr[left] > arr[left + 1]) {
            int temp = arr[left];
            arr[left] = arr[left + 1];
            arr[left + 1] = temp;
        }
    }
}

// Odd phase of an odd-even transposition sort.
//
// Each thread handles one pair (arr[2t + 1], arr[2t + 2]), where t is the
// thread's global index in a 1D grid, and swaps the pair when it is out of
// ascending order. Pairs handled by different threads never overlap, so no
// synchronization is needed inside one launch.
//
// Launch layout: 1D grid with at least (n - 1) / 2 threads in total; any
// number of blocks works, including the single-block <<<1, n>>> launch. Uses
// no shared memory.
//
//   arr  device-accessible array of n ints, sorted in place phase by phase
//   n    number of elements in arr
__global__ void Odd(int* arr, int n) {
    // Use the global index rather than threadIdx.x alone, so that a multi-block
    // launch covers distinct pairs instead of every block redoing the first
    // blockDim.x pairs. 64-bit arithmetic keeps 2 * t + 1 from overflowing.
    const long long t =
        static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
    const long long left = 2 * t + 1;

    // no longer setting a[0] = 0
    // Skip threads whose right-hand neighbour would be past the end.
    if (left + 1 < n) {
        if (arr[left] > arr[left + 1]) {
            int temp = arr[left];
            arr[left] = arr[left + 1];
            arr[left + 1] = temp;
        }
    }
}
        
// Number of elements to sort. A typed constant instead of a macro, so that it
// does not textually replace every later identifier named `n`.
constexpr int n = 10;

// The sort below launches one block of n threads, so n must fit in one block.
static_assert(n >= 1 && n <= 1024,
              "n must fit in a single thread block (at most 1024 threads)");

// Terminates the program with a diagnostic when a CUDA runtime call fails.
//   status  value returned by the CUDA runtime call
//   what    short description of the call, used in the error message
static void checkCuda(cudaError_t status, const char* what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
        std::exit(EXIT_FAILURE);
    }
}

// Prints the values on one line, each followed by a space.
static void printValues(const std::vector<int>& values) {
    for (int value : values) {
        std::cout << value << " ";
    }
    std::cout << std::endl;
}

// Fills an array with random values in [0, n), sorts it on the GPU with
// alternating Even/Odd phases, and prints the array before and after together
// with the time spent in the kernels.
int main(){
    // Size of the array in bytes; used for the device buffer and both copies.
    const size_t Size = sizeof(int) * n;

    // Host copy of the data; exactly n ints, released automatically.
    std::vector<int> a(n);

    std::srand(static_cast<unsigned>(std::time(NULL)));
    for (int& value : a) {
        value = std::rand() % n;
    }
    printValues(a);

    int* ptr = nullptr;
    checkCuda(cudaMalloc((void**)&ptr, Size), "cudaMalloc");
    checkCuda(cudaMemcpy(ptr, a.data(), Size, cudaMemcpyHostToDevice),
              "cudaMemcpy (host to device)");

    auto starttime = std::chrono::high_resolution_clock::now();

    // Odd-even transposition sort needs n phases. Each iteration runs two
    // phases, so round n / 2 up to stay correct when n is odd.
    for (int i = 0; i < (n + 1) / 2; i++) {
        Even<<<1, n>>>(ptr, n);  // ptr (device buffer) instead of a
        Odd<<<1, n>>>(ptr, n);   // ptr (device buffer) instead of a
    }
    // Launches report nothing by themselves: pick up launch errors first, then
    // wait for the kernels so that execution errors surface here as well.
    checkCuda(cudaGetLastError(), "kernel launch");
    checkCuda(cudaDeviceSynchronize(), "kernel execution");

    auto stoptime = std::chrono::high_resolution_clock::now();
    auto duration =
        std::chrono::duration_cast<std::chrono::microseconds>(stoptime - starttime);

    checkCuda(cudaMemcpy(a.data(), ptr, Size, cudaMemcpyDeviceToHost),
              "cudaMemcpy (device to host)");

    // The count is in microseconds, so label it "us" rather than "ms".
    std::cout << " time : " << duration.count() << "us" << std::endl;

    printValues(a);

    checkCuda(cudaFree(ptr), "cudaFree");

    return 0;
}

Upvotes: 1

Related Questions