Reputation: 3
This is my code for odd-even sort. It compiles and runs without errors, but it does not appear to sort the array. Please help me; I am currently using CUDA 11.3 with Visual Studio 2019. My idea is to create an odd function and an even function and run them one after another, multithreading the work inside each function. In other words, each thread does: if (arr[2k] > arr[2k+1]) swap(arr[2k], arr[2k+1]).
#include <stdio.h>
#include<iostream>
#include<chrono>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
using namespace std;
using namespace std::chrono;
// Even phase of the odd-even sort: thread t looks at the neighbouring pair
// (2t, 2t+1) of arr and exchanges the two values when the left one is larger.
// Only threadIdx.x is used to pick the pair; n is the element count.
__global__ void Even(int *arr, int n) {
    const int left = 2 * (int)threadIdx.x;
    const int right = left + 1;

    // Threads whose pair would run past the end of the array do nothing.
    if (left >= n - 1) {
        return;
    }

    const int first = arr[left];
    const int second = arr[right];
    if (first > second) {
        arr[left] = second;
        arr[right] = first;
    }
}
// Odd phase of the odd-even sort: thread t looks at the neighbouring pair
// (2t+1, 2t+2) of arr and exchanges the two values when the left one is larger.
// Only threadIdx.x is used to pick the pair; n is the element count.
__global__ void Odd(int* arr, int n) {
    const int left = 2 * (int)threadIdx.x + 1;
    const int right = left + 1;

    // Threads whose pair would run past the end of the array do nothing.
    if (left > n - 2) {
        return;
    }

    const int first = arr[left];
    const int second = arr[right];
    if (first > second) {
        arr[left] = second;
        arr[right] = first;
    }
}
// Number of elements to sort. The macro is defined after the kernels, so the
// kernels' own `int n` parameters are not affected by it.
#define n 10
// Demo driver: fills an array with random values, prints it, runs n/2 rounds
// of Even followed by Odd, copies the device buffer back, then prints the
// elapsed time and the array again.
int main(){
// a: host buffer (malloc); ptr: device buffer (cudaMalloc).
int *a;
int* ptr;
// Size of n ints in bytes.
const int Size = sizeof(int) * n;
cudaMalloc((void**)&ptr, Size);
// NOTE(review): Size is already sizeof(int) * n, so n * Size requests n times
// more bytes than the n ints that are actually used below.
a = (int*)malloc(n * Size);
srand(time(NULL));
// Fill the host buffer with random values in [0, n).
for(int i =0 ;i<n;i++){
a[i] = rand()%n;
}
// Print the unsorted input.
for (int i = 0; i < n; i++) {
std:: cout << a[i] << " ";
}
std::cout << endl;
// Copy the input from the host buffer into the device buffer.
cudaMemcpy(ptr, a, Size, cudaMemcpyHostToDevice);
auto starttime = high_resolution_clock::now();
// n/2 rounds of one Even launch followed by one Odd launch, each with a
// single block of n threads.
// NOTE(review): the kernels are handed `a` (the malloc'd host buffer), not
// `ptr` (the device buffer that received the data above). None of the CUDA
// calls in this function have their return codes checked.
for (int i = 0; i < n / 2; i++) {
Even<<<1,n >>>(a, n);
Odd<<<1,n >>>(a, n);
}
// Copy the device buffer back over the host buffer; this copy happens before
// stoptime is taken, so it is part of the measured interval.
cudaMemcpy( a, ptr, Size, cudaMemcpyDeviceToHost);
auto stoptime = high_resolution_clock::now();
// NOTE(review): the duration is converted to microseconds, but the label
// printed next to it is "ms".
auto duration = duration_cast<microseconds>(stoptime-starttime);
std::cout<<" time : " <<duration.count()<<"ms"<<endl;
// Print the array as copied back from the device.
for (int i = 0; i < n; i++) {
std::cout << a[i] << " ";
}
std::cout << endl;
free(a);
cudaFree(ptr);
return 0;
}
Upvotes: 0
Views: 298
Reputation: 275
I suspect there are two problems.
First, you are overwriting the first value in the array every time you run `Odd()`. You should remove the line `arr[0] = 0;` to fix this problem.
Second, you are passing the host pointer `a` instead of the device pointer `ptr` to the kernels. You should pass `ptr` instead.
With these (untested) edits, the code looks like this:
#include <stdio.h>
#include<iostream>
#include<chrono>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
using namespace std;
using namespace std::chrono;
// Even phase of odd-even transposition sort over arr[0..n).
//
// Thread t (global index across the whole 1D grid) owns the pair (2t, 2t+1)
// and swaps the two elements when they are out of order. The pairs of
// different threads are disjoint, so no two threads touch the same element
// within one launch.
//
// Launch layout: 1D grid of 1D blocks, any number of blocks, with at least
// n/2 threads in total; surplus threads fall through the bounds check.
// arr must point to memory the device can access; no shared memory is used.
__global__ void Even(int *arr, int n) {
    // Use the global thread index rather than threadIdx.x alone, so that a
    // multi-block launch gives every thread a distinct pair. Computed in
    // 64 bits so that doubling the index cannot overflow.
    const long long thread = (long long)blockIdx.x * blockDim.x + threadIdx.x;
    const long long left = 2 * thread;

    // Both left and left + 1 must be valid positions.
    if (left + 1 < n) {
        const int first = arr[left];
        const int second = arr[left + 1];
        if (first > second) {
            arr[left] = second;
            arr[left + 1] = first;
        }
    }
}
// Odd phase of odd-even transposition sort over arr[0..n).
//
// Thread t (global index across the whole 1D grid) owns the pair
// (2t+1, 2t+2) and swaps the two elements when they are out of order. The
// pairs of different threads are disjoint, so no two threads touch the same
// element within one launch.
//
// Launch layout: 1D grid of 1D blocks, any number of blocks, with at least
// (n-1)/2 threads in total; surplus threads fall through the bounds check.
// arr must point to memory the device can access; no shared memory is used.
__global__ void Odd(int* arr, int n) {
    // Use the global thread index rather than threadIdx.x alone, so that a
    // multi-block launch gives every thread a distinct pair. Computed in
    // 64 bits so that doubling the index cannot overflow.
    const long long thread = (long long)blockIdx.x * blockDim.x + threadIdx.x;
    const long long left = 2 * thread + 1;

    // no longer setting a[0] = 0
    // Both left and left + 1 must be valid positions.
    if (left + 1 < n) {
        const int first = arr[left];
        const int second = arr[left + 1];
        if (first > second) {
            arr[left] = second;
            arr[left + 1] = first;
        }
    }
}
// Number of elements to sort. Kept as a macro named `n` (defined after the
// kernels, so their `int n` parameters are unaffected by it).
#define n 10

// Reports a failed CUDA runtime call on stderr.
// Returns true when status is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char* what) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Prints `count` integers on one line, each followed by a space.
static void printValues(const int* values, int count) {
    for (int i = 0; i < count; i++) {
        std::cout << values[i] << " ";
    }
    std::cout << std::endl;
}

// Demo driver: fills an array with random values, prints it, sorts it on the
// device with alternating Even/Odd phases, then prints the elapsed time and
// the sorted array. Returns 0 on success and 1 if an allocation or any CUDA
// call fails.
int main(){
    // Every phase is launched as a single block, so all pairs must fit in it.
    static_assert(n >= 1 && n / 2 <= 1024,
                  "n / 2 pairs must fit into one thread block");

    const size_t Size = sizeof(int) * n;

    // Host buffer: exactly n ints (Size is already a byte count).
    int* a = (int*)malloc(Size);
    if (a == NULL) {
        fprintf(stderr, "host allocation of %zu bytes failed\n", Size);
        return 1;
    }

    // Device buffer that the kernels operate on.
    int* ptr = NULL;
    if (!cudaOk(cudaMalloc((void**)&ptr, Size), "cudaMalloc")) {
        free(a);
        return 1;
    }

    srand((unsigned)time(NULL));
    for (int i = 0; i < n; i++) {
        a[i] = rand() % n;
    }
    printValues(a, n);

    bool ok = cudaOk(cudaMemcpy(ptr, a, Size, cudaMemcpyHostToDevice),
                     "host-to-device copy");

    // One thread per pair is enough; keep at least one thread so the launch
    // configuration stays valid for n == 1.
    const int threads = (n / 2 > 0) ? n / 2 : 1;

    const auto starttime = std::chrono::high_resolution_clock::now();
    // Odd-even transposition sort needs n alternating phases in total.
    // Counting phases directly (instead of n / 2 Even+Odd rounds) also gives
    // the right number when n is odd.
    for (int phase = 0; ok && phase < n; phase++) {
        if (phase % 2 == 0) {
            Even<<<1, threads>>>(ptr, n); // ptr instead of a
        } else {
            Odd<<<1, threads>>>(ptr, n); // ptr instead of a
        }
        // Launches report configuration errors only through cudaGetLastError.
        ok = cudaOk(cudaGetLastError(), "kernel launch");
    }
    // The blocking copy waits for the kernels, so the measured interval covers
    // the whole sort plus the copy back; it also surfaces execution errors.
    ok = ok && cudaOk(cudaMemcpy(a, ptr, Size, cudaMemcpyDeviceToHost),
                      "device-to-host copy");
    const auto stoptime = std::chrono::high_resolution_clock::now();

    if (ok) {
        const auto duration =
            std::chrono::duration_cast<std::chrono::microseconds>(stoptime - starttime);
        // The count is in microseconds, so label it "us" rather than "ms".
        std::cout << " time : " << duration.count() << "us" << std::endl;
        printValues(a, n);
    }

    free(a);
    cudaFree(ptr);
    return ok ? 0 : 1;
}
Upvotes: 1