Reputation: 693
I need to generate N unique random integers within a range (A,B) using cuda. I would prefer them to be uniformly distributed but I dont know if that conflicts with the necessity of each number being unique.
This question has been asked before without any answer with a coding hint.
How can I generate a fixed number of unique random integers within an interval without repetition?
My attempt as below generates random numbers but they are not unique.
#include <stdio.h>
#include <curand.h>
#include <curand_kernel.h>
#include <math.h>
#include <assert.h>
__global__ void setup_kernel ( curandState * state, unsigned long seed )
{
int id = threadIdx.x;
curand_init ( seed, id, 0, &state[id] );
}
__global__ void generate( curandState* globalState, int * result, int *max, int *min, int count )
{
int ind = threadIdx.x;
curandState localState = globalState[ind];
float RANDOM = curand_uniform( &localState );
globalState[ind] = localState;
if (ind < count)
result[ind] = truncf(*min +(*max - *min)*RANDOM);
}
int main( int argc, char** argv)
{
int N = 32; // no of random numbers to be generated
int MIN = 10; // max range of random number
int MAX = 100; // min range of random number
dim3 tpb(N,1,1);
curandState* devStates;
cudaMalloc ( &devStates, N*sizeof( curandState ) );
// setup seeds
setup_kernel <<< 1, tpb >>> ( devStates, time(NULL) );
int *d_result, *h_result;
cudaMalloc(&d_result, N * sizeof(int));
h_result = (int *)malloc(N * sizeof(int));
int *d_max, *h_max, *d_min, *h_min;
cudaMalloc(&d_max, sizeof(int));
h_max = (int *)malloc(sizeof(int));
cudaMalloc(&d_min, sizeof(int));
h_min = (int *)malloc(sizeof(int));
*h_max =MAX;
*h_min =MIN;
cudaMemcpy(d_max, h_max, sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(d_min, h_min, sizeof(int), cudaMemcpyHostToDevice);
// generate random numbers
generate <<< 1, tpb >>> ( devStates, d_result, d_max, d_min, N );
cudaMemcpy(h_result, d_result, N * sizeof(float), cudaMemcpyDeviceToHost);
for (int i = 0; i < N; i++)
printf("random number= %d\n", h_result[i]);
return 0;
}
20, 39, 43, 72, 39, 70, 58, 31, 44, 47, 30, 26, 42, 35, 20, 66, 94, 81, 42(repeated), 50, 90, 31(repeated), 51, 53, 39(repeated), 20, 66, 37, 42(repeated), 21, 45, 57
Upvotes: 0
Views: 2621
Reputation: 152269
One possible approach, probably much less efficient than the Fisher-Yates shuffle mentioned in the comments:
Determine the length of the range of integers desired to select from (B-A). Generate a set of random numbers using CURAND of this length.
Use sort by key (e.g. thrust::sort_by_key
) using this sequence of random numbers along with the sequence of the range of integers to select from, to reorder that sequence.
Take the first N numbers from that sequence (where N is the desired number of random integers to produce), as your selected values.
This will obviously be prohibitive at the point where the length of the range (B-A) of integers to select from implies memory requirements that exceed what the GPU can hold. Thrust sort-by-key requires O(N) temporary storage, so at the point where the range of integers * 8 bytes exceeds about 40% of your available GPU memory, this will become unworkable.
This has the advantages of relatively simple to implement, using ordinary libraries. It has the disadvantage of probably being much less efficient than an expertly written F-Y shuffle. However from what I can see, F-Y shuffle requires that:
Here's an example:
$ cat t1504.cu
#include <stdio.h>
#include <curand.h>
#include <curand_kernel.h>
#include <math.h>
#include <assert.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/sequence.h>
#include <thrust/sort.h>
__global__ void setup_kernel ( curandState * state, unsigned long seed, int n)
{
int id = threadIdx.x+blockDim.x*blockIdx.x;
if (id < n)
curand_init ( seed, id, 0, &state[id] );
}
__global__ void generate( curandState* globalState, float * result, int count )
{
int ind = threadIdx.x+blockDim.x*blockIdx.x;
if (ind < count){
curandState localState = globalState[ind];
float RANDOM = curand_uniform( &localState );
globalState[ind] = localState;
result[ind] = RANDOM;}
}
int main( int argc, char** argv)
{
int N = 32; // no of random numbers to be generated
int MIN = 10; // max range of random number
int MAX = 100; // min range of random number
curandState* devStates;
int R = MAX-MIN;
cudaMalloc ( &devStates, R*sizeof( curandState ) );
// setup seeds
setup_kernel <<< (R+255)/256, 256 >>> ( devStates, time(NULL), R );
float *d_result;
cudaMalloc(&d_result, R * sizeof(float));
// generate random numbers
generate <<< (R+255)/256, 256>>> ( devStates, d_result, R );
thrust::device_vector<int> d_r(R);
thrust::sequence(d_r.begin(), d_r.end(), MIN);
thrust::device_ptr<float> dp_res = thrust::device_pointer_cast(d_result);
thrust::sort_by_key(dp_res, dp_res+R, d_r.begin());
thrust::host_vector<int> h_result = d_r;
for (int i = 0; i < N; i++)
printf("random number= %d\n", h_result[i]);
return 0;
}
$ nvcc -o t1504 t1504.cu -lcurand
[user2@dc10 misc]$ ./t1504
random number= 16
random number= 97
random number= 31
random number= 80
random number= 61
random number= 21
random number= 98
random number= 70
random number= 46
random number= 41
random number= 30
random number= 71
random number= 52
random number= 92
random number= 48
random number= 39
random number= 59
random number= 63
random number= 96
random number= 40
random number= 81
random number= 32
random number= 34
random number= 79
random number= 73
random number= 49
random number= 19
random number= 24
random number= 11
random number= 78
random number= 42
random number= 12
$
Upvotes: 1