Reputation: 91
I'm a beginner at CUDA and trying to figure out the most efficient way to do something.
I have an array of values. I want to build an array that's essentially the number of times each value appears in the array. Is there an efficient algorithm to do this using CUDA?
For example, let's assume that the values range from 0-10. In reality I have negative values as well, but I need to ignore those. (EDIT: I've tried thrust::remove_if followed by thrust::reduce_by_key, but I'm looking for something that's more efficient in terms of ignoring the elements I don't care about. Almost like a thrust::reduce_by_key_if). The list is much much smaller than the range of the values (ie, the vast majority of values are outside the range I care about). I might have:
int32_t values[5] = {3, 5, 2, 5, 1, -1};
And I want to build the array:
int32_t result[10] = {0, 1, 1, 1, 0, 2, 0, 0, 0, 0};
Right now I'm doing it on the CPU mostly. I've tried sorting the list of indices it using thrust to improve memory caching performance, but the performance improvements there are marginal at best.
Any thoughts? Is there an elegant way to do this?
Upvotes: 2
Views: 190
Reputation: 91
thrust::remove_if followed by thrust::reduce_by_key ended up being the best way to do it for my application.
Upvotes: 0
Reputation: 16324
You can modify the thrust histogram example to only take into account the non-negative values when building the histogram after sorting:
#include <thrust/device_vector.h>
#include <thrust/sort.h>
#include <thrust/copy.h>
#include <thrust/adjacent_difference.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/find.h>
#include <thrust/functional.h>
#include <thrust/binary_search.h>
#include <iostream>
#include <iomanip>
#include <iterator>
#include <cstdint>
template <typename Vector>
void print_vector(const std::string& name, const Vector& v)
{
typedef typename Vector::value_type T;
std::cout << " " << std::setw(20) << name << " ";
thrust::copy(v.begin(), v.end(), std::ostream_iterator<T>(std::cout, " "));
std::cout << std::endl;
}
int main(void)
{
const std::int32_t N = 6;
std::int32_t values[6] = {3, 5, 2, 5, 1, -1};
thrust::device_vector<std::int32_t> d_values(values, values + N);
print_vector("initial data", d_values);
// sort values to bring equal elements together
thrust::sort(d_values.begin(), d_values.end());
print_vector("sorted data", d_values);
using thrust::placeholders::_1;
auto first_non_negative = thrust::find_if(d_values.begin(), d_values.end(), _1>=0);
// number of histogram bins is equal to the maximum value plus one
std::int32_t num_bins = d_values.back() + 1;
thrust::device_vector<std::int32_t> histogram(num_bins);
thrust::counting_iterator<std::int32_t> search_begin(0);
thrust::upper_bound(first_non_negative, d_values.end(),
search_begin, search_begin + num_bins,
histogram.begin());
thrust::adjacent_difference(histogram.begin(), histogram.end(),
histogram.begin());
print_vector("histogram", histogram);
return 0;
}
output
initial data 3 5 2 5 1 -1
sorted data -1 1 2 3 5 5
histogram 0 1 1 1 0 2
Upvotes: 2