Reputation: 644
I want to sort a matrix by row, but return the rank of each element.
Values Rank
------------- --------------
[5, 4, 1, 9] [2, 1, 0, 3]
[1, 4, 3, 2] --> [0, 3, 2, 1]
[2, 4, 2, 0] [1, 3, 2, 0]
I've these two examples:
Ranking rows of a matrix
Sort rows of a matrix
The first shows how to use an index vector and a permutation iterator to return the index of the sorted values. The second shows how to use the "back-to-back" method to sort a matrix by row. (Sort by key 2x). But I can't figure out how to combine these two ideas.
I tried using a zip_iterator to combine values and indexes into a tuple, and then do the back to back method, but I can't do a sort-by-key on ziped tuples.
I also tried using the back-to-back sort, and then indexing the values, but then the index is just the already sorted values, so the index is always [0, 1, 2, 3] for each row of the matrix.
#include <iostream>
#include <iomanip>
#include <fstream>
#include <thrust/device_vector.h>
#include <thrust/device_ptr.h>
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include <thrust/execution_policy.h>
#include <thrust/generate.h>
#include <thrust/equal.h>
#include <thrust/sequence.h>
#include <thrust/for_each.h>
#include <iostream>
#include <stdlib.h>
using namespace std;
#define NSORTS 5
#define DSIZE 4
// -------------------
// Print
// -------------------
template <class Vector>
void print(std::string name, Vector toPrint)
{
cout << setw(13) << name << " :: ";
int i = 0;
for (auto x : toPrint)
{
i++;
std::cout << setw(2) << x << " ";
if (!(i%4))
cout << " ";
}
std::cout << std::endl;
}
// ---------------------
// Print Title
// ---------------------
void print_title(const std::string title)
{
cout << "\n\n";
cout << "-------------------\n";
cout << " " << title << "\n";
cout << "-------------------\n";
}
// ---------------------
// My Mod
// ---------------------
int my_mod_start = 0;
int my_mod(){
return (my_mod_start++)/DSIZE;
}
// ------------------
// Clamp
// ------------------
struct clamp
{
template <typename T>
__host__ __device__
T operator()(T data){
if (data <= 0) return 0;
return 1;}
};
int main()
{
// Initialize
thrust::host_vector<int> h_data(DSIZE * NSORTS);
thrust::generate(h_data.begin(), h_data.end(), rand);
thrust::transform(h_data.begin(), h_data.end(), h_data.begin(), thrust::placeholders::_1 % 10);
int size = DSIZE * NSORTS;
// Device Vectors
thrust::device_vector<int> d_data = h_data;
thrust::device_vector<int> d_idx(size);
thrust::device_vector<int> d_result(size);
thrust::sequence(d_idx.begin(), d_idx.end());
// Segments
thrust::host_vector<int> h_segments(size);
thrust::generate(h_segments.begin(), h_segments.end(), my_mod);
thrust::device_vector<int> d_segments = h_segments;
print_title("Generate");
print("Data", d_data);
print("Index", d_idx);
print("Segments", d_segments);
// Sort 1
thrust::stable_sort_by_key(d_data.begin(), d_data.end(), d_segments.begin());
print_title("Sort 1");
print("Data", d_data);
print("Index", d_idx);
print("Segments", d_segments);
// Sort 2
thrust::stable_sort_by_key(d_segments.begin(), d_segments.end(), d_data.begin());
print_title("Sort 2");
print("Data", d_data);
print("Index", d_idx);
print("Segments", d_segments);
// Adjacent Difference
thrust::device_vector<int> d_diff(size);
thrust::adjacent_difference(d_data.begin(), d_data.end(), d_diff.begin());
d_diff[0] = 0;
print_title("Adj Diff");
print("Data", d_data);
print("Index", d_idx);
print("Segments", d_segments);
print("Difference", d_diff);
// Transform
thrust::transform(d_diff.begin(), d_diff.end(), d_diff.begin(), clamp());
print_title("Transform");
print("Data", d_data);
print("Index", d_idx);
print("Segments", d_segments);
print("Difference", d_diff);
// Inclusive Scan
thrust::inclusive_scan_by_key(d_segments.begin(), d_segments.end(), d_diff.begin(), d_diff.begin());
print_title("Inclusive");
print("Data", d_data);
print("Index", d_idx);
print("Segments", d_segments);
print("Difference", d_diff);
// Results
thrust::copy(d_diff.begin(), d_diff.end(), thrust::make_permutation_iterator(d_result.begin(), d_idx.begin()));
print_title("Results");
print("Data", d_data);
print("Index", d_idx);
print("Segments", d_segments);
print("Difference", d_diff);
print("Results", d_result);
}
Edit -- example rank matrix was wrong
Upvotes: 0
Views: 700
Reputation: 152164
This can be done with a single sort_by_key operation, followed by a "rearrangement" of the sorted values.
The "keys" (things we sort on) will be a zip_iterator combining your actual values to be sorted, along with a row indicator. The sort functor will be arranged to sort first by row, then by value within the row. The "values" (things that get moved along with the keys) will be a column index in each row.
These "values" after sorting can be rearranged to be our indicator of "rank" within a row.
We can use a numerical example following the 3rd row of your matrix:
before sort:
keys: [2, 4, 2, 0]
values: [0, 1, 2, 3]
after sort:
keys: [0, 2, 2, 4]
values: [3, 0, 2, 1]
before rearranging:
destination map: [3, 0, 2, 1]
values: [0, 1, 2, 3]
after rearranging:
destination: [1, 3, 2, 0]
(as it happens, for the first two rows in your example, there is no change between the sorted values and the destination values)
Here is a worked example:
$ cat t1633.cu
#include <iostream>
#include <thrust/sort.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/copy.h>
typedef int dtype;
// creates row indices:
// 0 0 0 0 ...
// 1 1 1 1 ...
// 2 2 2 2 ...
struct row_f : public thrust::unary_function<int, int>
{
int ncols;
row_f(int _nc) : ncols(_nc) {};
__host__ __device__
int operator()(int i){
return i/ncols;}
};
// creates column indices:
// 0 1 2 3 ...
// 0 1 2 3 ...
// 0 1 2 3 ...
struct col_f : public thrust::unary_function<int, int>
{
int ncols;
col_f(int _nc) : ncols(_nc) {};
__host__ __device__
int operator()(int i){
return i%ncols;}
};
struct map_f : public thrust::unary_function<thrust::tuple<int, int>, int>
{
int ncols;
map_f(int _nc) : ncols(_nc) {};
__host__ __device__
int operator()(thrust::tuple<int, int> t){
return thrust::get<0>(t) + ncols*thrust::get<1>(t);}
};
struct sort_f
{
template <typename T1, typename T2>
__host__ __device__
bool operator()(T1 k1, T2 k2){
// sort by row first
if (thrust::get<1>(k1) < thrust::get<1>(k2)) return true;
if (thrust::get<1>(k1) > thrust::get<1>(k2)) return false;
// then sort within the row
if (thrust::get<0>(k1) < thrust::get<0>(k2)) return true;
return false;}
};
int main(){
// example data setup
dtype keys[] = {5, 4, 1, 9, 1, 4, 3, 2, 2, 4, 2, 0};
int nrows = 3;
int ds = sizeof(keys)/sizeof(keys[0]);
int ncols = ds/nrows;
thrust::device_vector<dtype> d_keys(keys, keys+ds);
// create values to be moved which is effectively index within row, i.e. column indices
thrust::device_vector<int> d_vals(nrows*ncols);
thrust::sequence(d_vals.begin(), d_vals.end());
thrust::transform(d_vals.begin(), d_vals.end(), d_vals.begin(), col_f(ncols));
// sort
thrust::sort_by_key(thrust::make_zip_iterator(thrust::make_tuple(d_keys.begin(), thrust::make_transform_iterator(thrust::counting_iterator<int>(0), row_f(ncols)))), thrust::make_zip_iterator(thrust::make_tuple(d_keys.end(), thrust::make_transform_iterator(thrust::counting_iterator<int>(nrows*ncols), row_f(ncols)))), d_vals.begin(), sort_f());
// rearrange
thrust::device_vector<int> d_rank(nrows*ncols);
thrust::copy_n(thrust::make_transform_iterator(thrust::counting_iterator<int>(0), col_f(ncols)), nrows*ncols, thrust::make_permutation_iterator(d_rank.begin(), thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(d_vals.begin(), thrust::make_transform_iterator(thrust::counting_iterator<int>(0), row_f(ncols)))), map_f(ncols))));
// print results
thrust::copy_n(d_rank.begin(), ncols*nrows, std::ostream_iterator<int>(std::cout, ","));
std::cout << std::endl;
}
$ nvcc -o t1633 t1633.cu
$ ./t1633
2,1,0,3,0,3,2,1,1,3,2,0,
$
Upvotes: 2