John Mansell
John Mansell

Reputation: 644

Thrust -- sort two vectors by key

Problem

I want to sort a matrix by row, but return the rank of each element.

Example

  Values            Rank
-------------   --------------
[5, 4, 1, 9]     [2, 1, 0, 3]
[1, 4, 3, 2] --> [0, 3, 2, 1]
[2, 4, 2, 0]     [1, 3, 2, 0]

Attempt

I've these two examples:
Ranking rows of a matrix
Sort rows of a matrix

The first shows how to use an index vector and a permutation iterator to return the index of the sorted values. The second shows how to use the "back-to-back" method to sort a matrix by row. (Sort by key 2x). But I can't figure out how to combine these two ideas.

I tried using a zip_iterator to combine values and indexes into a tuple, and then do the back to back method, but I can't do a sort-by-key on ziped tuples.

I also tried using the back-to-back sort, and then indexing the values, but then the index is just the already sorted values, so the index is always [0, 1, 2, 3] for each row of the matrix.

Code

#include <iostream>
#include <iomanip>
#include <fstream>
#include <thrust/device_vector.h>
#include <thrust/device_ptr.h>
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include <thrust/execution_policy.h>
#include <thrust/generate.h>
#include <thrust/equal.h>
#include <thrust/sequence.h>
#include <thrust/for_each.h>
#include <iostream>
#include <stdlib.h>

using namespace std;

#define NSORTS 5
#define DSIZE 4

// -------------------
//      Print
// -------------------

    template <class Vector>
    void print(std::string name, Vector toPrint)
    {
        cout << setw(13) << name << " :: ";

        int i = 0;
        for (auto x : toPrint)
        {
            i++;
            std::cout << setw(2) << x << " ";
            if (!(i%4))
                cout << "   ";
        }
        std::cout << std::endl;
    }

// ---------------------
//      Print Title
// ---------------------
    void print_title(const std::string title)
    {
        cout << "\n\n";
        cout << "-------------------\n";
        cout << "      " << title << "\n";
        cout << "-------------------\n";
    }

// ---------------------
//      My Mod
// ---------------------
    int my_mod_start = 0;
    int my_mod(){
        return (my_mod_start++)/DSIZE;
    }

// ------------------
//      Clamp
// ------------------
    struct clamp
    {
        template <typename T>
        __host__ __device__
        T operator()(T data){
            if (data <= 0) return 0;
            return 1;}
    };


int main()
{
    // Initialize
        thrust::host_vector<int> h_data(DSIZE * NSORTS);
        thrust::generate(h_data.begin(), h_data.end(), rand);
        thrust::transform(h_data.begin(), h_data.end(), h_data.begin(), thrust::placeholders::_1 % 10);
        int size = DSIZE * NSORTS;

    // Device Vectors
        thrust::device_vector<int> d_data = h_data;
        thrust::device_vector<int> d_idx(size);
        thrust::device_vector<int> d_result(size);

        thrust::sequence(d_idx.begin(), d_idx.end());

    // Segments
        thrust::host_vector<int> h_segments(size);
        thrust::generate(h_segments.begin(), h_segments.end(), my_mod);
        thrust::device_vector<int> d_segments = h_segments;

            print_title("Generate");

            print("Data", d_data);
            print("Index", d_idx);
            print("Segments", d_segments);

    // Sort 1
        thrust::stable_sort_by_key(d_data.begin(), d_data.end(), d_segments.begin());

            print_title("Sort 1");

            print("Data", d_data);
            print("Index", d_idx);
            print("Segments", d_segments);

    // Sort 2
        thrust::stable_sort_by_key(d_segments.begin(), d_segments.end(), d_data.begin());

            print_title("Sort 2");

            print("Data", d_data);
            print("Index", d_idx);
            print("Segments", d_segments);

    // Adjacent Difference
        thrust::device_vector<int> d_diff(size);
        thrust::adjacent_difference(d_data.begin(), d_data.end(), d_diff.begin());
        d_diff[0] = 0;

            print_title("Adj Diff");

            print("Data", d_data);
            print("Index", d_idx);
            print("Segments", d_segments);
            print("Difference", d_diff);

    // Transform
        thrust::transform(d_diff.begin(), d_diff.end(), d_diff.begin(), clamp());

            print_title("Transform");

            print("Data", d_data);
            print("Index", d_idx);
            print("Segments", d_segments);
            print("Difference", d_diff);

    // Inclusive Scan
        thrust::inclusive_scan_by_key(d_segments.begin(), d_segments.end(), d_diff.begin(), d_diff.begin());

            print_title("Inclusive");

            print("Data", d_data);
            print("Index", d_idx);
            print("Segments", d_segments);
            print("Difference", d_diff);

    // Results
        thrust::copy(d_diff.begin(), d_diff.end(), thrust::make_permutation_iterator(d_result.begin(), d_idx.begin()));

            print_title("Results");

            print("Data", d_data);
            print("Index", d_idx);
            print("Segments", d_segments);
            print("Difference", d_diff);
            print("Results", d_result);

}

Edit -- example rank matrix was wrong

Upvotes: 0

Views: 700

Answers (1)

Robert Crovella
Robert Crovella

Reputation: 152164

This can be done with a single sort_by_key operation, followed by a "rearrangement" of the sorted values.

The "keys" (things we sort on) will be a zip_iterator combining your actual values to be sorted, along with a row indicator. The sort functor will be arranged to sort first by row, then by value within the row. The "values" (things that get moved along with the keys) will be a column index in each row.

These "values" after sorting can be rearranged to be our indicator of "rank" within a row.

We can use a numerical example following the 3rd row of your matrix:

before sort:

keys:     [2, 4, 2, 0]
values:   [0, 1, 2, 3]

after sort:

keys:     [0, 2, 2, 4]
values:   [3, 0, 2, 1]

before rearranging:

destination map:      [3, 0, 2, 1]
values:               [0, 1, 2, 3]

after rearranging:

destination:          [1, 3, 2, 0]

(as it happens, for the first two rows in your example, there is no change between the sorted values and the destination values)

Here is a worked example:

$ cat t1633.cu
#include <iostream>
#include <thrust/sort.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/copy.h>


typedef int dtype;

// creates row indices:
// 0 0 0 0 ...
// 1 1 1 1 ...
// 2 2 2 2 ...
struct row_f : public thrust::unary_function<int, int>
{
  int ncols;
  row_f(int _nc) : ncols(_nc) {};
  __host__ __device__
  int operator()(int i){
    return i/ncols;}
};

// creates column indices:
// 0 1 2 3 ...
// 0 1 2 3 ...
// 0 1 2 3 ...
struct col_f : public thrust::unary_function<int, int>
{
  int ncols;
  col_f(int _nc) : ncols(_nc) {};
  __host__ __device__
  int operator()(int i){
    return i%ncols;}
};

struct map_f : public thrust::unary_function<thrust::tuple<int, int>, int>
{
  int ncols;
  map_f(int _nc) : ncols(_nc) {};
  __host__ __device__
  int operator()(thrust::tuple<int, int> t){
    return thrust::get<0>(t) + ncols*thrust::get<1>(t);}
};

struct sort_f
{
  template <typename T1, typename T2>
  __host__ __device__
  bool operator()(T1 k1, T2 k2){
// sort by row first
    if (thrust::get<1>(k1) < thrust::get<1>(k2)) return true;
    if (thrust::get<1>(k1) > thrust::get<1>(k2)) return false;
// then sort within the row
    if (thrust::get<0>(k1) < thrust::get<0>(k2)) return true;
    return false;}
};

int main(){

// example data setup
  dtype keys[] = {5, 4, 1, 9, 1, 4, 3, 2, 2, 4, 2, 0};
  int nrows = 3;
  int ds = sizeof(keys)/sizeof(keys[0]);
  int ncols = ds/nrows;

  thrust::device_vector<dtype> d_keys(keys, keys+ds);
// create values to be moved which is effectively index within row, i.e. column indices
  thrust::device_vector<int> d_vals(nrows*ncols);
  thrust::sequence(d_vals.begin(), d_vals.end());
  thrust::transform(d_vals.begin(), d_vals.end(), d_vals.begin(), col_f(ncols));
// sort
  thrust::sort_by_key(thrust::make_zip_iterator(thrust::make_tuple(d_keys.begin(), thrust::make_transform_iterator(thrust::counting_iterator<int>(0), row_f(ncols)))), thrust::make_zip_iterator(thrust::make_tuple(d_keys.end(), thrust::make_transform_iterator(thrust::counting_iterator<int>(nrows*ncols), row_f(ncols)))), d_vals.begin(), sort_f());
// rearrange
  thrust::device_vector<int> d_rank(nrows*ncols);
  thrust::copy_n(thrust::make_transform_iterator(thrust::counting_iterator<int>(0), col_f(ncols)), nrows*ncols, thrust::make_permutation_iterator(d_rank.begin(), thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(d_vals.begin(), thrust::make_transform_iterator(thrust::counting_iterator<int>(0), row_f(ncols)))), map_f(ncols))));
// print results
  thrust::copy_n(d_rank.begin(), ncols*nrows, std::ostream_iterator<int>(std::cout, ","));
  std::cout << std::endl;
}
$ nvcc -o t1633 t1633.cu
$ ./t1633
2,1,0,3,0,3,2,1,1,3,2,0,
$

Upvotes: 2

Related Questions