Reputation: 2722
I'm trying to use the QR linear system solver in cuSOLVER, this
#include <cusparse_v2.h>
#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <iostream>
#include <cassert>
#include <cublas_v2.h>
#include <cusolverDn.h>
#include <cusolverSp.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
using namespace thrust;
#define CUSPARSE_CHECK(x) {cusparseStatus_t _c=x; if (_c != CUSPARSE_STATUS_SUCCESS) {printf("cusparse fail: %d, line: %d\n", (int)_c, __LINE__); exit(-1);}}
void init_handlers_and_matr_descriptor(cusolverSpHandle_t& cusolverH,cusparseHandle_t& cusparseH,cusparseMatDescr_t& descrA) {
assert(CUSOLVER_STATUS_SUCCESS == cusolverSpCreate(&cusolverH));
assert(CUSPARSE_STATUS_SUCCESS == cusparseCreate(&cusparseH));
assert(CUSPARSE_STATUS_SUCCESS == cusparseCreateMatDescr(&descrA)); //CUSPARSE_INDEX_ZERO,CUSPARSE_MATRIX_TYPE_GENERAL
assert(CUSPARSE_STATUS_SUCCESS == cusparseSetMatIndexBase(descrA,CUSPARSE_INDEX_BASE_ZERO));
assert(CUSPARSE_STATUS_SUCCESS == cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL));
}
int sparse_solver_test() {
//Init csr format A and b for solving Ax = b
/*
A =
[ 1.0 2.0 0.0 0.0 0.0 0.0 ]
[ 3.0 4.0 5.0 0.0 0.0 0.0 ]
[ 0.0 6.0 7.0 8.0 0.0 0.0 ]
[ 0.0 0.0 9.0 10.0 11.0 0.0 ]
[ 0.0 0.0 0.0 12.0 13.0 14.0 ]
[ 0.0 0.0 0.0 0.0 15.0 16.0 ]
b = [0.0,2.0,4.0,6.0,8.0,10.0]^T
*/
int nnz_A = 16, m = 6;
cusolverSpHandle_t cusolverH = NULL;
cusparseHandle_t cusparseH = NULL; //cuBLAS or cuSPARSE?
cusolverStatus_t cusolver_status = CUSOLVER_STATUS_SUCCESS;
cusparseMatDescr_t descrA;
cudaError_t cudaStat;
init_handlers_and_matr_descriptor(cusolverH, cusparseH, descrA);
host_vector<double> h_csrValA(nnz_A), h_dataB(m), h_dataX(m);
host_vector<int> h_csrRowPtrA(m + 1), h_csrColIndA(nnz_A);
device_vector<double> d_csrValA(nnz_A), d_dataB(m), d_dataX(m);
device_vector<int> d_csrRowPtrA(m + 1), d_csrColIndA(nnz_A);
//Init matrix
for (auto i = 0; i < nnz_A; ++i) h_csrValA[i] = static_cast<double>(i + 1);
h_csrRowPtrA[0] = 0;
h_csrRowPtrA[1] = 2;
for (auto i = 2; i < m; ++i) h_csrRowPtrA[i] = h_csrRowPtrA[i - 1] + 3;
h_csrRowPtrA[m] = nnz_A;
h_csrColIndA[0] = 0;
int v[] = {0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5 };
for (auto i = 0; i < nnz_A; ++i) h_csrColIndA[i] = v[i];
for (auto i = 0; i < m; ++i) h_dataB[i] = static_cast<double>(2 * i);
//device memory and descriptor A init
d_csrValA = h_csrValA;
d_csrRowPtrA = h_csrRowPtrA;
d_csrColIndA = h_csrColIndA;
d_dataB = h_dataB;
//step4, solve the linear system?
int singularity;
cusolver_status = cusolverSpDcsrlsvqr(
cusolverH, m, nnz_A, descrA,
d_csrValA.data().get(), d_csrRowPtrA.data().get(), d_csrColIndA.data().get(), d_dataB.data().get(),
0.0, 0, d_dataX.data().get(), &singularity);
std::cout << "singularity = " << singularity << std::endl;
assert(CUSOLVER_STATUS_SUCCESS == cusolver_status);
h_dataX = d_dataX;
std::cout << "x = (";
for (auto i = 0; i < m; ++i) {
std::cout << h_dataX[i];
if (i < m - 1) std::cout << ", ";
}
std::cout << std::endl;
if (cusparseH)
cusparseDestroy(cusparseH);
if (cusolverH)
cusolverSpDestroy(cusolverH);
if (descrA)
cusparseDestroyMatDescr(descrA);
cudaDeviceReset();
return 0;
}
int main(int argc, char** argv) {
sparse_solver_test();
return 0;
}
Not sure if the setup of my function is wrong, can anyone help?
Update I simplified the code a bit using the thrust library, still the error is the same though, but at least I got rid of all the malloc etc...
Update Corrected the csrIndColA
(changed the code accordingly) array as suggested. Now the solver works (i.e. I'm not getting anymore the error I was getting earlier), still though I'm getting 0 as result.
Update With all the changes I've done I also forgot to initialize h_dataB
, together with the indices in csrIndColA
that solved the problem, the full code is above for future reference.
Upvotes: 0
Views: 462
Reputation: 7245
The csrColIndA
array in your example is too short, so that cuSOLVER tries to read past the end of it.
According to the cuSOLVER documentation and common convention, the column index array has the same length as the array of nonzero matrix entries, and stores the column index of each nonzero element (and not just the first nonzero element in each column as in your example, which would limit the format to sparsity patterns where all nonzero elements are vertically consecutive).
So your example output should have
csrColIndA = {0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5}
Upvotes: 1