Reputation: 71
I have a small script for manipulating a sparse matrix in C++. It works perfectly fine except taking too much time. Since I'm doing this manipulation over and over, it is critical to speed it up. I appreciate any idea.Thanks
#include <stdio.h> /* printf, scanf, puts, NULL */
#include <stdlib.h> /* srand, rand */
#include <time.h> /* time */
#include <iostream> /* cout, fixed, scientific */
#include <string>
#include <cmath>
#include <vector>
#include <list>
#include <string>
#include <sstream> /* SJW 08/09/2010 */
#include <fstream>
#include <Eigen/Dense>
#include <Eigen/Sparse>
using namespace Eigen;
using namespace std;
SparseMatrix<double> MatMaker (int n1, int n2, double prob)
{
MatrixXd A = (MatrixXd::Random(n1, n2) + MatrixXd::Ones(n1, n2))/2;
A = (A.array() > prob).select(0, A);
return A.sparseView();
}
////////////////This needs to be optimized/////////////////////
int SD_func(SparseMatrix<double> &W, VectorXd &STvec, SparseMatrix<double> &Wo, int tauR, int tauD)
{
W = W + 1/tauR*(Wo - W);
for (int k = 0; k < W.outerSize(); ++k)
for (SparseMatrix<double>::InnerIterator it(W, k); it; ++it)
W.coeffRef(it.row(),it.col()) = it.value() * (1-STvec(it.col())/tauD);
return 1;
}
int main ()
{
SparseMatrix<double> Wo = MatMaker(5000, 5000, 0.1);
SparseMatrix<double> W = MatMaker(5000, 5000, 0.1);
VectorXd STvec = VectorXd::Random(5000);
clock_t tsd1,tsd2;
float Timesd = 0.0;
tsd1 = clock();
///////////////////////////////// Any way to speed up this function???????
SD_func(W, STvec, Wo, 8000, 50);
//////////////////////////////// ??????????
tsd2 = clock();
Timesd += (tsd2 - tsd1);
cout<<"SD time: " << Timesd / CLOCKS_PER_SEC << " s" << endl;
return 0;
}
Upvotes: 1
Views: 564
Reputation: 10596
The most critical performance improvement (IMO) you can make is to not use W.coeffRef(it.row(),it.col())
. It performs a binary search in W
for the element each time. As you are already using SparseMatrix<double>::InnerIterator it(W, k);
it is very simple to change your function to skip the binary search:
int SD_func_2(SparseMatrix<double> &W, VectorXd &STvec, SparseMatrix<double> &Wo, int tauR, int tauD)
{
W = W + 1/tauR*(Wo - W);
double tauDInv = 1./tauD;
for (int k = 0; k < W.outerSize(); ++k)
for (SparseMatrix<double>::InnerIterator it(W, k); it; ++it)
it.valueRef() *= (1-STvec(it.col())*tauDInv);
return 1;
}
This results in a roughly x3 speedup. Note that I've incorporated @dshin's comment that multiplying is faster than division, however the performance improvement is about 90% removing the binary search, 10% multiplication vs. division.
Upvotes: 3