Buddy
Buddy

Reputation: 84

C++: OpenMP parallel loop memory leaks

Good day everyone, I have encountered quite a severe problem with memory leaks using OpenMP in C++ code. I am writing a library for som geophysical computations and they are quite time-consuming. I created a simple piece of code to give you an idea (the original code is quite long, hope not necessary for the solution). To avoid rewriting the "same" lines of code over and over I have some templates and they are specified by using pointers to functions of methods (it's like I know the position in the space and need to compute different quantities). I am also using "armadillo" (http://arma.sourceforge.net/) library for some of the computation but the problem remains with or without it.

If the code is run with only single thread it's without any troubles. But using OpenMP (#pragma directives) is causing memory leaks over time. The program effectively consumes all the available memory and then crashes. You can reproduce the program with the code I provided (just change the "iteration size" from 5000 to something bigger )

I have tried to replace "armadillo vectors" with my own but it seems to not cause the problem. I think that armadillo is not a problem. I ran the valgrid memcheck but not sure what really happened (two types of "errors"):

149,520,000 bytes in 3,738 blocks are definitely lost in loss record 24 of 25
  in Openmp_class::generate_data(unsigned long) in $HOME/Programovanie/Ptr2memOpenMP/openmp_class.cpp:20
  1: operator new[](unsigned long) in /builddir/build/BUILD/valgrind-3.15.0/coregrind/m_replacemalloc/vg_replace_malloc.c:433
  2: Openmp_class::generate_data(unsigned long) in $HOME/Programovanie/Ptr2memOpenMP/openmp_class.cpp:20
  3: main._omp_fn.0 in $HOME/Programovanie/Ptr2memOpenMP/main.cpp:44
  4: /usr/lib64/libgomp.so.1.0.0
  5: start_thread in /usr/lib64/libpthread-2.29.so
  6: clone in /usr/lib64/libc-2.29.so

49,840,000 bytes in 1,246 blocks are definitely lost in loss record 22 of 25
  in Openmp_class::multiply_elements() in $HOME/Programovanie/Ptr2memOpenMP/openmp_class.cpp:90
  1: operator new[](unsigned long) in /builddir/build/BUILD/valgrind-3.15.0/coregrind/m_replacemalloc/vg_replace_malloc.c:433
  2: Openmp_class::multiply_elements() in $HOME/Programovanie/Ptr2memOpenMP/openmp_class.cpp:90
  3: main._omp_fn.0 in $HOME/Programovanie/Ptr2memOpenMP/main.cpp:45
  4: GOMP_parallel in /usr/lib64/libgomp.so.1.0.0
  5: main in $HOME/Programovanie/Ptr2memOpenMP/main.cpp:14

Header file: openmp_class.h

#ifndef OPENMP_CLASS_H
#define OPENMP_CLASS_H
#include <iomanip>
#include <iostream>
#include <cmath>
#include <armadillo>

using namespace std;

class Openmp_class
{
    double* xvec;
    double* yvec;

    size_t size;
public:
    Openmp_class();
    ~Openmp_class();

    void generate_data( size_t n );

    double add_element( size_t n );
    double substract_element( size_t n );

    arma::vec add_elements( size_t upto_n );
    arma::vec multiply_elements( size_t upto_n );

    double *multiply_elements();
};

#endif // OPENMP_CLASS_H

CPP file openmp_class.cpp

#include "openmp_class.h"

Openmp_class::Openmp_class()
{

}

Openmp_class::~Openmp_class()
{
    this->xvec = nullptr;
    this->yvec = nullptr;

    delete [] this->xvec;
    delete [] this->yvec;
}

void Openmp_class::generate_data(size_t n)
{
    this->xvec = new double[n];
    this->yvec = new double[n];

    this->size = n;

    arma::vec xrand = arma::randu<arma::vec>(n);
    arma::vec yrand = arma::randu<arma::vec>(n);

    for (unsigned int i = 0; i < xrand.n_elem; i++) {
        this->xvec[i] = xrand(i);
        this->yvec[i] = yrand(i);
    }

    xrand.reset();
    yrand.reset();
}

double Openmp_class::add_element(size_t n)
{
    if ( n < this->size ) {
        return  this->xvec[n] + this->yvec[n];
    } else {
        string errmsg = "Openmp_class::add_element index n out of bounds!";
        throw runtime_error( errmsg );
    }
}

double Openmp_class::substract_element(size_t n)
{
    if ( n < this->size ) {
        return  this->xvec[n] - this->yvec[n];
    } else {
        string errmsg = "Openmp_class::substract_element index n out of bounds!";
        throw runtime_error( errmsg );
    }
}

arma::vec Openmp_class::add_elements(size_t upto_n)
{
    if ( upto_n < this->size ) {
        arma::vec results = arma::zeros<arma::vec>( upto_n );

        for (unsigned int i = 0; i < upto_n; i++ ) {
            results(i) = this->xvec[i] + this->yvec[i];
        }

        return results;
    } else {
        string errmsg = "Openmp_class::add_elements index n out of bounds!";
        throw runtime_error( errmsg );
    }
}

arma::vec Openmp_class::multiply_elements(size_t upto_n)
{
    if ( upto_n < this->size ) {
        arma::vec results = arma::zeros<arma::vec>( upto_n );

        for (unsigned int i = 0; i < upto_n; i++ ) {
            results(i) = this->xvec[i] * this->yvec[i];
        }

        return  results;
    } else {
        string errmsg = "Openmp_class::add_elements index n out of bounds!";
        throw runtime_error( errmsg );
    }
}

double *Openmp_class::multiply_elements()
{
    double *xy = new double[this->size ];

    for (unsigned int i = 0; i < this->size; i++) {
        xy[i] = this->xvec[i] * this->yvec[i];
    }

    return xy;
}

main file main.cpp

#include <iostream>
#include <iomanip>
#include <cmath>

#define ARMA_USE_HDF5
#include <armadillo>
#include "openmp_class.h"
using namespace std;

//#define ARMA_OPEN_MP
int main(/*int argc, char *argv[]*/ void)
{
    Openmp_class Myclass;
    Myclass.generate_data( 10 );

    #ifdef ARMA_OPEN_MP
    {
        #pragma omp parallel
        {

        #pragma omp for
        for (unsigned int j = 10; j <= 500000; j++) {
            arma::vec (Openmp_class::*ptrmem) (size_t) = &Openmp_class::multiply_elements;

            Openmp_class TestClass;

            TestClass.generate_data( 5000 );
            arma::vec x_vec = (TestClass.*ptrmem)(4999);
            ptrmem = nullptr;
        }
        #pragma omp barrier
        }
    }
    #else
    {
        #pragma omp parallel
        {

        #pragma omp for
        for (unsigned int j = 10; j <= 500000; j++) {
            double* (Openmp_class::*ptre2mltply)() = &Openmp_class::multiply_elements;
            Openmp_class TestClass;

            TestClass.generate_data( 5000 );
            double* x_vec = (TestClass.*ptre2mltply)();

            x_vec = nullptr;
            delete [] x_vec;
            ptre2mltply = nullptr;
        }
        #pragma omp barrier
        }
    }
    #endif

    return 1;
}

Has anyone dealt with the problem already? Any suggestions?

Thank you for your time.

P.S. How exactly is pointer to function (or class member) shared between multiple threads?

Upvotes: 2

Views: 1042

Answers (2)

jpmarinier
jpmarinier

Reputation: 4733

I would suggest that you manage your own vectors just as you seem to manage your Armadillo objects, that is by letting the C++ runtime system take care of memory allocation/deallocation.

In your situation, this is made easy by the fact that most accesses are done using a syntax like x_vec[index]. This is perfectly compatible with std::vector objects. So you can get rid of raw pointers and just use std::vector objects instead. Then you no longer need to delete anything manually.

In your second multiply_elements() function, thanks to the existence of the STL-provided std::vector move constructor, you can efficiently return an std::vector object by value.

I took the liberty to modify your code accordingly, and the resulting program seems to keep Valgrind happy.

Header file: openmp_class.h

#ifndef OPENMP_CLASS_H
#define OPENMP_CLASS_H

#include <iomanip>
#include <iostream>
#include <cmath>
#include <armadillo>
#include <vector>

using namespace std;


class Openmp_class
{
private:
    // CHANGE:
    std::vector<double>  xvec;
    std::vector<double>  yvec;

    size_t size;

public:
    Openmp_class();
    ~Openmp_class();

    void generate_data( size_t n );

    double add_element( size_t n );
    double substract_element( size_t n );

    arma::vec add_elements( size_t upto_n );
    arma::vec multiply_elements( size_t upto_n );

    std::vector<double> multiply_elements();
};

#endif // OPENMP_CLASS_H

CPP file openmp_class.cpp:

#include "openmp_class.h"


Openmp_class::Openmp_class()
{

}

Openmp_class::~Openmp_class()
{
    // vector components automatically deleted
}


void Openmp_class::generate_data(size_t n)
{
    // CHANGE:
    this->xvec.resize(n);
    this->yvec.resize(n);

    this->size = n;

    arma::vec xrand = arma::randu<arma::vec>(n);
    arma::vec yrand = arma::randu<arma::vec>(n);

    for (unsigned int i = 0; i < xrand.n_elem; i++) {
        this->xvec[i] = xrand(i);
        this->yvec[i] = yrand(i);
    }

    xrand.reset();
    yrand.reset();
}

double Openmp_class::add_element(size_t n)
{
    if ( n < this->size ) {
        return  this->xvec[n] + this->yvec[n];
    } else {
        string errmsg = "Openmp_class::add_element index n out of bounds!";
        throw runtime_error( errmsg );
    }
}


double Openmp_class::substract_element(size_t n)
{
    if ( n < this->size ) {
        return  this->xvec[n] - this->yvec[n];
    } else {
        string errmsg = "Openmp_class::substract_element index n out of bounds!";
        throw runtime_error( errmsg );
    }
}

arma::vec Openmp_class::add_elements(size_t upto_n)
{
    if ( upto_n < this->size ) {
        arma::vec results = arma::zeros<arma::vec>( upto_n );

        for (unsigned int i = 0; i < upto_n; i++ ) {
            results(i) = this->xvec[i] + this->yvec[i];
        }

        return results;
    } else {
        string errmsg = "Openmp_class::add_elements index n out of bounds!";
        throw runtime_error( errmsg );
    }
}


arma::vec Openmp_class::multiply_elements(size_t upto_n)
{
    if ( upto_n < this->size ) {
        arma::vec results = arma::zeros<arma::vec>( upto_n );

        for (unsigned int i = 0; i < upto_n; i++ ) {
            results(i) = this->xvec[i] * this->yvec[i];
        }

        return  results;
    } else {
        string errmsg = "Openmp_class::add_elements index n out of bounds!";
        throw runtime_error( errmsg );
    }
}


std::vector<double> Openmp_class::multiply_elements()
{
    // CHANGE:
    std::vector<double>  xy(this->size);

    for (unsigned int i = 0; i < this->size; i++) {
        xy[i] = this->xvec[i] * this->yvec[i];
    }

    return xy;
}

main file main.cpp:

#include <iostream>
#include <iomanip>
#include <cmath>

#define ARMA_USE_HDF5
#include <armadillo>
#include "openmp_class.h"
using namespace std;


//#define ARMA_OPEN_MP
int main(/*int argc, char *argv[]*/ void)
{
    Openmp_class Myclass;
    Myclass.generate_data( 10 );

    #ifdef ARMA_OPEN_MP
    {
        #pragma omp parallel
        {

        #pragma omp for
        for (unsigned int j = 10; j <= 500000; j++) {

            arma::vec (Openmp_class::*ptrmem) (size_t) = &Openmp_class::multiply_elements;

            Openmp_class  testObj;

            testObj.generate_data( 5000 );
            arma::vec x_vec = (testObj.*ptrmem)(4999);
            ptrmem = nullptr;
        }
        #pragma omp barrier
        }
    }
    #else
    {
        #pragma omp parallel
        {

        #pragma omp for
        for (unsigned int j = 10; j <= 500000; j++) {

            std::vector<double> (Openmp_class::*ptre2mltply)() = &Openmp_class::multiply_elements;
            Openmp_class testObj;

            testObj.generate_data( 5000 );
            std::vector<double> x_vec = (testObj.*ptre2mltply)();

            ptre2mltply = nullptr;
        }
        #pragma omp barrier
        }
    }
    #endif

    return 1;
}

P.S. How exactly is pointer to function (or class member) shared between multiple threads?

A pointer to class member is physically a memory offset, hence an integer constant. So each thread can have its own (cheap) copy.

Upvotes: 0

Oblivion
Oblivion

Reputation: 7374

You should not assign nullptr to your pointers before deleting them.

In your dtor you assign nullptr to your members then you free them.

this->xvec = nullptr;
this->yvec = nullptr;

delete [] this->xvec;
delete [] this->yvec;

and also in the main function:

x_vec = nullptr;
delete [] x_vec;
ptre2mltply = nullptr;

just delete those assignments from your code.

Upvotes: 4

Related Questions