Frank
Frank

Reputation: 2706

returning multiple py::array without copying in pybind11

I am trying to build a python module in C++ using pybind11. I have the following code:

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/numpy.h>

namespace py = pybind11;

// One record handed to Container::addElement: `i` is appended to the vector
// exposed by getInts(), `d` to getDoubles() and `d2` to getDoubles2().
// Callers brace-initialise it positionally (see fillContainer), so the
// member order i, d, d2 is part of its interface.
struct ContainerElement
{
    uint8_t i;   // value for the uint8_t column
    double d;    // value for the first double column
    double d2;   // value for the second double column
};

// Column-oriented store: every ContainerElement added is split across three
// parallel vectors, one per field.
class Container
{
public:
    // Each getter hands out a mutable reference to the underlying vector,
    // so callers see (and may modify) the live storage.
    std::vector<uint8_t>& getInts() { return ints_; }
    std::vector<double>& getDoubles() { return doubles_; }
    std::vector<double>& getDoubles2() { return doubles2_; }

    // Appends one element, pushing each field onto its own column so the
    // three vectors always grow in lock-step.
    void addElement(ContainerElement element)
    {
        ints_.push_back(element.i);
        doubles_.push_back(element.d);
        doubles2_.push_back(element.d2);
    }

private:
    std::vector<uint8_t> ints_;
    std::vector<double> doubles_;
    std::vector<double> doubles2_;
};

// Appends one million elements to `container`. Element number k carries k in
// both double fields and k truncated to 8 bits (i.e. k modulo 256) in the
// uint8_t field.
void fillContainer(Container& container)
{
    constexpr int kElementCount = 1000000;

    for (int value = 0; value < kElementCount; ++value)
    {
        const ContainerElement element{
            static_cast<uint8_t>(value),
            static_cast<double>(value),
            static_cast<double>(value)
        };
        container.addElement(element);
    }
}

PYBIND11_MODULE(containerInterface, m) {
    py::class_<Container>(m, "Container")
        .def(py::init<>())
        .def("getInts", [](Container& container)
        {
            return py::array_t<uint8_t>(
                    { container.getInts().size() },
                    { sizeof(uint8_t) },
                    container.getInts().data());
        })
        .def("getDoubles", [](Container& container)
        {
            return py::array_t<double>(
                    { container.getDoubles().size() },
                    { sizeof(double) },
                    container.getDoubles().data());
        })
        .def("getDoubles2", [](Container& container)
        {
            return py::array_t<double>(
                    { container.getDoubles2().size() },
                    { sizeof(double) },
                    container.getDoubles2().data());
        });

    m.def("fillContainer", &fillContainer);
}

When I call this code in python:

import containerInterface

# Create an empty Container and let the C++ side populate it.
container = containerInterface.Container()

containerInterface.fillContainer(container)

# Fetch each of the three columns as a NumPy array through the bound getters.
i = container.getInts()
d = container.getDoubles()
d2 = container.getDoubles2()

This works, however when I check the memory usage of the program (using psutil.Process(os.getpid()).memory_info().rss) it seems to make a copy when I call the functions getInts, getDoubles and getDoubles2. Is there a way to avoid this?

I have tried using np.array(container.getInts(), copy=False), but it still makes a copy. Also I tried using the py::buffer_protocol() on the Container class as mentioned here: https://pybind11.readthedocs.io/en/stable/advanced/pycpp/numpy.html . However I can only make that work for either the Ints vector or the Doubles vectors and not for all at the same time.

// Variant that exposes Container through the Python buffer protocol.
// A class has a single buffer, so only the uint8_t vector can be exported
// this way; np.array(container, copy=False) then aliases that vector.
PYBIND11_MODULE(containerInterface, m) {
    py::class_<Container>(m, "Container", py::buffer_protocol())
        .def(py::init<>())
        .def("getInts", &Container::getInts)
        .def("getDoubles", &Container::getDoubles)
        .def_buffer([](Container& container) -> py::buffer_info {
            std::vector<uint8_t>& ints = container.getInts();
            return py::buffer_info(
                ints.data(),                               // pointer to the first element
                sizeof(uint8_t),                           // size of one element in bytes
                py::format_descriptor<uint8_t>::format(),  // struct-style format code
                1,                                         // number of dimensions
                { ints.size() },                           // shape
                // Stride is the byte distance between consecutive elements,
                // i.e. one element, not the length of the whole buffer.
                { sizeof(uint8_t) }
            );
        });

    m.def("fillContainer", &fillContainer);
}

Then I can use i = np.array(container, copy=False), without a copy being made. However as I said it only works for the Ints vector now.

Upvotes: 7

Views: 4083

Answers (3)

driedler
driedler

Reputation: 4190

This doesn't directly solve the question, but still allows for returning an array buffer without doing a copy. Inspiration was taken from this thread: https://github.com/pybind/pybind11/issues/1042

Basically, just supply a py::capsule to the py::array() constructor. With this, the py::array() constructor does not allocate a separate buffer and copy. e.g.:

// Use this if the C++ buffer should NOT be deallocated
// once Python no longer has a reference to it.
// The capsule's destructor is an empty lambda, so releasing it does nothing.
py::capsule buffer_handle([](){});

// Use this if the C++ buffer SHOULD be deallocated
// once Python no longer has a reference to it.
// NOTE(review): this variant releases the buffer with free(), so it presumably
// expects data_buffer to come from malloc -- confirm against the allocation site.
// py::capsule buffer_handle(data_buffer, [](void* p){ free(p); });

// Build the array from a buffer description and pass the capsule as the
// second argument. data_buffer, element_size, data_type, dims_length, dims
// and strides are placeholders to be supplied by the surrounding code.
return py::array(py::buffer_info(
        data_buffer,
        element_size,
        data_type,
        dims_length,
        dims,
        strides
), buffer_handle);

Upvotes: 2

Frank
Frank

Reputation: 2706

I have found a solution that works. Though it might not be the most elegant. I have created three new classes Ints, Doubles and Doubles2 that take the original container and expose the respective vectors by a function call getValues(). With these three classes I can specify the buffer protocol three times for all classes.

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/numpy.h>
#include <pybind11/buffer_info.h>

namespace py = pybind11;

// One record handed to Container::addElement: `i` is appended to the vector
// exposed by getInts(), `d` to getDoubles() and `d2` to getDoubles2().
// Callers brace-initialise it positionally (see fillContainer), so the
// member order i, d, d2 is part of its interface.
struct ContainerElement
{
    uint8_t i;   // value for the uint8_t column
    double d;    // value for the first double column
    double d2;   // value for the second double column
};

// Column-oriented store: every ContainerElement added is split across three
// parallel vectors, one per field.
class Container
{
public:
    // Each getter hands out a mutable reference to the underlying vector,
    // so callers see (and may modify) the live storage.
    std::vector<uint8_t>& getInts() { return ints_; }
    std::vector<double>& getDoubles() { return doubles_; }
    std::vector<double>& getDoubles2() { return doubles2_; }

    // Appends one element, pushing each field onto its own column so the
    // three vectors always grow in lock-step.
    void addElement(ContainerElement element)
    {
        ints_.push_back(element.i);
        doubles_.push_back(element.d);
        doubles2_.push_back(element.d2);
    }

private:
    std::vector<uint8_t> ints_;
    std::vector<double> doubles_;
    std::vector<double> doubles2_;
};

// Appends one million elements to `container`. Element number k carries k in
// both double fields and k truncated to 8 bits (i.e. k modulo 256) in the
// uint8_t field.
void fillContainer(Container& container)
{
    constexpr int kElementCount = 1000000;

    for (int value = 0; value < kElementCount; ++value)
    {
        const ContainerElement element{
            static_cast<uint8_t>(value),
            static_cast<double>(value),
            static_cast<double>(value)
        };
        container.addElement(element);
    }
}

class Ints
{
private:
    Container& cont;
public:
    Ints(Container& cont) : cont(cont) {}
    std::vector<uint8_t>& getValues() { return cont.getInts(); }
};

class Doubles
{
private:
    Container& cont;
public:
    Doubles(Container& cont) : cont(cont) {}
    std::vector<double>& getValues() { return cont.getDoubles(); }
};

class Doubles2
{
private:
    Container& cont;
public:
    Doubles2(Container& cont) : cont(cont) {}
    std::vector<double>& getValues() { return cont.getDoubles2(); }
};

// Python module exposing Container plus one buffer-protocol adaptor per
// vector. Each adaptor is constructed from a Container; keep_alive<1, 2>
// ties the Container's lifetime to the adaptor that references it.
PYBIND11_MODULE(newInterface, m) {
    py::class_<Container>(m, "Container")
        .def(py::init<>());

    // uint8_t column
    py::class_<Ints>(m, "Ints", py::buffer_protocol())
        .def(py::init<Container&>(), py::keep_alive<1, 2>())
        .def_buffer([](Ints& view) -> py::buffer_info {
            std::vector<uint8_t>& values = view.getValues();
            return py::buffer_info(
                values.data(),
                sizeof(uint8_t),
                py::format_descriptor<uint8_t>::format(),
                values.size());
        });

    // first double column
    py::class_<Doubles>(m, "Doubles", py::buffer_protocol())
        .def(py::init<Container&>(), py::keep_alive<1, 2>())
        .def_buffer([](Doubles& view) -> py::buffer_info {
            std::vector<double>& values = view.getValues();
            return py::buffer_info(
                values.data(),
                sizeof(double),
                py::format_descriptor<double>::format(),
                values.size());
        });

    // second double column
    py::class_<Doubles2>(m, "Doubles2", py::buffer_protocol())
        .def(py::init<Container&>(), py::keep_alive<1, 2>())
        .def_buffer([](Doubles2& view) -> py::buffer_info {
            std::vector<double>& values = view.getValues();
            return py::buffer_info(
                values.data(),
                sizeof(double),
                py::format_descriptor<double>::format(),
                values.size());
        });

    m.def("fillContainer", &fillContainer);
}

This way I can use the code in the following way in Python:

import newInterface as ci
import numpy as np

# Create an empty Container and let the C++ side populate it.
container = ci.Container()
ci.fillContainer(container)

# Wrap the container in the per-column adaptor classes and build NumPy arrays
# from the buffers they export; copy=False asks NumPy not to copy the data.
i = np.array(ci.Ints(container), copy=False)   # uint8 column
d = np.array(ci.Doubles(container), copy=False)    # first double column
d2 = np.array(ci.Doubles2(container), copy=False)  # second double column

Once fillContainer has filled the container, constructing the NumPy arrays does not copy the values out of the container.

Upvotes: 3

Nick
Nick

Reputation: 27996

I'm guessing that you have to specify that the access functions return references instead of a copy, which is probably the default. I don't know how you do this with pybind but I've done this with boost::python and Ponder.

I.e. you need to specify the return policy.

Upvotes: 1

Related Questions