Rami Hassan
Rami Hassan

Reputation: 155

C_Python not releasing buffer memory

I'm writing C code for python (Python C API), and I noticed that python is not releasing the memory of the file, I'm wondering if the issue is in my code.

I want to simplify as much as passable, but I hope that no details will be missing.

The file is a binary file with buffers, first 4 bytes is the buffer size, then the buffer.

The binary file (big_file.comp):

du ~/Desktop/TEST_FILES/big_file.comp
4175416 ~/Desktop/TEST_FILES/big_file.comp

The python code (test.py):

#!/usr/bin/env python3

from struct import unpack_from
from psutil import Process
from os import getpid
import decomplib


def file_handler(file_name):
    with open(file_name, 'rb') as reader:
        while True:
            next_4_bytes = reader.read(4)
            if next_4_bytes == b'':
                break
            next_size, *_ = unpack_from("I", next_4_bytes)
            buffer = reader.read(next_size)
            yield buffer, next_size


def main():
    args = _parse_args()
    decompress = decomplib.Decompress()
    for buf, buf_size in file_handler(args.file):
        for msg in decompress.decompress_buffer(buf, buf_size):
            print(msg)


if __name__ == "__main__":
    pid = getpid()
    ps = Process(pid)
    main()
    print(ps.memory_info())

Some of the C code simplified:

#include <Python.h>
#include "structmember.h"

typedef struct {
    PyObject_HEAD
    uint32_t arr_size;
} DecompressObject;


static int Decompress_init(DecompressObject *self, PyObject *args, PyObject *kwds){
    return 0;
}

static PyObject* Decompress_handle_buffer(DecompressObject* self, PyObject* args){
    uint32_t buf_size = 0;
    uint8_t *buf = NULL;

    // get buffer and buffer length from python function
    if(!PyArg_ParseTuple(args, "y*i", &buf, &buf_size)){
        PyErr_SetString(PyExc_Exception, "Failed to parse function arguments");
        return NULL;
    }

    self->arr_size = 10;
    Py_XINCREF(self);
    return (PyObject *) self;
}

static PyObject* Decompress_next(DecompressObject *self, PyObject *Py_UNUSED(ignored)){
    static uint32_t seq_index = 0;
    if (seq_index < self->arr_size) {
        seq_index++;
        Py_RETURN_NONE;
    }
    seq_index = 0;
    return NULL;
}

static void Decompress_dealloc(DecompressObject *self){
    Py_TYPE(self)->tp_free((PyObject *) self);
}


static PyMethodDef Decompress_methods[] = {
    {"decompress_buffer", (PyCFunction) Decompress_handle_buffer, METH_VARARGS, "Decompress a buffer to asc data."},
    {NULL}  /* Sentinel */
};

static PyTypeObject DecompressType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    .tp_name = "decomplib.Decompress",
    .tp_doc = "Decompress object",
    .tp_basicsize = sizeof(DecompressObject),
    .tp_itemsize = 0,
    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
    .tp_alloc = PyType_GenericAlloc,
    .tp_new = PyType_GenericNew,
    .tp_iter = PyObject_SelfIter,
    .tp_init = (initproc) Decompress_init,
    .tp_dealloc = (destructor) Decompress_dealloc,
    .tp_iternext = (iternextfunc) Decompress_next,
    .tp_methods = Decompress_methods,
};

static PyModuleDef Decompressmodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "decomplib",
    .m_doc = "Decompress an compressed file.",
    .m_size = -1,
};


PyMODINIT_FUNC PyInit_decomplib(void){
    PyObject *d;
    if (PyType_Ready(&DecompressType) < 0)
        return NULL;

    d = PyModule_Create(&Decompressmodule);
    if (d == NULL)
        return NULL;

    Py_INCREF(&DecompressType);
    if (PyModule_AddObject(d, "Decompress", (PyObject *) &DecompressType) < 0) {
        Py_DECREF(&DecompressType);
        Py_DECREF(d);
        return NULL;
    }

    return d;
}

As a result, I got the following output:

./test.py -f ~/Desktop/TEST_CAN_OPT/big_fie.comp
None
None
None
...
None
None
None
pmem(rss=4349915136, vms=4412583936, shared=6270976, text=2867200, lib=0, data=4344135680, dirty=0)

While playing around I noticed that if I change in the C function Decompress_handle_buffer the call to the function PyArg_ParseTuple the second argument from "y*i" to "Si", Python do cleanup the memory...

./test.py -f ~/Desktop/TEST_CAN_OPT/big_fie.comp
None
None
None
...
None
None
None
pmem(rss=22577152, vms=84869120, shared=6361088, text=2867200, lib=0, data=16420864, dirty=0)

However, The buffer is NOT correctly read.
Any ideas?!

Extra Info:

  • I'm using a virtual machine (VMware Workstation 15)
  • OS Ubuntu 18.4
  • Python 3.6.9

    Upvotes: 1

    Views: 144

  • Answers (1)

    user2357112
    user2357112

    Reputation: 280485

    y* does not correspond to uint8_t like you're using it. As stated in the documentation, it fills a Py_buffer struct that you're supposed to provide.

    You need to actually provide a Py_buffer, and when you're done with it, you need to release the buffer with PyBuffer_Release.

    Upvotes: 1

    Related Questions