Reputation: 155
I'm writing C code for python (Python C API), and I noticed that python is not releasing the memory of the file, I'm wondering if the issue is in my code.
I want to simplify as much as passable, but I hope that no details will be missing.
The file is a binary file with buffers, first 4 bytes is the buffer size, then the buffer.
The binary file (big_file.comp):
du ~/Desktop/TEST_FILES/big_file.comp
4175416 ~/Desktop/TEST_FILES/big_file.comp
The python code (test.py):
#!/usr/bin/env python3
from struct import unpack_from
from psutil import Process
from os import getpid
import decomplib
def file_handler(file_name):
with open(file_name, 'rb') as reader:
while True:
next_4_bytes = reader.read(4)
if next_4_bytes == b'':
break
next_size, *_ = unpack_from("I", next_4_bytes)
buffer = reader.read(next_size)
yield buffer, next_size
def main():
args = _parse_args()
decompress = decomplib.Decompress()
for buf, buf_size in file_handler(args.file):
for msg in decompress.decompress_buffer(buf, buf_size):
print(msg)
if __name__ == "__main__":
pid = getpid()
ps = Process(pid)
main()
print(ps.memory_info())
Some of the C code simplified:
#include <Python.h>
#include "structmember.h"
typedef struct {
PyObject_HEAD
uint32_t arr_size;
} DecompressObject;
static int Decompress_init(DecompressObject *self, PyObject *args, PyObject *kwds){
return 0;
}
static PyObject* Decompress_handle_buffer(DecompressObject* self, PyObject* args){
uint32_t buf_size = 0;
uint8_t *buf = NULL;
// get buffer and buffer length from python function
if(!PyArg_ParseTuple(args, "y*i", &buf, &buf_size)){
PyErr_SetString(PyExc_Exception, "Failed to parse function arguments");
return NULL;
}
self->arr_size = 10;
Py_XINCREF(self);
return (PyObject *) self;
}
static PyObject* Decompress_next(DecompressObject *self, PyObject *Py_UNUSED(ignored)){
static uint32_t seq_index = 0;
if (seq_index < self->arr_size) {
seq_index++;
Py_RETURN_NONE;
}
seq_index = 0;
return NULL;
}
static void Decompress_dealloc(DecompressObject *self){
Py_TYPE(self)->tp_free((PyObject *) self);
}
static PyMethodDef Decompress_methods[] = {
{"decompress_buffer", (PyCFunction) Decompress_handle_buffer, METH_VARARGS, "Decompress a buffer to asc data."},
{NULL} /* Sentinel */
};
static PyTypeObject DecompressType = {
PyVarObject_HEAD_INIT(NULL, 0)
.tp_name = "decomplib.Decompress",
.tp_doc = "Decompress object",
.tp_basicsize = sizeof(DecompressObject),
.tp_itemsize = 0,
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
.tp_alloc = PyType_GenericAlloc,
.tp_new = PyType_GenericNew,
.tp_iter = PyObject_SelfIter,
.tp_init = (initproc) Decompress_init,
.tp_dealloc = (destructor) Decompress_dealloc,
.tp_iternext = (iternextfunc) Decompress_next,
.tp_methods = Decompress_methods,
};
static PyModuleDef Decompressmodule = {
PyModuleDef_HEAD_INIT,
.m_name = "decomplib",
.m_doc = "Decompress an compressed file.",
.m_size = -1,
};
PyMODINIT_FUNC PyInit_decomplib(void){
PyObject *d;
if (PyType_Ready(&DecompressType) < 0)
return NULL;
d = PyModule_Create(&Decompressmodule);
if (d == NULL)
return NULL;
Py_INCREF(&DecompressType);
if (PyModule_AddObject(d, "Decompress", (PyObject *) &DecompressType) < 0) {
Py_DECREF(&DecompressType);
Py_DECREF(d);
return NULL;
}
return d;
}
As a result, I got the following output:
./test.py -f ~/Desktop/TEST_CAN_OPT/big_fie.comp
None
None
None
...
None
None
None
pmem(rss=4349915136, vms=4412583936, shared=6270976, text=2867200, lib=0, data=4344135680, dirty=0)
While playing around I noticed that if I change in the C
function Decompress_handle_buffer
the call to the function PyArg_ParseTuple
the second argument from "y*i"
to "Si"
, Python do cleanup the memory...
./test.py -f ~/Desktop/TEST_CAN_OPT/big_fie.comp
None
None
None
...
None
None
None
pmem(rss=22577152, vms=84869120, shared=6361088, text=2867200, lib=0, data=16420864, dirty=0)
However, The buffer is NOT correctly read.
Any ideas?!
Extra Info:
Upvotes: 1
Views: 144
Reputation: 280485
y*
does not correspond to uint8_t
like you're using it. As stated in the documentation, it fills a Py_buffer
struct that you're supposed to provide.
You need to actually provide a Py_buffer, and when you're done with it, you need to release the buffer with PyBuffer_Release
.
Upvotes: 1