diff --git a/benchmark_scripts/memory_leak_test.py b/benchmark_scripts/memory_leak_test.py index aba7d65c..f49387de 100644 --- a/benchmark_scripts/memory_leak_test.py +++ b/benchmark_scripts/memory_leak_test.py @@ -1,8 +1,20 @@ import sys from isal import igzip +from pathlib import Path import resource import gc +README = Path(__file__).parent.parent / "README.rst" +GZIPPED_DATA = igzip.compress(README.read_bytes()) + +for _ in range(10): + for _ in range(1000): + a = igzip.decompress(GZIPPED_DATA) + memory_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + memory_usage_mb = memory_usage / 1024 + print(f"Maximum memory usage: {memory_usage_mb:.2f} MiB") + + for _ in range(10): with igzip.open(sys.argv[1], "rb") as reader: a = reader.read() @@ -12,6 +24,8 @@ memory_usage_mb = memory_usage / 1024 print(f"Maximum memory usage: {memory_usage_mb:.2f} MiB") del(a) + + objects_and_size = [(sys.getsizeof(obj), type(obj)) for obj in gc.get_objects()] objects_and_size.sort(key=lambda x: x[0], reverse=True) diff --git a/src/isal/isal_zlibmodule.c b/src/isal/isal_zlibmodule.c index 257260f8..1331b2b7 100644 --- a/src/isal/isal_zlibmodule.c +++ b/src/isal/isal_zlibmodule.c @@ -1948,75 +1948,66 @@ GzipReader_seek(GzipReader *self, PyObject *args, PyObject *kwargs) return PyLong_FromLongLong(self->_pos); } +#define READALL_MAX_BLOCKS 48 static PyObject * GzipReader_readall(GzipReader *self, PyObject *Py_UNUSED(ignore)) { - /* Try to consume the entire buffer without too much overallocation */ - Py_ssize_t chunk_size = self->buffer_size * 4; - /* Rather than immediately creating a list, read one chunk first and - only create a list when more read operations are necessary. */ - PyObject *first_chunk = PyBytes_FromStringAndSize(NULL, chunk_size); - if (first_chunk == NULL) { - return NULL; - } - ENTER_ZLIB(self); - Py_ssize_t written_size = GzipReader_read_into_buffer( - self, (uint8_t *)PyBytes_AS_STRING(first_chunk), chunk_size); - LEAVE_ZLIB(self); - if (written_size < 0) { - Py_DECREF(first_chunk); - return NULL; - } - if (written_size < chunk_size) { - if (_PyBytes_Resize(&first_chunk, written_size) < 0) { - return NULL; - } - return first_chunk; - } - - PyObject *chunk_list = PyList_New(1); - if (chunk_list == NULL) { - return NULL; - } - PyList_SET_ITEM(chunk_list, 0, first_chunk); + Py_ssize_t bufsize = Py_MIN(DEF_BUF_SIZE, self->buffer_size * 4); + PyObject *read_in_blocks[READALL_MAX_BLOCKS]; + size_t blocks_read = 0; + Py_ssize_t total_read_size = 0; + PyObject *ret = NULL; while (1) { - PyObject *chunk = PyBytes_FromStringAndSize(NULL, chunk_size); - if (chunk == NULL) { - Py_DECREF(chunk_list); - return NULL; - } - ENTER_ZLIB(self); - written_size = GzipReader_read_into_buffer( - self, (uint8_t *)PyBytes_AS_STRING(chunk), chunk_size); - LEAVE_ZLIB(self); - if (written_size < 0) { - Py_DECREF(chunk); - Py_DECREF(chunk_list); - return NULL; + if (blocks_read >= READALL_MAX_BLOCKS) { + PyErr_Format( + PyExc_OverflowError, + "Total size exceeds %zd bytes", total_read_size + ); } - if (written_size == 0) { - Py_DECREF(chunk); - break; + PyObject *block = PyBytes_FromStringAndSize(NULL, bufsize); + if (block == NULL) { + PyErr_NoMemory(); + goto readall_end; } - if (_PyBytes_Resize(&chunk, written_size) < 0) { - Py_DECREF(chunk_list); - return NULL; + + Py_ssize_t read_size = GzipReader_read_into_buffer( + self, (uint8_t *)PyBytes_AS_STRING(block), bufsize); + if (read_size < 0) { + Py_DECREF(block); + goto readall_end; } - int ret = PyList_Append(chunk_list, chunk); - Py_DECREF(chunk); - if (ret < 0) { - Py_DECREF(chunk_list); - return NULL; + total_read_size += read_size; + if (read_size < bufsize) { + if (_PyBytes_Resize(&block, read_size) < 0) { + goto readall_end; + } + read_in_blocks[blocks_read] = block; + blocks_read += 1; + break; } + read_in_blocks[blocks_read] = block; + blocks_read += 1; + bufsize *= 2; } - PyObject *empty_bytes = PyBytes_FromStringAndSize(NULL, 0); - if (empty_bytes == NULL) { - Py_DECREF(chunk_list); - return NULL; + if (blocks_read == 1) { + return read_in_blocks[0]; + } + ret = PyBytes_FromStringAndSize(NULL, total_read_size); + if (ret == NULL) { + PyErr_NoMemory(); + goto readall_end; + } + char *ret_ptr = PyBytes_AS_STRING(ret); + for (size_t i=0; i