diff --git a/Modules/_testinternalcapi/clinic/test_lock.c.h b/Modules/_testinternalcapi/clinic/test_lock.c.h index 86875767343cd2..234eca2b8d6a67 100644 --- a/Modules/_testinternalcapi/clinic/test_lock.c.h +++ b/Modules/_testinternalcapi/clinic/test_lock.c.h @@ -2,35 +2,74 @@ preserve [clinic start generated code]*/ +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif #include "pycore_abstract.h" // _PyNumber_Index() -#include "pycore_modsupport.h" // _PyArg_CheckPositional() +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(_testinternalcapi_benchmark_locks__doc__, -"benchmark_locks($module, num_threads, use_pymutex=True,\n" -" critical_section_length=1, time_ms=1000, /)\n" +"benchmark_locks($module, num_threads, /, *, num_locks=1,\n" +" critical_section_length=1, work_outside_length=0,\n" +" time_ms=1000, iters_limit=0)\n" "--\n" "\n"); #define _TESTINTERNALCAPI_BENCHMARK_LOCKS_METHODDEF \ - {"benchmark_locks", _PyCFunction_CAST(_testinternalcapi_benchmark_locks), METH_FASTCALL, _testinternalcapi_benchmark_locks__doc__}, + {"benchmark_locks", _PyCFunction_CAST(_testinternalcapi_benchmark_locks), METH_FASTCALL|METH_KEYWORDS, _testinternalcapi_benchmark_locks__doc__}, static PyObject * _testinternalcapi_benchmark_locks_impl(PyObject *module, Py_ssize_t num_threads, - int use_pymutex, + Py_ssize_t num_locks, int critical_section_length, - int time_ms); + int work_outside_length, int time_ms, + Py_ssize_t iters_limit); static PyObject * -_testinternalcapi_benchmark_locks(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +_testinternalcapi_benchmark_locks(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 5 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(num_locks), &_Py_ID(critical_section_length), &_Py_ID(work_outside_length), &_Py_ID(time_ms), &_Py_ID(iters_limit), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "num_locks", "critical_section_length", "work_outside_length", "time_ms", "iters_limit", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "benchmark_locks", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[6]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_ssize_t num_threads; - int use_pymutex = 1; + Py_ssize_t num_locks = 1; int critical_section_length = 1; + int work_outside_length = 0; int time_ms = 1000; + Py_ssize_t iters_limit = 0; - if (!_PyArg_CheckPositional("benchmark_locks", nargs, 1, 4)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { goto exit; } { @@ -45,31 +84,69 @@ _testinternalcapi_benchmark_locks(PyObject *module, PyObject *const *args, Py_ss } num_threads = ival; } - if (nargs < 2) { - goto skip_optional; + if (!noptargs) { + goto skip_optional_kwonly; } - use_pymutex = PyObject_IsTrue(args[1]); - if (use_pymutex < 0) { - goto exit; + if (args[1]) { + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[1]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + num_locks = ival; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } - if (nargs < 3) { - goto skip_optional; + if (args[2]) { + critical_section_length = PyLong_AsInt(args[2]); + if (critical_section_length == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } - critical_section_length = PyLong_AsInt(args[2]); - if (critical_section_length == -1 && PyErr_Occurred()) { - goto exit; + if (args[3]) { + work_outside_length = PyLong_AsInt(args[3]); + if (work_outside_length == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } - if (nargs < 4) { - goto skip_optional; + if (args[4]) { + time_ms = PyLong_AsInt(args[4]); + if (time_ms == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } - time_ms = PyLong_AsInt(args[3]); - if (time_ms == -1 && PyErr_Occurred()) { - goto exit; + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[5]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + iters_limit = ival; } -skip_optional: - return_value = _testinternalcapi_benchmark_locks_impl(module, num_threads, use_pymutex, critical_section_length, time_ms); +skip_optional_kwonly: + return_value = _testinternalcapi_benchmark_locks_impl(module, num_threads, num_locks, critical_section_length, work_outside_length, time_ms, iters_limit); exit: return return_value; } -/*[clinic end generated code: output=105105d759c0c271 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=c53bf7118fb334cc input=a9049054013a1b77]*/ diff --git a/Modules/_testinternalcapi/test_lock.c b/Modules/_testinternalcapi/test_lock.c index ded76ca9fe6819..14f502c810bf74 100644 --- a/Modules/_testinternalcapi/test_lock.c +++ b/Modules/_testinternalcapi/test_lock.c @@ -2,7 +2,7 @@ #include "parts.h" #include "pycore_lock.h" -#include "pycore_pythread.h" // PyThread_get_thread_ident_ex() +#include "pycore_pythread.h" // PyThread_get_thread_ident_ex() #include "clinic/test_lock.c.h" @@ -10,7 +10,7 @@ #define WIN32_LEAN_AND_MEAN #include #else -#include // usleep() +#include // usleep() #endif /*[clinic input] @@ -18,490 +18,476 @@ module _testinternalcapi [clinic start generated code]*/ /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7bb583d8c9eb9a78]*/ - -static void -pysleep(int ms) -{ +static void pysleep(int ms) { #ifdef MS_WINDOWS - Sleep(ms); + Sleep(ms); #else - usleep(ms * 1000); + usleep(ms * 1000); #endif } -static PyObject * -test_lock_basic(PyObject *self, PyObject *obj) -{ - PyMutex m = (PyMutex){0}; +static PyObject *test_lock_basic(PyObject *self, PyObject *obj) { + PyMutex m = (PyMutex){0}; - // uncontended lock and unlock - PyMutex_Lock(&m); - assert(m._bits == 1); - PyMutex_Unlock(&m); - assert(m._bits == 0); + // uncontended lock and unlock + PyMutex_Lock(&m); + assert(m._bits == 1); + PyMutex_Unlock(&m); + assert(m._bits == 0); - Py_RETURN_NONE; + Py_RETURN_NONE; } struct test_lock2_data { - PyMutex m; - PyEvent done; - int started; + PyMutex m; + PyEvent done; + int started; }; -static void -lock_thread(void *arg) -{ - struct test_lock2_data *test_data = arg; - PyMutex *m = &test_data->m; - _Py_atomic_store_int(&test_data->started, 1); +static void lock_thread(void *arg) { + struct test_lock2_data *test_data = arg; + PyMutex *m = &test_data->m; + _Py_atomic_store_int(&test_data->started, 1); - PyMutex_Lock(m); - // gh-135641: in rare cases the lock may still have `_Py_HAS_PARKED` set - // (m->_bits == 3) due to bucket collisions in the parking lot hash table - // between this mutex and the `test_data.done` event. - assert(m->_bits == 1 || m->_bits == 3); + PyMutex_Lock(m); + // gh-135641: in rare cases the lock may still have `_Py_HAS_PARKED` set + // (m->_bits == 3) due to bucket collisions in the parking lot hash table + // between this mutex and the `test_data.done` event. + assert(m->_bits == 1 || m->_bits == 3); - PyMutex_Unlock(m); - assert(m->_bits == 0); + PyMutex_Unlock(m); + assert(m->_bits == 0); - _PyEvent_Notify(&test_data->done); + _PyEvent_Notify(&test_data->done); } -static PyObject * -test_lock_two_threads(PyObject *self, PyObject *obj) -{ - // lock attempt by two threads - struct test_lock2_data test_data; - memset(&test_data, 0, sizeof(test_data)); - - PyMutex_Lock(&test_data.m); - assert(test_data.m._bits == 1); - - PyThread_start_new_thread(lock_thread, &test_data); - - // wait up to two seconds for the lock_thread to attempt to lock "m" - int iters = 0; - uint8_t v; - do { - pysleep(10); // allow some time for the other thread to try to lock - v = _Py_atomic_load_uint8_relaxed(&test_data.m._bits); - assert(v == 1 || v == 3); - iters++; - } while (v != 3 && iters < 200); - - // both the "locked" and the "has parked" bits should be set +static PyObject *test_lock_two_threads(PyObject *self, PyObject *obj) { + // lock attempt by two threads + struct test_lock2_data test_data; + memset(&test_data, 0, sizeof(test_data)); + + PyMutex_Lock(&test_data.m); + assert(test_data.m._bits == 1); + + PyThread_start_new_thread(lock_thread, &test_data); + + // wait up to two seconds for the lock_thread to attempt to lock "m" + int iters = 0; + uint8_t v; + do { + pysleep(10); // allow some time for the other thread to try to lock v = _Py_atomic_load_uint8_relaxed(&test_data.m._bits); - assert(v == 3); + assert(v == 1 || v == 3); + iters++; + } while (v != 3 && iters < 200); - PyMutex_Unlock(&test_data.m); - PyEvent_Wait(&test_data.done); - assert(test_data.m._bits == 0); + // both the "locked" and the "has parked" bits should be set + v = _Py_atomic_load_uint8_relaxed(&test_data.m._bits); + assert(v == 3); - Py_RETURN_NONE; + PyMutex_Unlock(&test_data.m); + PyEvent_Wait(&test_data.done); + assert(test_data.m._bits == 0); + + Py_RETURN_NONE; } #define COUNTER_THREADS 5 #define COUNTER_ITERS 10000 struct test_data_counter { - PyMutex m; - Py_ssize_t counter; + PyMutex m; + Py_ssize_t counter; }; struct thread_data_counter { - struct test_data_counter *test_data; - PyEvent done_event; + struct test_data_counter *test_data; + PyEvent done_event; }; -static void -counter_thread(void *arg) -{ - struct thread_data_counter *thread_data = arg; - struct test_data_counter *test_data = thread_data->test_data; +static void counter_thread(void *arg) { + struct thread_data_counter *thread_data = arg; + struct test_data_counter *test_data = thread_data->test_data; - for (Py_ssize_t i = 0; i < COUNTER_ITERS; i++) { - PyMutex_Lock(&test_data->m); - test_data->counter++; - PyMutex_Unlock(&test_data->m); - } - _PyEvent_Notify(&thread_data->done_event); + for (Py_ssize_t i = 0; i < COUNTER_ITERS; i++) { + PyMutex_Lock(&test_data->m); + test_data->counter++; + PyMutex_Unlock(&test_data->m); + } + _PyEvent_Notify(&thread_data->done_event); } -static PyObject * -test_lock_counter(PyObject *self, PyObject *obj) -{ - // Test with rapidly locking and unlocking mutex - struct test_data_counter test_data; - memset(&test_data, 0, sizeof(test_data)); +static PyObject *test_lock_counter(PyObject *self, PyObject *obj) { + // Test with rapidly locking and unlocking mutex + struct test_data_counter test_data; + memset(&test_data, 0, sizeof(test_data)); - struct thread_data_counter thread_data[COUNTER_THREADS]; - memset(&thread_data, 0, sizeof(thread_data)); + struct thread_data_counter thread_data[COUNTER_THREADS]; + memset(&thread_data, 0, sizeof(thread_data)); - for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) { - thread_data[i].test_data = &test_data; - PyThread_start_new_thread(counter_thread, &thread_data[i]); - } + for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) { + thread_data[i].test_data = &test_data; + PyThread_start_new_thread(counter_thread, &thread_data[i]); + } - for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) { - PyEvent_Wait(&thread_data[i].done_event); - } + for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) { + PyEvent_Wait(&thread_data[i].done_event); + } - assert(test_data.counter == COUNTER_THREADS * COUNTER_ITERS); - Py_RETURN_NONE; + assert(test_data.counter == COUNTER_THREADS * COUNTER_ITERS); + Py_RETURN_NONE; } #define SLOW_COUNTER_ITERS 100 -static void -slow_counter_thread(void *arg) -{ - struct thread_data_counter *thread_data = arg; - struct test_data_counter *test_data = thread_data->test_data; - - for (Py_ssize_t i = 0; i < SLOW_COUNTER_ITERS; i++) { - PyMutex_Lock(&test_data->m); - if (i % 7 == 0) { - pysleep(2); - } - test_data->counter++; - PyMutex_Unlock(&test_data->m); +static void slow_counter_thread(void *arg) { + struct thread_data_counter *thread_data = arg; + struct test_data_counter *test_data = thread_data->test_data; + + for (Py_ssize_t i = 0; i < SLOW_COUNTER_ITERS; i++) { + PyMutex_Lock(&test_data->m); + if (i % 7 == 0) { + pysleep(2); } - _PyEvent_Notify(&thread_data->done_event); + test_data->counter++; + PyMutex_Unlock(&test_data->m); + } + _PyEvent_Notify(&thread_data->done_event); } -static PyObject * -test_lock_counter_slow(PyObject *self, PyObject *obj) -{ - // Test lock/unlock with occasional "long" critical section, which will - // trigger handoff of the lock. - struct test_data_counter test_data; - memset(&test_data, 0, sizeof(test_data)); +static PyObject *test_lock_counter_slow(PyObject *self, PyObject *obj) { + // Test lock/unlock with occasional "long" critical section, which will + // trigger handoff of the lock. + struct test_data_counter test_data; + memset(&test_data, 0, sizeof(test_data)); - struct thread_data_counter thread_data[COUNTER_THREADS]; - memset(&thread_data, 0, sizeof(thread_data)); + struct thread_data_counter thread_data[COUNTER_THREADS]; + memset(&thread_data, 0, sizeof(thread_data)); - for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) { - thread_data[i].test_data = &test_data; - PyThread_start_new_thread(slow_counter_thread, &thread_data[i]); - } + for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) { + thread_data[i].test_data = &test_data; + PyThread_start_new_thread(slow_counter_thread, &thread_data[i]); + } - for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) { - PyEvent_Wait(&thread_data[i].done_event); - } + for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) { + PyEvent_Wait(&thread_data[i].done_event); + } - assert(test_data.counter == COUNTER_THREADS * SLOW_COUNTER_ITERS); - Py_RETURN_NONE; + assert(test_data.counter == COUNTER_THREADS * SLOW_COUNTER_ITERS); + Py_RETURN_NONE; } +struct lock_state { + PyMutex mutex; + double value; + char padding[64]; +}; + struct bench_data_locks { - int stop; - int use_pymutex; - int critical_section_length; - char padding[200]; - PyThread_type_lock lock; - PyMutex m; - double value; - Py_ssize_t total_iters; + int stop; + int critical_section_length; + int work_outside_length; + Py_ssize_t num_locks; + Py_ssize_t iters_limit; + struct lock_state *locks; + Py_ssize_t total_iters; }; struct bench_thread_data { - struct bench_data_locks *bench_data; - Py_ssize_t iters; - PyEvent done; + struct bench_data_locks *bench_data; + Py_ssize_t iters; + PyEvent done; + Py_ssize_t index; }; -static void -thread_benchmark_locks(void *arg) -{ - struct bench_thread_data *thread_data = arg; - struct bench_data_locks *bench_data = thread_data->bench_data; - int use_pymutex = bench_data->use_pymutex; - int critical_section_length = bench_data->critical_section_length; - - double my_value = 1.0; - Py_ssize_t iters = 0; - while (!_Py_atomic_load_int_relaxed(&bench_data->stop)) { - if (use_pymutex) { - PyMutex_Lock(&bench_data->m); - for (int i = 0; i < critical_section_length; i++) { - bench_data->value += my_value; - my_value = bench_data->value; - } - PyMutex_Unlock(&bench_data->m); - } - else { - PyThread_acquire_lock(bench_data->lock, 1); - for (int i = 0; i < critical_section_length; i++) { - bench_data->value += my_value; - my_value = bench_data->value; - } - PyThread_release_lock(bench_data->lock); - } - iters++; +static void thread_benchmark_locks(void *arg) { + struct bench_thread_data *thread_data = arg; + struct bench_data_locks *bench_data = thread_data->bench_data; + int critical_section_length = bench_data->critical_section_length; + int work_outside_length = bench_data->work_outside_length; + Py_ssize_t iters_limit = bench_data->iters_limit; + Py_ssize_t num_locks = bench_data->num_locks; + struct lock_state *state = &bench_data->locks[thread_data->index % num_locks]; + + double my_value = 1.0; + Py_ssize_t iters = 0; + while (!_Py_atomic_load_int_relaxed(&bench_data->stop)) { + if (iters_limit > 0 && iters >= iters_limit) { + break; + } + + for (int i = 0; i < work_outside_length; i++) { + my_value += 1.0; + } + + PyMutex_Lock(&state->mutex); + for (int i = 0; i < critical_section_length; i++) { + state->value += my_value; + my_value = state->value; } + PyMutex_Unlock(&state->mutex); + iters++; + } - thread_data->iters = iters; - _Py_atomic_add_ssize(&bench_data->total_iters, iters); - _PyEvent_Notify(&thread_data->done); + thread_data->iters = iters; + _Py_atomic_add_ssize(&bench_data->total_iters, iters); + _PyEvent_Notify(&thread_data->done); } /*[clinic input] _testinternalcapi.benchmark_locks num_threads: Py_ssize_t - use_pymutex: bool = True + / + * + num_locks: Py_ssize_t = 1 critical_section_length: int = 1 + work_outside_length: int = 0 time_ms: int = 1000 - / + iters_limit: Py_ssize_t = 0 [clinic start generated code]*/ static PyObject * _testinternalcapi_benchmark_locks_impl(PyObject *module, Py_ssize_t num_threads, - int use_pymutex, + Py_ssize_t num_locks, int critical_section_length, - int time_ms) -/*[clinic end generated code: output=381df8d7e9a74f18 input=f3aeaf688738c121]*/ + int work_outside_length, int time_ms, + Py_ssize_t iters_limit) +/*[clinic end generated code: output=1060df8700b70a72 input=6b1638d1cfa4f152]*/ { - // Run from Tools/lockbench/lockbench.py - // Based on the WebKit lock benchmarks: - // https://github.com/WebKit/WebKit/blob/main/Source/WTF/benchmarks/LockSpeedTest.cpp - // See also https://webkit.org/blog/6161/locking-in-webkit/ - PyObject *thread_iters = NULL; - PyObject *res = NULL; - - struct bench_data_locks bench_data; - memset(&bench_data, 0, sizeof(bench_data)); - bench_data.use_pymutex = use_pymutex; - bench_data.critical_section_length = critical_section_length; - - bench_data.lock = PyThread_allocate_lock(); - if (bench_data.lock == NULL) { - return PyErr_NoMemory(); - } - - struct bench_thread_data *thread_data = NULL; - thread_data = PyMem_Calloc(num_threads, sizeof(*thread_data)); - if (thread_data == NULL) { - PyErr_NoMemory(); - goto exit; - } - - thread_iters = PyList_New(num_threads); - if (thread_iters == NULL) { - goto exit; - } - - PyTime_t start, end; - if (PyTime_PerfCounter(&start) < 0) { - goto exit; - } - + // Run from Tools/lockbench/lockbench.py + // Based on the WebKit lock benchmarks: + // https://github.com/WebKit/WebKit/blob/main/Source/WTF/benchmarks/LockSpeedTest.cpp + // See also https://webkit.org/blog/6161/locking-in-webkit/ + PyObject *thread_iters = NULL; + PyObject *res = NULL; + + struct bench_data_locks bench_data; + memset(&bench_data, 0, sizeof(bench_data)); + bench_data.critical_section_length = critical_section_length; + bench_data.work_outside_length = work_outside_length; + bench_data.num_locks = num_locks; + bench_data.iters_limit = iters_limit; + + bench_data.locks = PyMem_Calloc(num_locks, sizeof(struct lock_state)); + if (bench_data.locks == NULL) { + return PyErr_NoMemory(); + } + + struct bench_thread_data *thread_data = NULL; + thread_data = PyMem_Calloc(num_threads, sizeof(*thread_data)); + if (thread_data == NULL) { + PyErr_NoMemory(); + goto exit; + } + + thread_iters = PyList_New(num_threads); + if (thread_iters == NULL) { + goto exit; + } + + PyTime_t start, end; + if (PyTime_PerfCounter(&start) < 0) { + goto exit; + } + + for (Py_ssize_t i = 0; i < num_threads; i++) { + thread_data[i].bench_data = &bench_data; + thread_data[i].index = i; + PyThread_start_new_thread(thread_benchmark_locks, &thread_data[i]); + } + + if (iters_limit > 0) { + // Wait for all threads to finish their iterations for (Py_ssize_t i = 0; i < num_threads; i++) { - thread_data[i].bench_data = &bench_data; - PyThread_start_new_thread(thread_benchmark_locks, &thread_data[i]); + PyEvent_Wait(&thread_data[i].done); } - + _Py_atomic_store_int(&bench_data.stop, 1); + } else { // Let the threads run for `time_ms` milliseconds pysleep(time_ms); _Py_atomic_store_int(&bench_data.stop, 1); // Wait for the threads to finish for (Py_ssize_t i = 0; i < num_threads; i++) { - PyEvent_Wait(&thread_data[i].done); + PyEvent_Wait(&thread_data[i].done); } - - Py_ssize_t total_iters = bench_data.total_iters; - if (PyTime_PerfCounter(&end) < 0) { - goto exit; - } - - // Return the total number of acquisitions and the number of acquisitions - // for each thread. - for (Py_ssize_t i = 0; i < num_threads; i++) { - PyObject *iter = PyLong_FromSsize_t(thread_data[i].iters); - if (iter == NULL) { - goto exit; - } - PyList_SET_ITEM(thread_iters, i, iter); + } + + Py_ssize_t total_iters = bench_data.total_iters; + if (PyTime_PerfCounter(&end) < 0) { + goto exit; + } + + // Return the total number of acquisitions and the number of acquisitions + // for each thread. + for (Py_ssize_t i = 0; i < num_threads; i++) { + PyObject *iter = PyLong_FromSsize_t(thread_data[i].iters); + if (iter == NULL) { + goto exit; } + PyList_SET_ITEM(thread_iters, i, iter); + } - assert(end != start); - double rate = total_iters * 1e9 / (end - start); - res = Py_BuildValue("(dO)", rate, thread_iters); + assert(end != start); + double rate = total_iters * 1e9 / (end - start); + res = Py_BuildValue("(dO)", rate, thread_iters); exit: - PyThread_free_lock(bench_data.lock); - PyMem_Free(thread_data); - Py_XDECREF(thread_iters); - return res; + PyMem_Free(bench_data.locks); + PyMem_Free(thread_data); + Py_XDECREF(thread_iters); + return res; } -static PyObject * -test_lock_benchmark(PyObject *module, PyObject *obj) -{ - // Just make sure the benchmark runs without crashing - PyObject *res = _testinternalcapi_benchmark_locks_impl( - module, 1, 1, 1, 100); - if (res == NULL) { - return NULL; - } - Py_DECREF(res); - Py_RETURN_NONE; +static PyObject *test_lock_benchmark(PyObject *module, PyObject *obj) { + // Just make sure the benchmark runs without crashing + PyObject *res = + _testinternalcapi_benchmark_locks_impl(module, 1, 1, 1, 0, 100, 0); + if (res == NULL) { + return NULL; + } + Py_DECREF(res); + Py_RETURN_NONE; } -static int -init_maybe_fail(void *arg) -{ - int *counter = (int *)arg; - (*counter)++; - if (*counter < 5) { - // failure - return -1; - } - assert(*counter == 5); - return 0; +static int init_maybe_fail(void *arg) { + int *counter = (int *)arg; + (*counter)++; + if (*counter < 5) { + // failure + return -1; + } + assert(*counter == 5); + return 0; } -static PyObject * -test_lock_once(PyObject *self, PyObject *obj) -{ - _PyOnceFlag once = {0}; - int counter = 0; - for (int i = 0; i < 10; i++) { - int res = _PyOnceFlag_CallOnce(&once, init_maybe_fail, &counter); - if (i < 4) { - assert(res == -1); - } - else { - assert(res == 0); - assert(counter == 5); - } +static PyObject *test_lock_once(PyObject *self, PyObject *obj) { + _PyOnceFlag once = {0}; + int counter = 0; + for (int i = 0; i < 10; i++) { + int res = _PyOnceFlag_CallOnce(&once, init_maybe_fail, &counter); + if (i < 4) { + assert(res == -1); + } else { + assert(res == 0); + assert(counter == 5); } - Py_RETURN_NONE; + } + Py_RETURN_NONE; } struct test_rwlock_data { - Py_ssize_t nthreads; - _PyRWMutex rw; - PyEvent step1; - PyEvent step2; - PyEvent step3; - PyEvent done; + Py_ssize_t nthreads; + _PyRWMutex rw; + PyEvent step1; + PyEvent step2; + PyEvent step3; + PyEvent done; }; -static void -rdlock_thread(void *arg) -{ - struct test_rwlock_data *test_data = arg; +static void rdlock_thread(void *arg) { + struct test_rwlock_data *test_data = arg; - // Acquire the lock in read mode - _PyRWMutex_RLock(&test_data->rw); - PyEvent_Wait(&test_data->step1); - _PyRWMutex_RUnlock(&test_data->rw); + // Acquire the lock in read mode + _PyRWMutex_RLock(&test_data->rw); + PyEvent_Wait(&test_data->step1); + _PyRWMutex_RUnlock(&test_data->rw); - _PyRWMutex_RLock(&test_data->rw); - PyEvent_Wait(&test_data->step3); - _PyRWMutex_RUnlock(&test_data->rw); + _PyRWMutex_RLock(&test_data->rw); + PyEvent_Wait(&test_data->step3); + _PyRWMutex_RUnlock(&test_data->rw); - if (_Py_atomic_add_ssize(&test_data->nthreads, -1) == 1) { - _PyEvent_Notify(&test_data->done); - } + if (_Py_atomic_add_ssize(&test_data->nthreads, -1) == 1) { + _PyEvent_Notify(&test_data->done); + } } -static void -wrlock_thread(void *arg) -{ - struct test_rwlock_data *test_data = arg; +static void wrlock_thread(void *arg) { + struct test_rwlock_data *test_data = arg; - // First acquire the lock in write mode - _PyRWMutex_Lock(&test_data->rw); - PyEvent_Wait(&test_data->step2); - _PyRWMutex_Unlock(&test_data->rw); + // First acquire the lock in write mode + _PyRWMutex_Lock(&test_data->rw); + PyEvent_Wait(&test_data->step2); + _PyRWMutex_Unlock(&test_data->rw); - if (_Py_atomic_add_ssize(&test_data->nthreads, -1) == 1) { - _PyEvent_Notify(&test_data->done); - } + if (_Py_atomic_add_ssize(&test_data->nthreads, -1) == 1) { + _PyEvent_Notify(&test_data->done); + } } -static void -wait_until(uintptr_t *ptr, uintptr_t value) -{ - // wait up to two seconds for *ptr == value - int iters = 0; - uintptr_t bits; - do { - pysleep(10); - bits = _Py_atomic_load_uintptr(ptr); - iters++; - } while (bits != value && iters < 200); +static void wait_until(uintptr_t *ptr, uintptr_t value) { + // wait up to two seconds for *ptr == value + int iters = 0; + uintptr_t bits; + do { + pysleep(10); + bits = _Py_atomic_load_uintptr(ptr); + iters++; + } while (bits != value && iters < 200); } -static PyObject * -test_lock_rwlock(PyObject *self, PyObject *obj) -{ - struct test_rwlock_data test_data = {.nthreads = 3}; +static PyObject *test_lock_rwlock(PyObject *self, PyObject *obj) { + struct test_rwlock_data test_data = {.nthreads = 3}; - _PyRWMutex_Lock(&test_data.rw); - assert(test_data.rw.bits == 1); + _PyRWMutex_Lock(&test_data.rw); + assert(test_data.rw.bits == 1); - _PyRWMutex_Unlock(&test_data.rw); - assert(test_data.rw.bits == 0); + _PyRWMutex_Unlock(&test_data.rw); + assert(test_data.rw.bits == 0); - // Start two readers - PyThread_start_new_thread(rdlock_thread, &test_data); - PyThread_start_new_thread(rdlock_thread, &test_data); + // Start two readers + PyThread_start_new_thread(rdlock_thread, &test_data); + PyThread_start_new_thread(rdlock_thread, &test_data); - // wait up to two seconds for the threads to attempt to read-lock "rw" - wait_until(&test_data.rw.bits, 8); - assert(test_data.rw.bits == 8); + // wait up to two seconds for the threads to attempt to read-lock "rw" + wait_until(&test_data.rw.bits, 8); + assert(test_data.rw.bits == 8); - // start writer (while readers hold lock) - PyThread_start_new_thread(wrlock_thread, &test_data); - wait_until(&test_data.rw.bits, 10); - assert(test_data.rw.bits == 10); + // start writer (while readers hold lock) + PyThread_start_new_thread(wrlock_thread, &test_data); + wait_until(&test_data.rw.bits, 10); + assert(test_data.rw.bits == 10); - // readers release lock, writer should acquire it - _PyEvent_Notify(&test_data.step1); - wait_until(&test_data.rw.bits, 3); - assert(test_data.rw.bits == 3); + // readers release lock, writer should acquire it + _PyEvent_Notify(&test_data.step1); + wait_until(&test_data.rw.bits, 3); + assert(test_data.rw.bits == 3); - // writer releases lock, readers acquire it - _PyEvent_Notify(&test_data.step2); - wait_until(&test_data.rw.bits, 8); - assert(test_data.rw.bits == 8); + // writer releases lock, readers acquire it + _PyEvent_Notify(&test_data.step2); + wait_until(&test_data.rw.bits, 8); + assert(test_data.rw.bits == 8); - // readers release lock again - _PyEvent_Notify(&test_data.step3); - wait_until(&test_data.rw.bits, 0); - assert(test_data.rw.bits == 0); + // readers release lock again + _PyEvent_Notify(&test_data.step3); + wait_until(&test_data.rw.bits, 0); + assert(test_data.rw.bits == 0); - PyEvent_Wait(&test_data.done); - Py_RETURN_NONE; + PyEvent_Wait(&test_data.done); + Py_RETURN_NONE; } -static PyObject * -test_lock_recursive(PyObject *self, PyObject *obj) -{ - _PyRecursiveMutex m = (_PyRecursiveMutex){0}; - assert(!_PyRecursiveMutex_IsLockedByCurrentThread(&m)); +static PyObject *test_lock_recursive(PyObject *self, PyObject *obj) { + _PyRecursiveMutex m = (_PyRecursiveMutex){0}; + assert(!_PyRecursiveMutex_IsLockedByCurrentThread(&m)); - _PyRecursiveMutex_Lock(&m); - assert(m.thread == PyThread_get_thread_ident_ex()); - assert(PyMutex_IsLocked(&m.mutex)); - assert(m.level == 0); + _PyRecursiveMutex_Lock(&m); + assert(m.thread == PyThread_get_thread_ident_ex()); + assert(PyMutex_IsLocked(&m.mutex)); + assert(m.level == 0); - _PyRecursiveMutex_Lock(&m); - assert(m.level == 1); - _PyRecursiveMutex_Unlock(&m); + _PyRecursiveMutex_Lock(&m); + assert(m.level == 1); + _PyRecursiveMutex_Unlock(&m); - _PyRecursiveMutex_Unlock(&m); - assert(m.thread == 0); - assert(!PyMutex_IsLocked(&m.mutex)); - assert(m.level == 0); + _PyRecursiveMutex_Unlock(&m); + assert(m.thread == 0); + assert(!PyMutex_IsLocked(&m.mutex)); + assert(m.level == 0); - Py_RETURN_NONE; + Py_RETURN_NONE; } static PyMethodDef test_methods[] = { @@ -509,19 +495,17 @@ static PyMethodDef test_methods[] = { {"test_lock_two_threads", test_lock_two_threads, METH_NOARGS}, {"test_lock_counter", test_lock_counter, METH_NOARGS}, {"test_lock_counter_slow", test_lock_counter_slow, METH_NOARGS}, - _TESTINTERNALCAPI_BENCHMARK_LOCKS_METHODDEF - {"test_lock_benchmark", test_lock_benchmark, METH_NOARGS}, + _TESTINTERNALCAPI_BENCHMARK_LOCKS_METHODDEF{ + "test_lock_benchmark", test_lock_benchmark, METH_NOARGS}, {"test_lock_once", test_lock_once, METH_NOARGS}, {"test_lock_rwlock", test_lock_rwlock, METH_NOARGS}, {"test_lock_recursive", test_lock_recursive, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; -int -_PyTestInternalCapi_Init_Lock(PyObject *mod) -{ - if (PyModule_AddFunctions(mod, test_methods) < 0) { - return -1; - } - return 0; +int _PyTestInternalCapi_Init_Lock(PyObject *mod) { + if (PyModule_AddFunctions(mod, test_methods) < 0) { + return -1; + } + return 0; } diff --git a/Tools/lockbench/lockbench.py b/Tools/lockbench/lockbench.py index 9833d703e00cbb..c7cd75b37b20cb 100644 --- a/Tools/lockbench/lockbench.py +++ b/Tools/lockbench/lockbench.py @@ -1,8 +1,6 @@ -# Measure the performance of PyMutex and PyThread_type_lock locks +# Measure the performance of PyMutex # with short critical sections. # -# Usage: python Tools/lockbench/lockbench.py [CRITICAL_SECTION_LENGTH] -# # How to interpret the results: # # Acquisitions (kHz): Reports the total number of lock acquisitions in @@ -18,36 +16,49 @@ # lock. # See https://en.wikipedia.org/wiki/Fairness_measure#Jain's_fairness_index +import argparse from _testinternalcapi import benchmark_locks -import sys - -# Max number of threads to test -MAX_THREADS = 10 - -# How much "work" to do while holding the lock -CRITICAL_SECTION_LENGTH = 1 - def jains_fairness(values): # Jain's fairness index # See https://en.wikipedia.org/wiki/Fairness_measure + if not values: + return 0.0 return (sum(values) ** 2) / (len(values) * sum(x ** 2 for x in values)) def main(): - print("Lock Type Threads Acquisitions (kHz) Fairness") - for lock_type in ["PyMutex", "PyThread_type_lock"]: - use_pymutex = (lock_type == "PyMutex") - for num_threads in range(1, MAX_THREADS + 1): - acquisitions, thread_iters = benchmark_locks( - num_threads, use_pymutex, CRITICAL_SECTION_LENGTH) + parser = argparse.ArgumentParser(description="Measure the performance of PyMutex") + parser.add_argument("threads", type=int, nargs="?", default=1, + help="Number of threads") + parser.add_argument("--num-locks", type=int, default=1, + help="Number of locks") + parser.add_argument("--critical-section", type=int, default=1, + help="Work inside the lock") + parser.add_argument("--work-outside", type=int, default=0, + help="Work outside the lock") + parser.add_argument("--time", type=int, default=1000, + help="Benchmark duration in milliseconds") + parser.add_argument("--total-iters", type=int, default=0, + help="Fixed number of iterations per thread") + + args = parser.parse_args() - acquisitions /= 1000 # report in kHz for readability - fairness = jains_fairness(thread_iters) + acquisitions, thread_iters = benchmark_locks( + args.threads, + num_locks=args.num_locks, + critical_section_length=args.critical_section, + work_outside_length=args.work_outside, + time_ms=args.time, + iters_limit=args.total_iters + ) - print(f"{lock_type: <20}{num_threads: <18}{acquisitions: >5.0f}{fairness: >20.2f}") + acquisitions /= 1000 # report in kHz for readability + fairness = jains_fairness(thread_iters) + print(f"Threads: {args.threads}") + print(f"Locks: {args.num_locks}") + print(f"Acquisitions (kHz): {acquisitions: >5.0f}") + print(f"Fairness: {fairness: >20.2f}") if __name__ == "__main__": - if len(sys.argv) > 1: - CRITICAL_SECTION_LENGTH = int(sys.argv[1]) main()