Compare commits

...

1 Commits

Author SHA1 Message Date
Ondřej Surý
2deb9296f5 Create per-loop call_rcu thread
The current version of Userspace-RCU creates just a single call_rcu
thread to do all the cleaning.  The only advantage of that is the
serialization of the call_rcu() calls, but on machines with many CPUs
it slows down memory reclamation and does not fully utilize all the
CPU cores.

Create a per-loop call_rcu_data structure and assign each one to its
respective isc_loop.  We ignore isc_work threads for now - they will
still use the default call_rcu thread.
2024-12-06 17:17:20 +00:00
2 changed files with 42 additions and 1 deletions

View File

@@ -276,6 +276,10 @@ loop_close(isc_loop_t *loop) {
static void *
helper_thread(void *arg) {
isc_loop_t *helper = (isc_loop_t *)arg;
isc_loopmgr_t *loopmgr = helper->loopmgr;
struct call_rcu_data *crdp = loopmgr->call_rcu_datas[helper->tid];
set_thread_call_rcu_data(crdp);
int r = uv_prepare_start(&helper->quiescent, quiescent_cb);
UV_RUNTIME_CHECK(uv_prepare_start, r);
@@ -290,6 +294,8 @@ helper_thread(void *arg) {
isc_barrier_wait(&helper->loopmgr->stopping);
set_thread_call_rcu_data(NULL);
return NULL;
}
@@ -299,8 +305,9 @@ loop_thread(void *arg) {
isc_loopmgr_t *loopmgr = loop->loopmgr;
isc_loop_t *helper = &loopmgr->helpers[loop->tid];
char name[32];
/* Initialize the thread_local variables*/
struct call_rcu_data *crdp = loopmgr->call_rcu_datas[loop->tid];
/* Initialize the thread_local variables*/
REQUIRE(isc__loop_local == NULL || isc__loop_local == loop);
isc__loop_local = loop;
@@ -311,6 +318,8 @@ loop_thread(void *arg) {
snprintf(name, sizeof(name), "isc-helper-%04" PRIu32, loop->tid);
isc_thread_setname(helper->thread, name);
set_thread_call_rcu_data(crdp);
int r = uv_prepare_start(&loop->quiescent, quiescent_cb);
UV_RUNTIME_CHECK(uv_prepare_start, r);
@@ -338,6 +347,8 @@ loop_thread(void *arg) {
isc_barrier_wait(&loopmgr->stopping);
set_thread_call_rcu_data(NULL);
return NULL;
}
@@ -391,6 +402,13 @@ isc_loopmgr_create(isc_mem_t *mctx, uint32_t nloops, isc_loopmgr_t **loopmgrp) {
isc_barrier_init(&loopmgr->starting, loopmgr->nloops * 2);
isc_barrier_init(&loopmgr->stopping, loopmgr->nloops * 2);
loopmgr->call_rcu_datas =
isc_mem_cget(loopmgr->mctx, loopmgr->nloops,
sizeof(loopmgr->call_rcu_datas[0]));
for (size_t i = 0; i < loopmgr->nloops; i++) {
loopmgr->call_rcu_datas[i] = create_call_rcu_data(0, -1);
}
loopmgr->loops = isc_mem_cget(loopmgr->mctx, loopmgr->nloops,
sizeof(loopmgr->loops[0]));
for (size_t i = 0; i < loopmgr->nloops; i++) {
@@ -613,6 +631,28 @@ isc_loopmgr_destroy(isc_loopmgr_t **loopmgrp) {
isc_mem_cput(loopmgr->mctx, loopmgr->loops, loopmgr->nloops,
sizeof(loopmgr->loops[0]));
/*
* From urcu-call-rcu-impl.h:
*
* The caller must wait for a grace-period to pass between return from
* set_cpu_call_rcu_data() and call to call_rcu_data_free() passing the
* previous call rcu data as argument.
*/
synchronize_rcu();
/*
* Now wait for the pending call_rcu tasks to finish, to reduce the number
* of tasks that must be moved from the call_rcu threads we are deleting
* to the default call_rcu thread.
*/
rcu_barrier();
for (size_t i = 0; i < loopmgr->nloops; i++) {
struct call_rcu_data *crdp = loopmgr->call_rcu_datas[i];
call_rcu_data_free(crdp);
}
isc_mem_cput(loopmgr->mctx, loopmgr->call_rcu_datas, loopmgr->nloops,
sizeof(loopmgr->call_rcu_datas[0]));
isc_barrier_destroy(&loopmgr->starting);
isc_barrier_destroy(&loopmgr->stopping);
isc_barrier_destroy(&loopmgr->resuming);

View File

@@ -111,6 +111,7 @@ struct isc_loopmgr {
/* per-thread objects */
isc_loop_t *loops;
isc_loop_t *helpers;
struct call_rcu_data **call_rcu_datas;
};
/*