Reduce the memory used by hazard pointers

The hazard pointers implementation was bit of frivolous with memory
usage allocating memory based on maximum constants rather than on the
usage.

Make the retired list bit use exactly the memory needed for specified
number of hazard pointers.  This reduced the memory used by hazard
pointers to one quarter in our specific case because we only use single
HP in the queue implementation (as opposed to allocating memory for
HP_MAX_HPS = 4).

Previously, the alignment to prevent false sharing was double the
cacheline size.  This was copied from the ConcurrencyFreaks
implementation, but one cacheline size is enough to prevent false
sharing, so we are using this now to save few bits of memory.

The top level hazard pointers and retired list arrays are now not
aligned to the cacheline size - they are read-only for the whole
life-time of the isc_hp object.  Only hp (hazard pointer) and
rl (retired list) array members are allocated aligned to the cacheline
size to avoid false sharing between threads.

Cleanup HP_MAX_HPS and HP_THRESHOLD_R constants from the paper, because
we don't use them in the code.  HP_THRESHOLD_R was 0, so the check
whether the retired list size was smaller than the value was basically a
dead code.
This commit is contained in:
Ondřej Surý
2021-12-09 11:40:02 +01:00
parent c917a2ca88
commit c84eb55049

View File

@@ -45,34 +45,32 @@
#include <inttypes.h>
#include <isc/align.h>
#include <isc/atomic.h>
#include <isc/hp.h>
#include <isc/mem.h>
#include <isc/once.h>
#include <isc/os.h>
#include <isc/string.h>
#include <isc/thread.h>
#include <isc/util.h>
#define HP_MAX_THREADS 128
static int isc__hp_max_threads = 1;
#define HP_MAX_HPS 4 /* This is named 'K' in the HP paper */
#define CLPAD (128 / sizeof(uintptr_t))
#define HP_THRESHOLD_R 0 /* This is named 'R' in the HP paper */
/* Maximum number of retired objects per thread */
static int isc__hp_max_retired = HP_MAX_THREADS * HP_MAX_HPS;
typedef struct retirelist {
int size;
uintptr_t *list;
} retirelist_t;
typedef atomic_uintptr_t isc_hp_uintptr_t;
struct isc_hp {
int max_hps;
int max_retired;
isc_mem_t *mctx;
atomic_uintptr_t **hp;
retirelist_t **rl;
isc_hp_deletefunc_t *deletefunc;
isc_hp_uintptr_t **hp;
retirelist_t **rl;
};
static inline int
@@ -89,7 +87,6 @@ isc_hp_init(int max_threads) {
}
isc__hp_max_threads = max_threads;
isc__hp_max_retired = max_threads * HP_MAX_HPS;
}
isc_hp_t *
@@ -97,29 +94,42 @@ isc_hp_new(isc_mem_t *mctx, size_t max_hps, isc_hp_deletefunc_t *deletefunc) {
isc_hp_t *hp = isc_mem_get(mctx, sizeof(*hp));
REQUIRE(isc__hp_max_threads > 0);
REQUIRE(max_hps <= HP_MAX_HPS);
REQUIRE(max_hps > 0);
if (max_hps == 0) {
max_hps = HP_MAX_HPS;
}
*hp = (isc_hp_t){ .max_hps = max_hps, .deletefunc = deletefunc };
*hp = (isc_hp_t){
.max_hps = max_hps,
.max_retired = isc__hp_max_threads * max_hps,
.deletefunc = deletefunc,
};
isc_mem_attach(mctx, &hp->mctx);
hp->hp = isc_mem_get(mctx, isc__hp_max_threads * sizeof(hp->hp[0]));
for (int i = 0; i < isc__hp_max_threads; i++) {
isc_hp_uintptr_t *hps;
hps = isc_mem_get_aligned(mctx, hp->max_hps * sizeof(*hps),
ISC_OS_CACHELINE_SIZE);
for (int j = 0; j < hp->max_hps; j++) {
atomic_init(&hps[j], 0);
}
hp->hp[i] = hps;
}
hp->rl = isc_mem_get(mctx, isc__hp_max_threads * sizeof(hp->rl[0]));
for (int i = 0; i < isc__hp_max_threads; i++) {
hp->hp[i] = isc_mem_get(mctx, CLPAD * 2 * sizeof(hp->hp[i][0]));
hp->rl[i] = isc_mem_get(mctx, sizeof(*hp->rl[0]));
*hp->rl[i] = (retirelist_t){ .size = 0 };
retirelist_t *rl;
for (int j = 0; j < hp->max_hps; j++) {
atomic_init(&hp->hp[i][j], 0);
}
hp->rl[i]->list = isc_mem_get(
hp->mctx, isc__hp_max_retired * sizeof(uintptr_t));
rl = isc_mem_get_aligned(mctx, sizeof(*rl),
ISC_OS_CACHELINE_SIZE);
rl->size = 0;
rl->list = isc_mem_get(hp->mctx,
hp->max_retired * sizeof(uintptr_t));
memset(rl->list, 0, hp->max_retired * sizeof(uintptr_t));
hp->rl[i] = rl;
}
return (hp);
@@ -128,16 +138,21 @@ isc_hp_new(isc_mem_t *mctx, size_t max_hps, isc_hp_deletefunc_t *deletefunc) {
void
isc_hp_destroy(isc_hp_t *hp) {
for (int i = 0; i < isc__hp_max_threads; i++) {
isc_mem_put(hp->mctx, hp->hp[i],
CLPAD * 2 * sizeof(hp->hp[i][0]));
retirelist_t *rl = hp->rl[i];
for (int j = 0; j < hp->rl[i]->size; j++) {
void *data = (void *)hp->rl[i]->list[j];
for (int j = 0; j < rl->size; j++) {
void *data = (void *)rl->list[j];
hp->deletefunc(data);
}
isc_mem_put(hp->mctx, hp->rl[i]->list,
isc__hp_max_retired * sizeof(uintptr_t));
isc_mem_put(hp->mctx, hp->rl[i], sizeof(*hp->rl[0]));
isc_mem_put(hp->mctx, rl->list,
hp->max_retired * sizeof(uintptr_t));
isc_mem_put_aligned(hp->mctx, rl, sizeof(*rl),
ISC_OS_CACHELINE_SIZE);
}
for (int i = 0; i < isc__hp_max_threads; i++) {
isc_hp_uintptr_t *hps = hp->hp[i];
isc_mem_put_aligned(hp->mctx, hps, hp->max_hps * sizeof(*hps),
ISC_OS_CACHELINE_SIZE);
}
isc_mem_put(hp->mctx, hp->hp, isc__hp_max_threads * sizeof(hp->hp[0]));
isc_mem_put(hp->mctx, hp->rl, isc__hp_max_threads * sizeof(hp->rl[0]));
@@ -182,15 +197,12 @@ isc_hp_protect_release(isc_hp_t *hp, int ihp, atomic_uintptr_t ptr) {
void
isc_hp_retire(isc_hp_t *hp, uintptr_t ptr) {
hp->rl[tid()]->list[hp->rl[tid()]->size++] = ptr;
INSIST(hp->rl[tid()]->size < isc__hp_max_retired);
retirelist_t *rl = hp->rl[tid()];
rl->list[rl->size++] = ptr;
INSIST(rl->size < hp->max_retired);
if (hp->rl[tid()]->size < HP_THRESHOLD_R) {
return;
}
for (int iret = 0; iret < hp->rl[tid()]->size; iret++) {
uintptr_t obj = hp->rl[tid()]->list[iret];
for (int iret = 0; iret < rl->size; iret++) {
uintptr_t obj = rl->list[iret];
bool can_delete = true;
for (int itid = 0; itid < isc__hp_max_threads && can_delete;
itid++) {
@@ -203,11 +215,9 @@ isc_hp_retire(isc_hp_t *hp, uintptr_t ptr) {
}
if (can_delete) {
size_t bytes = (hp->rl[tid()]->size - iret) *
sizeof(hp->rl[tid()]->list[0]);
memmove(&hp->rl[tid()]->list[iret],
&hp->rl[tid()]->list[iret + 1], bytes);
hp->rl[tid()]->size--;
size_t bytes = (rl->size - iret) * sizeof(rl->list[0]);
memmove(&rl->list[iret], &rl->list[iret + 1], bytes);
rl->size--;
hp->deletefunc((void *)obj);
}
}