Compare commits

...

1 Commits

Author SHA1 Message Date
Ondřej Surý
3401faa8ef WIP: Implement phase-fair rwlock 2023-02-13 06:17:49 +01:00
2 changed files with 93 additions and 414 deletions

View File

@@ -13,6 +13,8 @@
#pragma once
#define USE_PF_RWLOCK 1
#include <inttypes.h>
#include <stdlib.h>
@@ -162,38 +164,12 @@ typedef pthread_rwlock_t isc__rwlock_t;
struct isc_rwlock {
/* Unlocked. */
unsigned int magic;
isc_mutex_t lock;
atomic_int_fast32_t spins;
unsigned int magic;
/*
* When some atomic instructions with hardware assistance are
* available, rwlock will use those so that concurrent readers do not
* interfere with each other through mutex as long as no writers
* appear, massively reducing the lock overhead in the typical case.
*
* The basic algorithm of this approach is the "simple
* writer-preference lock" shown in the following URL:
* http://www.cs.rochester.edu/u/scott/synchronization/pseudocode/rw.html
* but, unlike the original algorithm, our implementation does not rely on
* a spin lock, in order to be more portable as a user space application.
*/
/* Read or modified atomically. */
atomic_int_fast32_t write_requests;
atomic_int_fast32_t write_completions;
atomic_int_fast32_t cnt_and_flag;
/* Locked by lock. */
isc_condition_t readable;
isc_condition_t writeable;
unsigned int readers_waiting;
/* Locked by rwlock itself. */
atomic_uint_fast32_t write_granted;
/* Unlocked. */
unsigned int write_quota;
atomic_uint_fast32_t rin;
atomic_uint_fast32_t rout;
atomic_uint_fast32_t win;
atomic_uint_fast32_t wout;
};
typedef struct isc_rwlock isc_rwlock_t;

View File

@@ -30,18 +30,6 @@
#define RWLOCK_MAGIC ISC_MAGIC('R', 'W', 'L', 'k')
#define VALID_RWLOCK(rwl) ISC_MAGIC_VALID(rwl, RWLOCK_MAGIC)
#ifndef RWLOCK_DEFAULT_READ_QUOTA
#define RWLOCK_DEFAULT_READ_QUOTA 4
#endif /* ifndef RWLOCK_DEFAULT_READ_QUOTA */
#ifndef RWLOCK_DEFAULT_WRITE_QUOTA
#define RWLOCK_DEFAULT_WRITE_QUOTA 4
#endif /* ifndef RWLOCK_DEFAULT_WRITE_QUOTA */
#ifndef RWLOCK_MAX_ADAPTIVE_COUNT
#define RWLOCK_MAX_ADAPTIVE_COUNT 100
#endif /* ifndef RWLOCK_MAX_ADAPTIVE_COUNT */
#if defined(_MSC_VER)
#include <intrin.h>
#define isc_rwlock_pause() YieldProcessor()
@@ -90,32 +78,13 @@ void
isc__rwlock_init(isc__rwlock_t *rwl, unsigned int read_quota,
unsigned int write_quota) {
REQUIRE(rwl != NULL);
UNUSED(read_quota);
UNUSED(write_quota);
/*
* In case there's trouble initializing, we zero magic now. If all
* goes well, we'll set it to RWLOCK_MAGIC.
*/
rwl->magic = 0;
atomic_init(&rwl->spins, 0);
atomic_init(&rwl->write_requests, 0);
atomic_init(&rwl->write_completions, 0);
atomic_init(&rwl->cnt_and_flag, 0);
rwl->readers_waiting = 0;
atomic_init(&rwl->write_granted, 0);
if (read_quota != 0) {
UNEXPECTED_ERROR("read quota is not supported");
}
if (write_quota == 0) {
write_quota = RWLOCK_DEFAULT_WRITE_QUOTA;
}
rwl->write_quota = write_quota;
isc_mutex_init(&rwl->lock);
isc_condition_init(&rwl->readable);
isc_condition_init(&rwl->writeable);
atomic_init(&rwl->rin, 0);
atomic_init(&rwl->rout, 0);
atomic_init(&rwl->win, 0);
atomic_init(&rwl->wout, 0);
rwl->magic = RWLOCK_MAGIC;
}
@@ -123,385 +92,119 @@ void
isc__rwlock_destroy(isc__rwlock_t *rwl) {
REQUIRE(VALID_RWLOCK(rwl));
REQUIRE(atomic_load_acquire(&rwl->write_requests) ==
atomic_load_acquire(&rwl->write_completions) &&
atomic_load_acquire(&rwl->cnt_and_flag) == 0 &&
rwl->readers_waiting == 0);
REQUIRE(atomic_load_acquire(&rwl->win) ==
atomic_load_acquire(&rwl->wout));
REQUIRE(atomic_load_acquire(&rwl->rin) ==
atomic_load_acquire(&rwl->rout));
rwl->magic = 0;
isc_condition_destroy(&rwl->readable);
isc_condition_destroy(&rwl->writeable);
isc_mutex_destroy(&rwl->lock);
}
/*
* When some architecture-dependent atomic operations are available,
* rwlock can be more efficient than the generic algorithm defined below.
* The basic algorithm is described in the following URL:
* http://www.cs.rochester.edu/u/scott/synchronization/pseudocode/rw.html
*
* The key is to use the following integer variables modified atomically:
* write_requests, write_completions, and cnt_and_flag.
*
* write_requests and write_completions act as a waiting queue for writers
* in order to ensure the FIFO order. Both variables begin with the initial
* value of 0. When a new writer tries to get a write lock, it increments
* write_requests and gets the previous value of the variable as a "ticket".
* When write_completions reaches the ticket number, the new writer can start
* writing. When the writer completes its work, it increments
* write_completions so that another new writer can start working. If the
* write_requests is not equal to write_completions, it means a writer is now
* working or waiting. In this case, new readers cannot start reading, or
* in other words, this algorithm basically prefers writers.
*
* cnt_and_flag is a "lock" shared by all readers and writers. This integer
* variable is a kind of structure with two members: writer_flag (1 bit) and
* reader_count (31 bits). The writer_flag shows whether a writer is working,
* and the reader_count shows the number of readers currently working or almost
* ready for working. A writer who has the current "ticket" tries to get the
* lock by exclusively setting the writer_flag to 1, provided that the whole
* 32-bit is 0 (meaning no readers or writers working). On the other hand,
* a new reader tries to increment the "reader_count" field provided that
* the writer_flag is 0 (meaning there is no writer working).
*
* If some of the above operations fail, the reader or the writer sleeps
* until the related condition changes. When a working reader or writer
* completes its work, some readers or writers are sleeping, and the condition
* that suspended the reader or writer has changed, it wakes up the sleeping
* readers or writers.
*
* As already noted, this algorithm basically prefers writers. In order to
* prevent readers from starving, however, the algorithm also introduces the
* "writer quota" (Q). When Q consecutive writers have completed their work,
* suspending readers, the last writer will wake up the readers, even if a new
* writer is waiting.
*
* Implementation specific note: due to the combination of atomic operations
* and a mutex lock, ordering between the atomic operation and locks can be
* very sensitive in some cases. In particular, it is generally very important
* to check the atomic variable that requires a reader or writer to sleep after
* locking the mutex and before actually sleeping; otherwise, a deadlock is
* very likely. For example, assume "var" is a variable that is atomically
* modified; then the corresponding code would be:
* if (var == need_sleep) {
* LOCK(lock);
* if (var == need_sleep)
* WAIT(cond, lock);
* UNLOCK(lock);
* }
* The second check is important, since "var" is protected by the atomic
* operation, not by the mutex, and can be changed just before sleeping.
* (The first "if" could be omitted, but this is also important in order to
* make the code efficient by avoiding the use of the mutex unless it is
* really necessary.)
*/
#define WRITER_ACTIVE 0x1
#define READER_INCR 0x2
/*
 * Mask AND-ed into 'rin' on write unlock; it clears the low four bits,
 * which include the writer bits (ISC_RWLOCK_WBITS).
 */
#define ISC_RWLOCK_LSB 0xFFFFFFF0
#define ISC_RWLOCK_RINC 0x100 /* Reader increment value. */
#define ISC_RWLOCK_WBITS 0x3 /* Writer bits in reader. */
#define ISC_RWLOCK_PRES 0x2 /* Writer present bit. */
#define ISC_RWLOCK_PHID 0x1 /* Phase ID bit. */
static void
rwlock_lock(isc__rwlock_t *rwl, isc_rwlocktype_t type) {
int32_t cntflag;
isc__rwlock_write_unlock(isc__rwlock_t *rwl) {
/* Migrate from write phase to read phase. */
atomic_fetch_and_release(&rwl->rin, ISC_RWLOCK_LSB);
REQUIRE(VALID_RWLOCK(rwl));
/* Allow other writers to continue. */
atomic_fetch_add_release(&rwl->wout, 1);
}
#ifdef ISC_RWLOCK_TRACE
print_lock("prelock", rwl, type);
#endif /* ifdef ISC_RWLOCK_TRACE */
static void
isc__rwlock_write_lock(isc__rwlock_t *rwl) {
uint32_t ticket = atomic_fetch_add_release(&rwl->win, 1);
if (type == isc_rwlocktype_read) {
if (atomic_load_acquire(&rwl->write_requests) !=
atomic_load_acquire(&rwl->write_completions))
{
/* there is a waiting or active writer */
LOCK(&rwl->lock);
if (atomic_load_acquire(&rwl->write_requests) !=
atomic_load_acquire(&rwl->write_completions))
{
rwl->readers_waiting++;
WAIT(&rwl->readable, &rwl->lock);
rwl->readers_waiting--;
}
UNLOCK(&rwl->lock);
}
cntflag = atomic_fetch_add_release(&rwl->cnt_and_flag,
READER_INCR);
POST(cntflag);
while (1) {
if ((atomic_load_acquire(&rwl->cnt_and_flag) &
WRITER_ACTIVE) == 0)
{
break;
}
/* A writer is still working */
LOCK(&rwl->lock);
rwl->readers_waiting++;
if ((atomic_load_acquire(&rwl->cnt_and_flag) &
WRITER_ACTIVE) != 0)
{
WAIT(&rwl->readable, &rwl->lock);
}
rwl->readers_waiting--;
UNLOCK(&rwl->lock);
/*
* Typically, the reader should be able to get a lock
* at this stage:
* (1) there should have been no pending writer when
* the reader was trying to increment the
* counter; otherwise, the writer should be in
* the waiting queue, preventing the reader from
* proceeding to this point.
* (2) once the reader increments the counter, no
* more writer can get a lock.
* Still, it is possible another writer can work at
* this point, e.g. in the following scenario:
* A previous writer unlocks the writer lock.
* This reader proceeds to point (1).
* A new writer appears, and gets a new lock before
* the reader increments the counter.
* The reader then increments the counter.
* The previous writer notices there is a waiting
* reader who is almost ready, and wakes it up.
* So, the reader needs to confirm whether it can now
* read explicitly (thus we loop). Note that this is
* not an infinite process, since the reader has
* incremented the counter at this point.
*/
}
/*
* If we are temporarily preferred to writers due to the writer
* quota, reset the condition (race among readers doesn't
* matter).
*/
atomic_store_release(&rwl->write_granted, 0);
} else {
int32_t prev_writer;
/* enter the waiting queue, and wait for our turn */
prev_writer = atomic_fetch_add_release(&rwl->write_requests, 1);
while (atomic_load_acquire(&rwl->write_completions) !=
prev_writer)
{
LOCK(&rwl->lock);
if (atomic_load_acquire(&rwl->write_completions) !=
prev_writer)
{
WAIT(&rwl->writeable, &rwl->lock);
UNLOCK(&rwl->lock);
continue;
}
UNLOCK(&rwl->lock);
break;
}
while (!atomic_compare_exchange_weak_acq_rel(
&rwl->cnt_and_flag, &(int_fast32_t){ 0 },
WRITER_ACTIVE))
{
/* Another active reader or writer is working. */
LOCK(&rwl->lock);
if (atomic_load_acquire(&rwl->cnt_and_flag) != 0) {
WAIT(&rwl->writeable, &rwl->lock);
}
UNLOCK(&rwl->lock);
}
INSIST((atomic_load_acquire(&rwl->cnt_and_flag) &
WRITER_ACTIVE));
atomic_fetch_add_release(&rwl->write_granted, 1);
/* Acquire ownership of write phase */
while (atomic_load_acquire(&rwl->wout) != ticket) {
isc_rwlock_pause();
}
#ifdef ISC_RWLOCK_TRACE
print_lock("postlock", rwl, type);
#endif /* ifdef ISC_RWLOCK_TRACE */
/*
* Acquire a ticket on the read side so that readers already
* holding the lock can flush. This also indicates to any
* incoming reader that a write phase is pending.
*/
ticket = atomic_fetch_add_release(
&rwl->rin, (ticket & ISC_RWLOCK_PHID) | ISC_RWLOCK_PRES);
/* Wait for any pending readers to flush. */
while (atomic_load_acquire(&rwl->rout) != ticket) {
isc_rwlock_pause();
}
}
/*
 * Release a read lock by adding ISC_RWLOCK_RINC to the 'rout' counter
 * with release ordering.
 */
static void
isc__rwlock_read_unlock(isc__rwlock_t *rwl) {
atomic_fetch_add_release(&rwl->rout, ISC_RWLOCK_RINC);
}
/*
 * Acquire the lock for reading.
 *
 * The caller registers itself as a reader by adding ISC_RWLOCK_RINC to
 * 'rin' and looks at the writer bits (ISC_RWLOCK_WBITS) that were set at
 * that moment.  If none were set, the read lock is held on return.
 * Otherwise the caller spins until the writer bits in 'rin' differ from
 * the value it observed.
 */
static void
isc__rwlock_read_lock(isc__rwlock_t *rwl) {
	/*
	 * Taking a lock requires acquire semantics: without them the fast
	 * path below returns with nothing preventing the critical section
	 * from being reordered ahead of the increment, and nothing that
	 * synchronizes with the release performed on 'rin' when a writer
	 * unlocks.  acq_rel keeps the release ordering the increment
	 * already had and adds the missing acquire.
	 */
	uint32_t writing = atomic_fetch_add_explicit(&rwl->rin,
						     ISC_RWLOCK_RINC,
						     memory_order_acq_rel) &
			   ISC_RWLOCK_WBITS;

	/* No writer present: the read lock is already held. */
	if (writing == 0) {
		return;
	}

	/* Spin until the writer bits change from what we observed. */
	while ((atomic_load_acquire(&rwl->rin) & ISC_RWLOCK_WBITS) == writing) {
		isc_rwlock_pause();
	}
}
void
isc__rwlock_lock(isc__rwlock_t *rwl, isc_rwlocktype_t type) {
int32_t cnt = 0;
int32_t spins = atomic_load_acquire(&rwl->spins) * 2 + 10;
int32_t max_cnt = ISC_MAX(spins, RWLOCK_MAX_ADAPTIVE_COUNT);
REQUIRE(VALID_RWLOCK(rwl));
do {
if (cnt++ >= max_cnt) {
rwlock_lock(rwl, type);
break;
}
isc_rwlock_pause();
} while (isc_rwlock_trylock(rwl, type) != ISC_R_SUCCESS);
atomic_fetch_add_release(&rwl->spins, (cnt - spins) / 8);
switch (type) {
case isc_rwlocktype_read:
isc__rwlock_read_lock(rwl);
break;
case isc_rwlocktype_write:
isc__rwlock_write_lock(rwl);
break;
default:
UNREACHABLE();
}
}
isc_result_t
isc__rwlock_trylock(isc__rwlock_t *rwl, isc_rwlocktype_t type) {
int32_t cntflag;
REQUIRE(VALID_RWLOCK(rwl));
UNUSED(type);
#ifdef ISC_RWLOCK_TRACE
print_lock("prelock", rwl, type);
#endif /* ifdef ISC_RWLOCK_TRACE */
if (type == isc_rwlocktype_read) {
/* If a writer is waiting or working, we fail. */
if (atomic_load_acquire(&rwl->write_requests) !=
atomic_load_acquire(&rwl->write_completions))
{
return (ISC_R_LOCKBUSY);
}
/* Otherwise, be ready for reading. */
cntflag = atomic_fetch_add_release(&rwl->cnt_and_flag,
READER_INCR);
if ((cntflag & WRITER_ACTIVE) != 0) {
/*
* A writer is working. We lose, and cancel the read
* request.
*/
cntflag = atomic_fetch_sub_release(&rwl->cnt_and_flag,
READER_INCR);
/*
* If no other readers are waiting and we've suspended
* new writers in this short period, wake them up.
*/
if (cntflag == READER_INCR &&
atomic_load_acquire(&rwl->write_completions) !=
atomic_load_acquire(&rwl->write_requests))
{
LOCK(&rwl->lock);
BROADCAST(&rwl->writeable);
UNLOCK(&rwl->lock);
}
return (ISC_R_LOCKBUSY);
}
} else {
/* Try locking without entering the waiting queue. */
int_fast32_t zero = 0;
if (!atomic_compare_exchange_strong_acq_rel(
&rwl->cnt_and_flag, &zero, WRITER_ACTIVE))
{
return (ISC_R_LOCKBUSY);
}
/*
* XXXJT: jump into the queue, possibly breaking the writer
* order.
*/
atomic_fetch_sub_release(&rwl->write_completions, 1);
atomic_fetch_add_release(&rwl->write_granted, 1);
}
#ifdef ISC_RWLOCK_TRACE
print_lock("postlock", rwl, type);
#endif /* ifdef ISC_RWLOCK_TRACE */
return (ISC_R_SUCCESS);
return (ISC_R_LOCKBUSY);
}
isc_result_t
isc__rwlock_tryupgrade(isc__rwlock_t *rwl) {
REQUIRE(VALID_RWLOCK(rwl));
int_fast32_t reader_incr = READER_INCR;
/* Try to acquire write access. */
atomic_compare_exchange_strong_acq_rel(&rwl->cnt_and_flag, &reader_incr,
WRITER_ACTIVE);
/*
* There must have been no writer, and there must have
* been at least one reader.
*/
INSIST((reader_incr & WRITER_ACTIVE) == 0 &&
(reader_incr & ~WRITER_ACTIVE) != 0);
if (reader_incr == READER_INCR) {
/*
* We are the only reader and have been upgraded.
* Now jump into the head of the writer waiting queue.
*/
atomic_fetch_sub_release(&rwl->write_completions, 1);
} else {
return (ISC_R_LOCKBUSY);
}
return (ISC_R_SUCCESS);
return (ISC_R_LOCKBUSY);
}
void
isc__rwlock_unlock(isc__rwlock_t *rwl, isc_rwlocktype_t type) {
int32_t prev_cnt;
REQUIRE(VALID_RWLOCK(rwl));
#ifdef ISC_RWLOCK_TRACE
print_lock("preunlock", rwl, type);
#endif /* ifdef ISC_RWLOCK_TRACE */
if (type == isc_rwlocktype_read) {
prev_cnt = atomic_fetch_sub_release(&rwl->cnt_and_flag,
READER_INCR);
/*
* If we're the last reader and any writers are waiting, wake
* them up. We need to wake up all of them to ensure the
* FIFO order.
*/
if (prev_cnt == READER_INCR &&
atomic_load_acquire(&rwl->write_completions) !=
atomic_load_acquire(&rwl->write_requests))
{
LOCK(&rwl->lock);
BROADCAST(&rwl->writeable);
UNLOCK(&rwl->lock);
}
} else {
bool wakeup_writers = true;
/*
* Reset the flag, and (implicitly) tell other writers
* we are done.
*/
atomic_fetch_sub_release(&rwl->cnt_and_flag, WRITER_ACTIVE);
atomic_fetch_add_release(&rwl->write_completions, 1);
if ((atomic_load_acquire(&rwl->write_granted) >=
rwl->write_quota) ||
(atomic_load_acquire(&rwl->write_requests) ==
atomic_load_acquire(&rwl->write_completions)) ||
(atomic_load_acquire(&rwl->cnt_and_flag) & ~WRITER_ACTIVE))
{
/*
* We have passed the write quota, no writer is
* waiting, or some readers are almost ready, pending
* possible writers. Note that the last case can
* happen even if write_requests != write_completions
* (which means a new writer in the queue), so we need
* to catch the case explicitly.
*/
LOCK(&rwl->lock);
if (rwl->readers_waiting > 0) {
wakeup_writers = false;
BROADCAST(&rwl->readable);
}
UNLOCK(&rwl->lock);
}
if ((atomic_load_acquire(&rwl->write_requests) !=
atomic_load_acquire(&rwl->write_completions)) &&
wakeup_writers)
{
LOCK(&rwl->lock);
BROADCAST(&rwl->writeable);
UNLOCK(&rwl->lock);
}
switch (type) {
case isc_rwlocktype_read:
isc__rwlock_read_unlock(rwl);
break;
case isc_rwlocktype_write:
isc__rwlock_write_unlock(rwl);
break;
default:
UNREACHABLE();
}
#ifdef ISC_RWLOCK_TRACE
print_lock("postunlock", rwl, type);
#endif /* ifdef ISC_RWLOCK_TRACE */
}