Compare commits


2 Commits

Author        SHA1        Message                                                         Date
Ondřej Surý   11d63ae1d3  WIP: Don't upgrade the tree lock in the decrement_reference()  2022-09-30 17:47:59 +02:00
Ondřej Surý   26c2c22ac9  WIP: Remove the isc_rwlock_downgrade                            2022-09-30 17:47:59 +02:00
4 changed files with 151 additions and 247 deletions

View File

@@ -1570,9 +1570,7 @@ signapex(void) {
result = dns_dbiterator_current(gdbiter, &node, name);
check_dns_dbiterator_current(result);
signname(node, name);
LOCK(&namelock);
dumpnode(name, node);
UNLOCK(&namelock);
cleannode(gdb, gversion, node);
dns_db_detachnode(gdb, &node);
result = dns_dbiterator_first(gdbiter);
@@ -1582,6 +1580,7 @@ signapex(void) {
fatal("failure iterating database: %s",
isc_result_totext(result));
}
dns_dbiterator_pause(gdbiter);
}
/*%
@@ -1697,6 +1696,7 @@ assignwork(isc_task_t *task) {
sevent->fname = fname;
isc_task_send(task, ISC_EVENT_PTR(&sevent));
unlock:
dns_dbiterator_pause(gdbiter);
UNLOCK(&namelock);
}
@@ -4030,7 +4030,9 @@ main(int argc, char *argv[]) {
presign();
TIME_NOW(&sign_start);
LOCK(&namelock);
signapex();
UNLOCK(&namelock);
if (!atomic_load(&finished)) {
/*
* There is more work to do. Spread it out over multiple
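Taken together, the hunks above (from dnssec-signzone.c, going by the signapex()/assignwork() context) hoist the namelock out of the helper and into the caller, and pause the database iterator before the lock is dropped so the iterator is not sitting on tree locks while other threads contend for them. A generic sketch of the lock-hoisting half, with plain pthreads and hypothetical names:

#include <pthread.h>

static pthread_mutex_t namelock = PTHREAD_MUTEX_INITIALIZER;

/* Before: the helper locks around each item it emits. */
static void
dump_one_locked(int item) {
	pthread_mutex_lock(&namelock);
	/* dumpnode()-style output of `item` */
	(void)item;
	pthread_mutex_unlock(&namelock);
}

/* After: the caller owns the critical section; the helper assumes
 * the lock is already held, so its locking contract must be stated. */
static void
dump_one(int item) {
	/* precondition: caller holds namelock */
	(void)item;
}

static void
whole_pass(const int *items, int n) {
	pthread_mutex_lock(&namelock);
	for (int i = 0; i < n; i++) {
		dump_one(items[i]);
	}
	pthread_mutex_unlock(&namelock);
}

This trades per-item lock churn for a longer critical section, matching how main() now wraps the entire signapex() call in LOCK/UNLOCK(&namelock).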

View File

@@ -136,7 +136,6 @@ typedef isc_rwlock_t nodelock_t;
#define NODE_LOCK(l, t) RWLOCK((l), (t))
#define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
#define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
#define NODE_DOWNGRADE(l) isc_rwlock_downgrade(l)
/*%
* Whether to rate-limit updating the LRU to avoid possible thread contention.
@@ -540,11 +539,11 @@ need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now);
static void
update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, isc_stdtime_t now);
static void
expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, bool tree_locked,
expire_t reason);
expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
isc_rwlocktype_t treelocktype, expire_t reason);
static void
overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, isc_stdtime_t now,
bool tree_locked);
isc_rwlocktype_t treelocktype);
static void
resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader);
static void
@@ -1952,14 +1951,11 @@ reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
*/
static bool
decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
rbtdb_serial_t least_serial, isc_rwlocktype_t nlock,
rbtdb_serial_t least_serial, isc_rwlocktype_t *nlock,
isc_rwlocktype_t tlock, bool pruning) {
isc_result_t result;
bool write_locked;
bool locked = tlock != isc_rwlocktype_none;
rbtdb_nodelock_t *nodelock;
int bucket = node->locknum;
bool no_reference = true;
uint_fast32_t refs;
nodelock = &rbtdb->node_locks[bucket];
@@ -1980,16 +1976,13 @@ decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
}
/* Upgrade the lock? */
if (nlock == isc_rwlocktype_read) {
if (*nlock == isc_rwlocktype_read) {
NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
NODE_LOCK(&nodelock->lock, isc_rwlocktype_write);
*nlock = isc_rwlocktype_write;
}
if (isc_refcount_decrement(&node->references) > 1) {
/* Restore the lock? */
if (nlock == isc_rwlocktype_read) {
NODE_DOWNGRADE(&nodelock->lock);
}
return (false);
}
@@ -2010,97 +2003,55 @@ decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
}
}
/*
* Attempt to switch to a write lock on the tree. If this fails,
* we will add this node to a linked list of nodes in this locking
* bucket which we will free later.
*/
if (tlock != isc_rwlocktype_write) {
/*
* Locking hierarchy notwithstanding, we don't need to free
* the node lock before acquiring the tree write lock because
* we only do a trylock.
*/
if (tlock == isc_rwlocktype_read) {
result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
} else {
result = isc_rwlock_trylock(&rbtdb->tree_lock,
isc_rwlocktype_write);
}
RUNTIME_CHECK(result == ISC_R_SUCCESS ||
result == ISC_R_LOCKBUSY);
write_locked = (result == ISC_R_SUCCESS);
} else {
write_locked = true;
}
refs = isc_refcount_decrement(&nodelock->references);
INSIST(refs > 0);
if (KEEP_NODE(node, rbtdb, locked || write_locked)) {
goto restore_locks;
if (KEEP_NODE(node, rbtdb, locked)) {
return (true);
}
#undef KEEP_NODE
if (write_locked) {
/*
* We can now delete the node.
*/
/*
* If this node is the only one in the level it's in, deleting
* this node may recursively make its parent the only node in
* the parent level; if so, and if no one is currently using
* the parent node, this is almost the only opportunity to
* clean it up. But the recursive cleanup is not that trivial
* since the child and parent may be in different lock buckets,
* which would cause a lock order reversal problem. To avoid
* the trouble, we'll dispatch a separate event for batch
* cleaning. We need to check whether we're deleting the node
* as a result of pruning to avoid infinite dispatching.
* Note: pruning happens only when a task has been set for the
* rbtdb. If the user of the rbtdb chooses not to set a task,
* it's their responsibility to purge stale leaves (e.g. by
* periodic walk-through).
*/
if (!pruning && is_leaf(node) && rbtdb->task != NULL) {
send_to_prune_tree(rbtdb, node, isc_rwlocktype_write);
no_reference = false;
} else {
delete_node(rbtdb, node);
}
} else {
/*
* If the tree is not write locked, we will add this node to a linked
* list of nodes in this locking bucket which we will free later.
*/
if (tlock != isc_rwlocktype_write) {
INSIST(node->data == NULL);
if (!ISC_LINK_LINKED(node, deadlink)) {
ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
deadlink);
}
}
restore_locks:
/* Restore the lock? */
if (nlock == isc_rwlocktype_read) {
NODE_DOWNGRADE(&nodelock->lock);
return (true);
}
/*
* Relock a read lock, or unlock the write lock if no lock was held.
* We can now delete the node.
*/
if (tlock == isc_rwlocktype_none) {
if (write_locked) {
RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
}
/*
* If this node is the only one in the level it's in, deleting
* this node may recursively make its parent the only node in
* the parent level; if so, and if no one is currently using
* the parent node, this is almost the only opportunity to
* clean it up. But the recursive cleanup is not that trivial
* since the child and parent may be in different lock buckets,
* which would cause a lock order reversal problem. To avoid
* the trouble, we'll dispatch a separate event for batch
* cleaning. We need to check whether we're deleting the node
* as a result of pruning to avoid infinite dispatching.
* Note: pruning happens only when a task has been set for the
* rbtdb. If the user of the rbtdb chooses not to set a task,
* it's their responsibility to purge stale leaves (e.g. by
* periodic walk-through).
*/
if (!pruning && is_leaf(node) && rbtdb->task != NULL) {
send_to_prune_tree(rbtdb, node, isc_rwlocktype_write);
return (false);
}
if (tlock == isc_rwlocktype_read) {
if (write_locked) {
isc_rwlock_downgrade(&rbtdb->tree_lock);
}
}
return (no_reference);
delete_node(rbtdb, node);
return (true);
}
/*
@@ -2116,6 +2067,7 @@ prune_tree(isc_task_t *task, isc_event_t *event) {
dns_rbtnode_t *node = event->ev_arg;
dns_rbtnode_t *parent;
unsigned int locknum;
isc_rwlocktype_t locktype = isc_rwlocktype_write;
UNUSED(task);
@@ -2123,10 +2075,11 @@ prune_tree(isc_task_t *task, isc_event_t *event) {
RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
locknum = node->locknum;
NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
NODE_LOCK(&rbtdb->node_locks[locknum].lock, locktype);
do {
parent = node->parent;
decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
decrement_reference(rbtdb, node, 0, &locktype,
isc_rwlocktype_write, true);
if (parent != NULL && parent->down == NULL) {
@@ -2159,7 +2112,7 @@ prune_tree(isc_task_t *task, isc_event_t *event) {
node = parent;
} while (node != NULL);
NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, locktype);
RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
detach((dns_db_t **)&rbtdb);
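The rewritten decrement_reference() takes its node-lock state by pointer (isc_rwlocktype_t *nlock): if it needs the write lock it upgrades in place, reports the new state back, and never downgrades, so callers such as prune_tree() above unlock with the reported type afterward. A minimal pthreads sketch of this upgrade-and-report pattern (names hypothetical):

#include <pthread.h>

typedef enum { LOCK_NONE, LOCK_READ, LOCK_WRITE } locktype_t;

/* Upgrade-and-report: force the write lock and tell the caller what
 * it now holds. Note the gap between unlock and wrlock: anything
 * decided under the read lock must be rechecked under the write lock. */
static void
ensure_write_locked(pthread_rwlock_t *lock, locktype_t *held) {
	if (*held == LOCK_READ) {
		pthread_rwlock_unlock(lock);
		pthread_rwlock_wrlock(lock);
		*held = LOCK_WRITE;
	}
}

/* Caller side, mirroring prune_tree(): unlock with the reported type
 * rather than a hard-coded one. (POSIX unlock is type-agnostic;
 * BIND's NODE_UNLOCK macro takes the type explicitly.) */
static void
caller(pthread_rwlock_t *lock) {
	locktype_t held = LOCK_READ;

	pthread_rwlock_rdlock(lock);
	/* ... decide under the read lock ... */
	ensure_write_locked(lock, &held);
	/* ... recheck, then mutate under the write lock ... */
	pthread_rwlock_unlock(lock);
}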
@@ -2569,18 +2522,19 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp, bool commit) {
for (header = HEAD(resigned_list); header != NULL;
header = HEAD(resigned_list)) {
nodelock_t *lock;
isc_rwlocktype_t locktype;
ISC_LIST_UNLINK(resigned_list, header, link);
lock = &rbtdb->node_locks[header->node->locknum].lock;
NODE_LOCK(lock, isc_rwlocktype_write);
locktype = isc_rwlocktype_write;
NODE_LOCK(lock, locktype);
if (rollback && !IGNORE(header)) {
resign_insert(rbtdb, header->node->locknum, header);
}
decrement_reference(rbtdb, header->node, least_serial,
isc_rwlocktype_write, isc_rwlocktype_none,
false);
NODE_UNLOCK(lock, isc_rwlocktype_write);
&locktype, isc_rwlocktype_none, false);
NODE_UNLOCK(lock, locktype);
}
if (!EMPTY(cleanup_list)) {
@@ -2603,19 +2557,21 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp, bool commit) {
* expensive, but this event should be rare enough
* to justify the cost.
*/
RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
tlock = isc_rwlocktype_write;
RWLOCK(&rbtdb->tree_lock, tlock);
}
for (changed = HEAD(cleanup_list); changed != NULL;
changed = next_changed) {
nodelock_t *lock;
isc_rwlocktype_t locktype;
next_changed = NEXT(changed, link);
rbtnode = changed->node;
lock = &rbtdb->node_locks[rbtnode->locknum].lock;
locktype = isc_rwlocktype_write;
NODE_LOCK(lock, isc_rwlocktype_write);
NODE_LOCK(lock, locktype);
/*
* This is a good opportunity to purge any dead nodes,
* so use it.
@@ -2628,9 +2584,9 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp, bool commit) {
rollback_node(rbtnode, serial);
}
decrement_reference(rbtdb, rbtnode, least_serial,
isc_rwlocktype_write, tlock, false);
&locktype, tlock, false);
NODE_UNLOCK(lock, isc_rwlocktype_write);
NODE_UNLOCK(lock, locktype);
isc_mem_put(rbtdb->common.mctx, changed,
sizeof(*changed));
@@ -2639,7 +2595,7 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp, bool commit) {
isc_refcount_increment(&rbtdb->references);
isc_task_send(rbtdb->task, &event);
} else {
RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
RWUNLOCK(&rbtdb->tree_lock, tlock);
}
}
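Because decrement_reference() no longer tries to upgrade or trylock the tree lock, a node whose last reference drops without the tree write lock held is parked on the per-bucket deadnodes list and reclaimed later by cleanup_dead_nodes() under the tree write lock (see the addrdataset() hunk further down). A self-contained sketch of that deferred-reclamation shape, with hypothetical types and a fixed bucket count:

#include <stddef.h>

typedef struct deadnode {
	struct deadnode *next;
	/* ... node payload ... */
} deadnode_t;

#define BUCKETS 64
static deadnode_t *deadnodes[BUCKETS];

/* Append needs only this bucket's node lock, never the tree lock. */
static void
defer_delete(deadnode_t *node, int bucket) {
	node->next = deadnodes[bucket];
	deadnodes[bucket] = node;
}

/* Later, a pass that does hold the tree write lock drains the list. */
static void
reap_dead_nodes(int bucket) {
	while (deadnodes[bucket] != NULL) {
		deadnode_t *node = deadnodes[bucket];
		deadnodes[bucket] = node->next;
		/* unlink from the tree and free `node` here */
	}
}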
@@ -3896,6 +3852,7 @@ zone_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
rbtdb_rdatatype_t sigtype;
bool active;
nodelock_t *lock;
isc_rwlocktype_t locktype;
dns_rbt_t *tree;
search.rbtdb = (dns_rbtdb_t *)db;
@@ -4396,11 +4353,12 @@ tree_exit:
node = search.zonecut;
INSIST(node != NULL);
lock = &(search.rbtdb->node_locks[node->locknum].lock);
locktype = isc_rwlocktype_read;
NODE_LOCK(lock, isc_rwlocktype_read);
decrement_reference(search.rbtdb, node, 0, isc_rwlocktype_read,
NODE_LOCK(lock, locktype);
decrement_reference(search.rbtdb, node, 0, &locktype,
isc_rwlocktype_none, false);
NODE_UNLOCK(lock, isc_rwlocktype_read);
NODE_UNLOCK(lock, locktype);
}
if (close_version) {
@@ -5253,11 +5211,12 @@ tree_exit:
node = search.zonecut;
INSIST(node != NULL);
lock = &(search.rbtdb->node_locks[node->locknum].lock);
locktype = isc_rwlocktype_read;
NODE_LOCK(lock, isc_rwlocktype_read);
decrement_reference(search.rbtdb, node, 0, isc_rwlocktype_read,
NODE_LOCK(lock, locktype);
decrement_reference(search.rbtdb, node, 0, &locktype,
isc_rwlocktype_none, false);
NODE_UNLOCK(lock, isc_rwlocktype_read);
NODE_UNLOCK(lock, locktype);
}
dns_rbtnodechain_reset(&search.chain);
@@ -5458,25 +5417,26 @@ detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
bool want_free = false;
bool inactive = false;
rbtdb_nodelock_t *nodelock;
isc_rwlocktype_t locktype;
REQUIRE(VALID_RBTDB(rbtdb));
REQUIRE(targetp != NULL && *targetp != NULL);
node = (dns_rbtnode_t *)(*targetp);
nodelock = &rbtdb->node_locks[node->locknum];
locktype = isc_rwlocktype_read;
NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
NODE_LOCK(&nodelock->lock, locktype);
if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
isc_rwlocktype_none, false))
{
if (decrement_reference(rbtdb, node, 0, &locktype, isc_rwlocktype_none,
false)) {
if (isc_refcount_current(&nodelock->references) == 0 &&
nodelock->exiting) {
inactive = true;
}
}
NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
NODE_UNLOCK(&nodelock->lock, locktype);
*targetp = NULL;
@@ -6728,7 +6688,7 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
isc_result_t result;
bool delegating;
bool newnsec;
bool tree_locked = false;
isc_rwlocktype_t treelocktype = isc_rwlocktype_none;
bool cache_is_overmem = false;
dns_fixedname_t fixed;
dns_name_t *name;
@@ -6873,12 +6833,12 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
cache_is_overmem = true;
}
if (delegating || newnsec || cache_is_overmem) {
tree_locked = true;
RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
treelocktype = isc_rwlocktype_write;
RWLOCK(&rbtdb->tree_lock, treelocktype);
}
if (cache_is_overmem) {
overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
overmem_purge(rbtdb, rbtnode->locknum, now, treelocktype);
}
NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
@@ -6892,7 +6852,7 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
}
if (IS_CACHE(rbtdb)) {
if (tree_locked) {
if (treelocktype == isc_rwlocktype_write) {
cleanup_dead_nodes(rbtdb, rbtnode->locknum);
}
@@ -6901,7 +6861,7 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
header->rdh_ttl + STALE_TTL(header, rbtdb) <
now - RBTDB_VIRTUAL)
{
expire_header(rbtdb, header, tree_locked, expire_ttl);
expire_header(rbtdb, header, treelocktype, expire_ttl);
}
/*
@@ -6909,9 +6869,10 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
* cleaning, we can release it now. However, we still need the
* node lock.
*/
if (tree_locked && !delegating && !newnsec) {
RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
tree_locked = false;
if (treelocktype == isc_rwlocktype_write && !delegating &&
!newnsec) {
RWUNLOCK(&rbtdb->tree_lock, treelocktype);
treelocktype = isc_rwlocktype_none;
}
}
@@ -6941,8 +6902,8 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
isc_rwlocktype_write);
if (tree_locked) {
RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
if (treelocktype == isc_rwlocktype_write) {
RWUNLOCK(&rbtdb->tree_lock, treelocktype);
}
/*
@@ -8615,7 +8576,7 @@ rdataset_expire(dns_rdataset_t *rdataset) {
header--;
NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
isc_rwlocktype_write);
expire_header(rbtdb, header, false, expire_flush);
expire_header(rbtdb, header, isc_rwlocktype_none, expire_flush);
NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
isc_rwlocktype_write);
}
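A recurring change in this file: the bool tree_locked parameter becomes an isc_rwlocktype_t treelocktype, so a callee learns exactly which lock its caller holds instead of just "write-locked or not", and can assert the states it supports, as the REQUIRE() added to expire_header() below does. In sketch form (hypothetical names):

#include <assert.h>
#include <stdbool.h>

typedef enum { RW_NONE, RW_READ, RW_WRITE } rwlocktype_t;

/* Before: two states; "read-locked" is unrepresentable. */
static void
expire_bool(bool tree_locked) {
	(void)tree_locked;
}

/* After: the precondition is explicit and checkable, mirroring the
 * REQUIRE() in the new expire_header(). */
static void
expire_enum(rwlocktype_t treelocktype) {
	assert(treelocktype == RW_NONE || treelocktype == RW_WRITE);
	/* ... expire under the stated lock state ... */
}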
@@ -8816,27 +8777,27 @@ dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
dns_rbtnode_t *node = rbtdbiter->node;
nodelock_t *lock;
isc_rwlocktype_t locktype;
if (node == NULL) {
return;
}
lock = &rbtdb->node_locks[node->locknum].lock;
NODE_LOCK(lock, isc_rwlocktype_read);
decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
rbtdbiter->tree_locked, false);
NODE_UNLOCK(lock, isc_rwlocktype_read);
locktype = isc_rwlocktype_read;
NODE_LOCK(lock, locktype);
decrement_reference(rbtdb, node, 0, &locktype, rbtdbiter->tree_locked,
false);
NODE_UNLOCK(lock, locktype);
rbtdbiter->node = NULL;
}
static void
flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
dns_rbtnode_t *node;
dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
bool was_read_locked = false;
nodelock_t *lock;
int i;
isc_rwlocktype_t treelocktype;
if (rbtdbiter->delcnt != 0) {
/*
@@ -8851,31 +8812,47 @@ flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
rbtdbiter->delcnt,
dns_rbt_nodecount(rbtdb->tree));
if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
treelocktype = rbtdbiter->tree_locked;
switch (treelocktype) {
case isc_rwlocktype_read:
rbtdbiter->tree_locked = isc_rwlocktype_none;
RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
was_read_locked = true;
FALLTHROUGH;
case isc_rwlocktype_none:
RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
rbtdbiter->tree_locked = isc_rwlocktype_write;
break;
default:
/* Only one flush_deletions() can be running */
UNREACHABLE();
}
RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
rbtdbiter->tree_locked = isc_rwlocktype_write;
for (i = 0; i < rbtdbiter->delcnt; i++) {
node = rbtdbiter->deletions[i];
lock = &rbtdb->node_locks[node->locknum].lock;
for (int i = 0; i < rbtdbiter->delcnt; i++) {
dns_rbtnode_t *node = rbtdbiter->deletions[i];
nodelock_t *lock =
&rbtdb->node_locks[node->locknum].lock;
isc_rwlocktype_t locktype = isc_rwlocktype_read;
NODE_LOCK(lock, isc_rwlocktype_read);
decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
rbtdbiter->tree_locked, false);
NODE_UNLOCK(lock, isc_rwlocktype_read);
NODE_LOCK(lock, locktype);
decrement_reference(rbtdb, node, 0, &locktype,
isc_rwlocktype_write, false);
NODE_UNLOCK(lock, locktype);
}
rbtdbiter->delcnt = 0;
rbtdbiter->tree_locked = isc_rwlocktype_none;
RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
if (was_read_locked) {
switch (treelocktype) {
case isc_rwlocktype_read:
RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
rbtdbiter->tree_locked = isc_rwlocktype_read;
} else {
rbtdbiter->tree_locked = isc_rwlocktype_none;
break;
case isc_rwlocktype_none:
rbtdbiter->tree_locked = treelocktype;
break;
default:
UNREACHABLE();
}
}
}
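flush_deletions() now treats the tree lock as an explicit tri-state: record the state on entry, upgrade read or none to write (the switch with FALLTHROUGH), and restore the entry state on the way out, with UNREACHABLE() guarding states that cannot occur. Reduced to a pthreads sketch (hypothetical names):

#include <pthread.h>

typedef enum { RW_NONE, RW_READ, RW_WRITE } rwlocktype_t;

static void
with_tree_write(pthread_rwlock_t *lock, rwlocktype_t *state,
		void (*body)(void)) {
	rwlocktype_t entry = *state;	/* remember how we came in */

	if (entry == RW_READ) {
		pthread_rwlock_unlock(lock);
	}
	pthread_rwlock_wrlock(lock);
	*state = RW_WRITE;

	body();				/* work under the write lock */

	pthread_rwlock_unlock(lock);
	if (entry == RW_READ) {
		pthread_rwlock_rdlock(lock);
	}
	*state = entry;			/* restore the entry state */
}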
@@ -8899,11 +8876,15 @@ dbiterator_destroy(dns_dbiterator_t **iteratorp) {
dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
dns_db_t *db = NULL;
if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
switch (rbtdbiter->tree_locked) {
case isc_rwlocktype_read:
RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
rbtdbiter->tree_locked = isc_rwlocktype_none;
} else {
INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
break;
case isc_rwlocktype_none:
break;
default:
UNREACHABLE();
}
dereference_iter_node(rbtdbiter);
@@ -9292,10 +9273,15 @@ dbiterator_pause(dns_dbiterator_t *iterator) {
rbtdbiter->paused = true;
if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
switch (rbtdbiter->tree_locked) {
case isc_rwlocktype_read:
rbtdbiter->tree_locked = isc_rwlocktype_none;
RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
break;
case isc_rwlocktype_none:
break;
default:
UNREACHABLE();
}
flush_deletions(rbtdbiter);
@@ -10001,7 +9987,7 @@ update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, isc_stdtime_t now) {
*/
static void
overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, isc_stdtime_t now,
bool tree_locked) {
isc_rwlocktype_t treelocktype) {
rdatasetheader_t *header, *header_prev;
unsigned int locknum;
int purgecount = 2;
@@ -10015,7 +10001,7 @@ overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, isc_stdtime_t now,
header = isc_heap_element(rbtdb->heaps[locknum], 1);
if (header && header->rdh_ttl < now - RBTDB_VIRTUAL) {
expire_header(rbtdb, header, tree_locked, expire_ttl);
expire_header(rbtdb, header, treelocktype, expire_ttl);
purgecount--;
}
@@ -10032,7 +10018,7 @@ overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, isc_stdtime_t now,
*/
ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
link);
expire_header(rbtdb, header, tree_locked, expire_lru);
expire_header(rbtdb, header, treelocktype, expire_lru);
purgecount--;
}
@@ -10042,8 +10028,11 @@ overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, isc_stdtime_t now,
}
static void
expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, bool tree_locked,
expire_t reason) {
expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
isc_rwlocktype_t treelocktype, expire_t reason) {
REQUIRE(treelocktype == isc_rwlocktype_none ||
treelocktype == isc_rwlocktype_write);
set_ttl(rbtdb, header, 0);
mark_header_ancient(rbtdb, header);
@@ -10052,17 +10041,15 @@ expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, bool tree_locked,
*/
if (isc_refcount_current(&header->node->references) == 0) {
isc_rwlocktype_t locktype = isc_rwlocktype_write;
/*
* If no one else is using the node, we can clean it up now.
* We first need to gain a new reference to the node to meet a
* requirement of decrement_reference().
*/
new_reference(rbtdb, header->node, isc_rwlocktype_write);
decrement_reference(rbtdb, header->node, 0,
isc_rwlocktype_write,
tree_locked ? isc_rwlocktype_write
: isc_rwlocktype_none,
false);
decrement_reference(rbtdb, header->node, 0, &locktype,
treelocktype, false);
if (rbtdb->cachestats == NULL) {
return;

View File

@@ -37,7 +37,6 @@ typedef enum {
struct isc_rwlock {
pthread_rwlock_t rwlock;
atomic_bool downgrade;
};
#if ISC_TRACK_PTHREADS_OBJECTS
@@ -54,7 +53,6 @@ typedef struct isc_rwlock isc__rwlock_t;
#define isc_rwlock_trylock(rwl, type) isc___rwlock_trylock(*rwl, type)
#define isc_rwlock_unlock(rwl, type) isc__rwlock_unlock(*rwl, type)
#define isc_rwlock_tryupgrade(rwl) isc___rwlock_tryupgrade(*rwl)
#define isc_rwlock_downgrade(rwl) isc__rwlock_downgrade(*rwl)
#define isc_rwlock_destroy(rwl) \
{ \
isc___rwlock_destroy(*rwl); \
@@ -71,7 +69,6 @@ typedef struct isc_rwlock isc__rwlock_t;
#define isc_rwlock_trylock(rwl, type) isc___rwlock_trylock(rwl, type)
#define isc_rwlock_unlock(rwl, type) isc__rwlock_unlock(rwl, type)
#define isc_rwlock_tryupgrade(rwl) isc___rwlock_tryupgrade(rwl)
#define isc_rwlock_downgrade(rwl) isc__rwlock_downgrade(rwl)
#define isc_rwlock_destroy(rwl) isc__rwlock_destroy(rwl)
#endif /* ISC_TRACK_PTHREADS_OBJECTS */
@@ -122,7 +119,6 @@ typedef struct isc_rwlock isc__rwlock_t;
#define isc_rwlock_trylock(rwl, type) isc___rwlock_trylock(rwl, type)
#define isc_rwlock_unlock(rwl, type) isc__rwlock_unlock(rwl, type)
#define isc_rwlock_tryupgrade(rwl) isc___rwlock_tryupgrade(rwl)
#define isc_rwlock_downgrade(rwl) isc__rwlock_downgrade(rwl)
#define isc_rwlock_destroy(rwl) isc__rwlock_destroy(rwl)
#endif /* USE_PTHREAD_RWLOCK */
@@ -145,12 +141,6 @@ typedef struct isc_rwlock isc__rwlock_t;
PTHREADS_RUNTIME_CHECK(isc___rwlock_unlock, _ret); \
}
#define isc__rwlock_downgrade(rwl) \
{ \
int _ret = isc___rwlock_downgrade(rwl); \
PTHREADS_RUNTIME_CHECK(isc___rwlock_downgrade, _ret); \
}
#define isc__rwlock_destroy(rwl) \
{ \
int _ret = isc___rwlock_destroy(rwl); \
@@ -173,9 +163,6 @@ isc___rwlock_unlock(isc__rwlock_t *rwl, isc_rwlocktype_t type);
isc_result_t
isc___rwlock_tryupgrade(isc__rwlock_t *rwl);
int
isc___rwlock_downgrade(isc__rwlock_t *rwl);
int
isc___rwlock_destroy(isc__rwlock_t *rwl);

View File

@@ -41,37 +41,16 @@ isc___rwlock_init(isc__rwlock_t *rwl, unsigned int read_quota,
ret = pthread_rwlock_init(&rwl->rwlock, NULL);
atomic_init(&rwl->downgrade, false);
return (ret);
}
int
isc___rwlock_lock(isc__rwlock_t *rwl, isc_rwlocktype_t type) {
int ret;
switch (type) {
case isc_rwlocktype_read:
return (pthread_rwlock_rdlock(&rwl->rwlock));
case isc_rwlocktype_write:
while (true) {
ret = pthread_rwlock_wrlock(&rwl->rwlock);
if (ret != 0) {
return (ret);
}
/* Unlock if in middle of downgrade operation */
if (atomic_load_acquire(&rwl->downgrade)) {
ret = pthread_rwlock_unlock(&rwl->rwlock);
if (ret != 0) {
return (ret);
}
while (atomic_load_acquire(&rwl->downgrade)) {
}
continue;
}
break;
}
return (0);
return (pthread_rwlock_wrlock(&rwl->rwlock));
default:
UNREACHABLE();
}
@@ -86,10 +65,6 @@ isc___rwlock_trylock(isc__rwlock_t *rwl, isc_rwlocktype_t type) {
break;
case isc_rwlocktype_write:
ret = pthread_rwlock_trywrlock(&rwl->rwlock);
if ((ret == 0) && atomic_load_acquire(&rwl->downgrade)) {
RUNTIME_CHECK(pthread_rwlock_unlock(&rwl->rwlock) == 0);
return (ISC_R_LOCKBUSY);
}
break;
default:
UNREACHABLE();
@@ -119,27 +94,6 @@ isc___rwlock_tryupgrade(isc__rwlock_t *rwl) {
return (ISC_R_LOCKBUSY);
}
int
isc___rwlock_downgrade(isc__rwlock_t *rwl) {
int ret;
atomic_store_release(&rwl->downgrade, true);
ret = pthread_rwlock_unlock(&rwl->rwlock);
if (ret != 0) {
return (ret);
}
ret = pthread_rwlock_rdlock(&rwl->rwlock);
if (ret != 0) {
return (ret);
}
atomic_store_release(&rwl->downgrade, false);
return (0);
}
int
isc___rwlock_destroy(isc__rwlock_t *rwl) {
return (pthread_rwlock_destroy(&rwl->rwlock));
@@ -556,32 +510,6 @@ isc___rwlock_tryupgrade(isc__rwlock_t *rwl) {
return (ISC_R_SUCCESS);
}
int
isc___rwlock_downgrade(isc__rwlock_t *rwl) {
int32_t prev_readers;
REQUIRE(VALID_RWLOCK(rwl));
/* Become an active reader. */
prev_readers = atomic_fetch_add_release(&rwl->cnt_and_flag,
READER_INCR);
/* We must have been a writer. */
INSIST((prev_readers & WRITER_ACTIVE) != 0);
/* Complete write */
atomic_fetch_sub_release(&rwl->cnt_and_flag, WRITER_ACTIVE);
atomic_fetch_add_release(&rwl->write_completions, 1);
/* Resume other readers */
LOCK(&rwl->lock);
if (rwl->readers_waiting > 0) {
BROADCAST(&rwl->readable);
}
UNLOCK(&rwl->lock);
return (0);
}
int
isc___rwlock_unlock(isc__rwlock_t *rwl, isc_rwlocktype_t type) {
int32_t prev_cnt;
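For context on the deletions in the last two files: POSIX has no atomic write-to-read downgrade, so the pthread-based isc___rwlock_downgrade() had to release the write lock and re-acquire a read lock, using the atomic downgrade flag to hold writers out of the gap; that flag is also why isc___rwlock_lock() and isc___rwlock_trylock() carried the extra checks removed above. The deleted emulation, in outline:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

typedef struct {
	pthread_rwlock_t rwlock;
	atomic_bool downgrade;	/* true while a downgrade is in flight */
} rwlock_t;

/* Drop write, re-take read. The flag keeps writers from slipping
 * into the window in between, at the cost of a check (and possible
 * spin) on every write-lock acquisition. */
static int
rwlock_downgrade(rwlock_t *rwl) {
	int ret;

	atomic_store_explicit(&rwl->downgrade, true, memory_order_release);
	ret = pthread_rwlock_unlock(&rwl->rwlock);
	if (ret == 0) {
		ret = pthread_rwlock_rdlock(&rwl->rwlock);
	}
	atomic_store_explicit(&rwl->downgrade, false, memory_order_release);
	return (ret);
}

Removing the operation eliminates that per-acquisition overhead, which is presumably what motivates the decrement_reference() rework earlier in this comparison: upgrade and report, never downgrade.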