Compare commits

...

6 Commits

Author SHA1 Message Date
Ondřej Surý
45cfd3c29e fixup! WIP: Simplify the overmem cleaning to not switch locknum 2024-04-22 12:44:15 +02:00
Ondřej Surý
fc90344dad fixup! WIP: Simplify the overmem cleaning to not switch locknum 2024-04-22 12:24:42 +02:00
Ondřej Surý
512c1c3071 fixup! WIP: Simplify the overmem cleaning to not switch locknum 2024-04-22 12:12:08 +02:00
Ondřej Surý
e421ac168a WIP: Simplify the overmem cleaning to not switch locknum 2024-04-22 12:12:08 +02:00
Ondřej Surý
528a3930b5 Use isc_queue to implement wait-free deadnodes queue
Replace the ISC_LIST based deadnodes implementation with isc_queue, which
is wait-free: we don't have to acquire either the tree lock or the node
lock to append nodes to the queue, and the cleaning process can also
copy (splice) the list into a local copy without acquiring the lock.

Currently, there's little benefit to this as we need to hold those
locks anyway, but in the future, as we move to an RCU based
implementation, this will be ready.

To align the cleaning with our event loop based model, remove the
hardcoded count for the node locks and use the number of the event loops
instead.  This way each event loop can have its own cleaning as part of the
process.  Use the uniform random numbers to spread the nodes evenly
between the buckets (instead of hashing the domain name).
2024-04-22 12:12:00 +02:00
Ondřej Surý
7d8b81d2e8 Add isc_queue implementation on top of cds_wfcq
Add an isc_queue implementation that hides the gory details of cds_wfcq
behind a neater API.  The same caveats apply as with cds_wfcq.

TODO: Add documentation to the API.
2024-04-22 12:12:00 +02:00
10 changed files with 245 additions and 364 deletions

View File

@@ -68,7 +68,7 @@ struct dns_cache {
isc_mutex_t lock;
isc_mem_t *mctx; /* Main cache memory */
isc_mem_t *hmctx; /* Heap memory */
isc_loop_t *loop;
isc_loopmgr_t *loopmgr;
char *name;
isc_refcount_t references;
@@ -97,11 +97,11 @@ cache_create_db(dns_cache_t *cache, dns_db_t **db) {
*/
argv[0] = (char *)cache->hmctx;
result = dns_db_create(cache->mctx, CACHEDB_DEFAULT, dns_rootname,
dns_dbtype_cache, cache->rdclass, 1, argv, db);
dns_dbtype_cache, cache->rdclass,
ARRAY_SIZE(argv), argv, db);
if (result == ISC_R_SUCCESS) {
dns_db_setservestalettl(*db, cache->serve_stale_ttl);
dns_db_setservestalerefresh(*db, cache->serve_stale_refresh);
dns_db_setloop(*db, cache->loop);
}
return (result);
@@ -140,7 +140,7 @@ dns_cache_create(isc_loopmgr_t *loopmgr, dns_rdataclass_t rdclass,
.hmctx = hmctx,
.rdclass = rdclass,
.name = isc_mem_strdup(mctx, cachename),
.loop = isc_loop_ref(isc_loop_main(loopmgr)),
.loopmgr = loopmgr,
};
isc_mutex_init(&cache->lock);
@@ -157,7 +157,6 @@ dns_cache_create(isc_loopmgr_t *loopmgr, dns_rdataclass_t rdclass,
goto cleanup_stats;
}
dns_db_setloop(cache->db, isc_loop_main(loopmgr));
cache->magic = CACHE_MAGIC;
/*
@@ -178,7 +177,6 @@ cleanup_stats:
isc_stats_detach(&cache->stats);
isc_mutex_destroy(&cache->lock);
isc_mem_free(mctx, cache->name);
isc_loop_detach(&cache->loop);
isc_mem_detach(&cache->hmctx);
isc_mem_putanddetach(&cache->mctx, cache, sizeof(*cache));
return (result);
@@ -197,8 +195,6 @@ cache_free(dns_cache_t *cache) {
isc_mutex_destroy(&cache->lock);
isc_loop_detach(&cache->loop);
cache->magic = 0;
isc_mem_detach(&cache->hmctx);
isc_mem_putanddetach(&cache->mctx, cache, sizeof(*cache));

View File

@@ -53,6 +53,7 @@
#include <stdbool.h>
#include <isc/lang.h>
#include <isc/loop.h>
#include <isc/magic.h>
#include <isc/rwlock.h>
#include <isc/stats.h>

View File

@@ -30,6 +30,7 @@
#include <isc/mem.h>
#include <isc/mutex.h>
#include <isc/once.h>
#include <isc/queue.h>
#include <isc/random.h>
#include <isc/refcount.h>
#include <isc/result.h>
@@ -114,8 +115,6 @@
#define ACTIVE(header, now) \
(((header)->ttl > (now)) || ((header)->ttl == (now) && ZEROTTL(header)))
#define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */
#define EXPIREDOK(rbtiterator) \
(((rbtiterator)->common.options & DNS_DB_EXPIREDOK) != 0)
@@ -158,26 +157,6 @@
/*% Time after which we update LRU for all other records, 10 minutes */
#define DNS_QPDB_LRUUPDATE_REGULAR 600
/*%
* Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
* There is a tradeoff issue about configuring this value: if this is too
* small, it may cause heavier contention between threads; if this is too large,
* LRU purge algorithm won't work well (entries tend to be purged prematurely).
* The default value should work well for most environments, but this can
* also be configurable at compilation time via the
* DNS_QPDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than
* 1 due to the assumption of overmem().
*/
#ifdef DNS_QPDB_CACHE_NODE_LOCK_COUNT
#if DNS_QPDB_CACHE_NODE_LOCK_COUNT <= 1
#error "DNS_QPDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
#else /* if DNS_QPDB_CACHE_NODE_LOCK_COUNT <= 1 */
#define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_QPDB_CACHE_NODE_LOCK_COUNT
#endif /* if DNS_QPDB_CACHE_NODE_LOCK_COUNT <= 1 */
#else /* ifdef DNS_QPDB_CACHE_NODE_LOCK_COUNT */
#define DEFAULT_CACHE_NODE_LOCK_COUNT 17
#endif /* DNS_QPDB_CACHE_NODE_LOCK_COUNT */
/*
* This defines the number of headers that we try to expire each time the
* expire_ttl_headers() is run. The number should be small enough, so the
@@ -185,6 +164,7 @@
* enough, so we expire enough headers if their TTL is clustered.
*/
#define DNS_QPDB_EXPIRE_TTL_COUNT 10
#define DNS_QPDB_EXPIRE_LRU_COUNT 10
/*%
* This is the structure that is used for each node in the qp trie of trees.
@@ -220,11 +200,12 @@ struct dns_qpdata {
isc_mem_t *mctx;
/*%
* Used for LRU cache. This linked list is used to mark nodes which
* have no data any longer, but we cannot unlink at that exact moment
* because we did not or could not obtain a write lock on the tree.
* Used for dead nodes cleaning. This linked list is used to mark nodes
* which have no data any longer, but we cannot unlink at that exact
* moment because we did not or could not obtain a write lock on the
* tree.
*/
ISC_LINK(dns_qpdata_t) deadlink;
isc_queue_node_t deadlink;
/*@{*/
/*!
@@ -265,6 +246,8 @@ typedef ISC_LIST(qpdb_changed_t) qpdb_changedlist_t;
struct dns_qpdb {
/* Unlocked. */
dns_db_t common;
/* Loopmgr */
isc_loopmgr_t *loopmgr;
/* Locks the data in this struct */
isc_rwlock_t lock;
/* Locks the tree structure (prevents nodes appearing/disappearing) */
@@ -283,7 +266,6 @@ struct dns_qpdb {
uint32_t current_serial;
uint32_t least_serial;
uint32_t next_serial;
isc_loop_t *loop;
dns_dbnode_t *soanode;
dns_dbnode_t *nsnode;
@@ -301,22 +283,11 @@ struct dns_qpdb {
*/
dns_slabheaderlist_t *lru;
/*
* Start point % node_lock_count for next LRU cleanup.
*/
atomic_uint lru_sweep;
/*
* When performing LRU cleaning limit cleaning to headers that were
* last used at or before this.
*/
_Atomic(isc_stdtime_t) last_used;
/*%
* Temporary storage for stale cache nodes and dynamically deleted
* nodes that await being cleaned up.
*/
dns_qpdatalist_t *deadnodes;
isc_queue_t *deadnodes;
/*
* Heaps. These are used for TTL based expiry in a cache,
@@ -662,8 +633,6 @@ static void
delete_node(dns_qpdb_t *qpdb, dns_qpdata_t *node) {
isc_result_t result = ISC_R_UNEXPECTED;
INSIST(!ISC_LINK_LINKED(node, deadlink));
if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
char printname[DNS_NAME_FORMATSIZE];
dns_name_format(&node->name, printname, sizeof(printname));
@@ -713,15 +682,9 @@ delete_node(dns_qpdb_t *qpdb, dns_qpdata_t *node) {
*/
static void
newref(dns_qpdb_t *qpdb, dns_qpdata_t *node,
isc_rwlocktype_t nlocktype DNS__DB_FLARG) {
isc_rwlocktype_t nlocktype ISC_ATTR_UNUSED DNS__DB_FLARG) {
uint_fast32_t refs;
if (nlocktype == isc_rwlocktype_write &&
ISC_LINK_LINKED(node, deadlink))
{
ISC_LIST_UNLINK(qpdb->deadnodes[node->locknum], node, deadlink);
}
dns_qpdata_ref(node);
refs = isc_refcount_increment0(&node->erefs);
@@ -748,6 +711,9 @@ newref(dns_qpdb_t *qpdb, dns_qpdata_t *node,
}
}
static void
cleanup_deadnodes(void *arg);
/*
* Caller must be holding the node lock; either the read or write lock.
* Note that the lock must be held even when node references are
@@ -893,10 +859,16 @@ decref(dns_qpdb_t *qpdb, dns_qpdata_t *node, uint32_t least_serial,
*/
delete_node(qpdb, node);
} else {
INSIST(node->data == NULL);
if (!ISC_LINK_LINKED(node, deadlink)) {
ISC_LIST_APPEND(qpdb->deadnodes[bucket], node,
deadlink);
newref(qpdb, node, *nlocktypep DNS__DB_FLARG_PASS);
isc_queue_node_init(&node->deadlink);
if (!isc_queue_enqueue_entry(&qpdb->deadnodes[bucket], node,
deadlink))
{
/* Queue was empty, trigger new cleaning */
isc_loop_t *loop = isc_loop_get(qpdb->loopmgr, bucket);
isc_async_run(loop, cleanup_deadnodes, qpdb);
}
}
@@ -2427,104 +2399,6 @@ expiredata(dns_db_t *db, dns_dbnode_t *node, void *data) {
INSIST(tlocktype == isc_rwlocktype_none);
}
static size_t
rdataset_size(dns_slabheader_t *header) {
if (!NONEXISTENT(header)) {
return (dns_rdataslab_size((unsigned char *)header,
sizeof(*header)));
}
return (sizeof(*header));
}
static size_t
expire_lru_headers(dns_qpdb_t *qpdb, unsigned int locknum,
isc_rwlocktype_t *tlocktypep,
size_t purgesize DNS__DB_FLARG) {
dns_slabheader_t *header = NULL;
size_t purged = 0;
for (header = ISC_LIST_TAIL(qpdb->lru[locknum]);
header != NULL && header->last_used <= qpdb->last_used &&
purged <= purgesize;
header = ISC_LIST_TAIL(qpdb->lru[locknum]))
{
size_t header_size = rdataset_size(header);
/*
* Unlink the entry at this point to avoid checking it
* again even if it's currently used someone else and
* cannot be purged at this moment. This entry won't be
* referenced any more (so unlinking is safe) since the
* TTL will be reset to 0.
*/
ISC_LIST_UNLINK(qpdb->lru[locknum], header, link);
expireheader(header, tlocktypep,
dns_expire_lru DNS__DB_FLARG_PASS);
purged += header_size;
}
return (purged);
}
/*%
* Purge some expired and/or stale (i.e. unused for some period) cache entries
* due to an overmem condition. To recover from this condition quickly,
* we clean up entries up to the size of newly added rdata that triggered
* the overmem; this is accessible via newheader.
*
* The LRU lists tails are processed in LRU order to the nearest second.
*
* A write lock on the tree must be held.
*/
static void
overmem(dns_qpdb_t *qpdb, dns_slabheader_t *newheader,
isc_rwlocktype_t *tlocktypep DNS__DB_FLARG) {
uint32_t locknum_start = qpdb->lru_sweep++ % qpdb->node_lock_count;
uint32_t locknum = locknum_start;
/* Size of added data, possible node and possible ENT node. */
size_t purgesize = rdataset_size(newheader) + 2 * sizeof(dns_qpdata_t);
size_t purged = 0;
isc_stdtime_t min_last_used = 0;
size_t max_passes = 8;
again:
do {
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
NODE_WRLOCK(&qpdb->node_locks[locknum].lock, &nlocktype);
purged += expire_lru_headers(qpdb, locknum, tlocktypep,
purgesize -
purged DNS__DB_FLARG_PASS);
/*
* Work out the oldest remaining last_used values of the list
* tails as we walk across the array of lru lists.
*/
dns_slabheader_t *header = ISC_LIST_TAIL(qpdb->lru[locknum]);
if (header != NULL &&
(min_last_used == 0 || header->last_used < min_last_used))
{
min_last_used = header->last_used;
}
NODE_UNLOCK(&qpdb->node_locks[locknum].lock, &nlocktype);
locknum = (locknum + 1) % qpdb->node_lock_count;
} while (locknum != locknum_start && purged <= purgesize);
/*
* Update qpdb->last_used if we have walked all the list tails and have
* not freed the required amount of memory.
*/
if (purged < purgesize) {
if (min_last_used != 0) {
qpdb->last_used = min_last_used;
if (max_passes-- > 0) {
goto again;
}
}
}
}
static bool
prio_type(dns_typepair_t type) {
switch (type) {
@@ -2577,21 +2451,7 @@ free_qpdb(dns_qpdb_t *qpdb, bool log) {
char buf[DNS_NAME_FORMATSIZE];
dns_qp_t **treep = NULL;
/*
* We assume the number of remaining dead nodes is reasonably small;
* the overhead of unlinking all nodes here should be negligible.
*/
for (i = 0; i < qpdb->node_lock_count; i++) {
dns_qpdata_t *node = NULL;
node = ISC_LIST_HEAD(qpdb->deadnodes[i]);
while (node != NULL) {
ISC_LIST_UNLINK(qpdb->deadnodes[i], node, deadlink);
node = ISC_LIST_HEAD(qpdb->deadnodes[i]);
}
}
qpdb->quantum = (qpdb->loop != NULL) ? 100 : 0;
qpdb->quantum = 100;
for (;;) {
/*
@@ -2647,13 +2507,13 @@ free_qpdb(dns_qpdb_t *qpdb, bool log) {
/*
* Clean up dead node buckets.
*/
if (qpdb->deadnodes != NULL) {
for (i = 0; i < qpdb->node_lock_count; i++) {
INSIST(ISC_LIST_EMPTY(qpdb->deadnodes[i]));
}
isc_mem_cput(qpdb->common.mctx, qpdb->deadnodes,
qpdb->node_lock_count, sizeof(dns_qpdatalist_t));
for (i = 0; i < qpdb->node_lock_count; i++) {
INSIST(isc_queue_empty(&qpdb->deadnodes[i]));
isc_queue_destroy(&qpdb->deadnodes[i]);
}
isc_mem_cput(qpdb->common.mctx, qpdb->deadnodes, qpdb->node_lock_count,
sizeof(qpdb->deadnodes[0]));
/*
* Clean up heap objects.
*/
@@ -2679,9 +2539,6 @@ free_qpdb(dns_qpdb_t *qpdb, bool log) {
sizeof(db_nodelock_t));
TREE_DESTROYLOCK(&qpdb->tree_lock);
isc_refcount_destroy(&qpdb->common.references);
if (qpdb->loop != NULL) {
isc_loop_detach(&qpdb->loop);
}
isc_rwlock_destroy(&qpdb->lock);
qpdb->common.magic = 0;
@@ -2771,82 +2628,58 @@ mark_ancient(dns_slabheader_t *header) {
*
* The caller must hold a tree write lock and bucketnum'th node (write) lock.
*/
static void
cleanup_dead_nodes(dns_qpdb_t *qpdb, int bucketnum DNS__DB_FLARG) {
dns_qpdata_t *node = NULL;
int count = 10; /* XXXJT: should be adjustable */
__cleanup_deadnodes(dns_qpdb_t *qpdb, uint16_t locknum,
isc_rwlocktype_t *nlocktypep,
isc_rwlocktype_t *tlocktypep) {
isc_queue_t deadnodes;
dns_qpdata_t *qpnode, *qpnext;
node = ISC_LIST_HEAD(qpdb->deadnodes[bucketnum]);
while (node != NULL && count > 0) {
ISC_LIST_UNLINK(qpdb->deadnodes[bucketnum], node, deadlink);
isc_queue_init(&deadnodes);
/*
* We might have reactivated this node without a tree write
* lock, so we couldn't remove this node from deadnodes then
* and we have to do it now.
*/
if (isc_refcount_current(&node->references) != 0 ||
node->data != NULL)
{
node = ISC_LIST_HEAD(qpdb->deadnodes[bucketnum]);
count--;
continue;
}
delete_node(qpdb, node);
node = ISC_LIST_HEAD(qpdb->deadnodes[bucketnum]);
count--;
/* Queue must not be empty */
RUNTIME_CHECK(isc_queue_splice(&deadnodes, &qpdb->deadnodes[locknum]));
isc_queue_for_each_entry_safe(&deadnodes, qpnode, qpnext, deadlink) {
decref(qpdb, qpnode, 0, nlocktypep, tlocktypep, false, true);
}
}
static void
cleanup_deadnodes(void *arg) {
dns_qpdb_t *qpdb = arg;
uint16_t locknum = isc_tid();
isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
INSIST(locknum < qpdb->node_lock_count);
TREE_WRLOCK(&qpdb->tree_lock, &tlocktype);
NODE_WRLOCK(&qpdb->node_locks[locknum].lock, &nlocktype);
__cleanup_deadnodes(qpdb, locknum, &nlocktype, &tlocktype);
NODE_UNLOCK(&qpdb->node_locks[locknum].lock, &nlocktype);
TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
}
/*
* This function is assumed to be called when a node is newly referenced
* and can be in the deadnode list. In that case the node must be retrieved
* from the list because it is going to be used. In addition, if the caller
* happens to hold a write lock on the tree, it's a good chance to purge dead
* nodes.
* and can be in the deadnode list. In that case the node will be referenced
* and cleanup_deadnodes() will remove it from the list when the cleaning
* happens.
* Note: while a new reference is gained in multiple places, there are only very
* few cases where the node can be in the deadnode list (only empty nodes can
* have been added to the list).
*/
static void
reactivate_node(dns_qpdb_t *qpdb, dns_qpdata_t *node,
isc_rwlocktype_t tlocktype DNS__DB_FLARG) {
isc_rwlocktype_t tlocktype ISC_ATTR_UNUSED DNS__DB_FLARG) {
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nodelock = &qpdb->node_locks[node->locknum].lock;
bool maybe_cleanup = false;
POST(nlocktype);
NODE_RDLOCK(nodelock, &nlocktype);
/*
* Check if we can possibly cleanup the dead node. If so, upgrade
* the node lock below to perform the cleanup.
*/
if (!ISC_LIST_EMPTY(qpdb->deadnodes[node->locknum]) &&
tlocktype == isc_rwlocktype_write)
{
maybe_cleanup = true;
}
if (ISC_LINK_LINKED(node, deadlink) || maybe_cleanup) {
/*
* Upgrade the lock and test if we still need to unlink.
*/
NODE_FORCEUPGRADE(nodelock, &nlocktype);
POST(nlocktype);
if (ISC_LINK_LINKED(node, deadlink)) {
ISC_LIST_UNLINK(qpdb->deadnodes[node->locknum], node,
deadlink);
}
if (maybe_cleanup) {
cleanup_dead_nodes(qpdb,
node->locknum DNS__DB_FILELINE);
}
}
newref(qpdb, node, nlocktype DNS__DB_FLARG_PASS);
NODE_UNLOCK(nodelock, &nlocktype);
@@ -2859,13 +2692,14 @@ new_qpdata(dns_qpdb_t *qpdb, const dns_name_t *name) {
*newdata = (dns_qpdata_t){
.name = DNS_NAME_INITEMPTY,
.references = ISC_REFCOUNT_INITIALIZER(1),
.locknum = isc_random_uniform(qpdb->node_lock_count),
};
newdata->locknum = dns_name_hash(name) % qpdb->node_lock_count;
INSIST(newdata->locknum < qpdb->node_lock_count);
isc_mem_attach(qpdb->common.mctx, &newdata->mctx);
dns_name_dupwithoffsets(name, newdata->mctx, &newdata->name);
ISC_LINK_INIT(newdata, deadlink);
#ifdef DNS_DB_NODETRACE
fprintf(stderr, "new_qpdata:%s:%s:%d:%p->references = 1\n", __func__,
__FILE__, __LINE__ + 1, name);
@@ -3354,7 +3188,6 @@ find_header:
newheader->down = NULL;
idx = QPDB_HEADERNODE(newheader)->locknum;
if (ZEROTTL(newheader)) {
newheader->last_used = qpdb->last_used + 1;
ISC_LIST_APPEND(qpdb->lru[idx], newheader,
link);
} else {
@@ -3384,7 +3217,6 @@ find_header:
isc_heap_insert(qpdb->heaps[idx], newheader);
newheader->heap = qpdb->heaps[idx];
if (ZEROTTL(newheader)) {
newheader->last_used = qpdb->last_used + 1;
ISC_LIST_APPEND(qpdb->lru[idx], newheader,
link);
} else {
@@ -3559,6 +3391,31 @@ expire_ttl_headers(dns_qpdb_t *qpdb, unsigned int locknum,
isc_rwlocktype_t *tlocktypep, isc_stdtime_t now,
bool cache_is_overmem DNS__DB_FLARG);
static void
overmem_async(void *arg) {
dns_qpdb_t *qpdb = arg;
uint32_t locknum = isc_tid();
isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
dns_slabheader_t *header;
size_t i;
TREE_WRLOCK(&qpdb->tree_lock, &tlocktype);
NODE_WRLOCK(&qpdb->node_locks[locknum].lock, &nlocktype);
for (i = 0, header = ISC_LIST_TAIL(qpdb->lru[locknum]);
i < DNS_QPDB_EXPIRE_LRU_COUNT && header != NULL;
i++, header = ISC_LIST_TAIL(qpdb->lru[locknum]))
{
ISC_LIST_UNLINK(qpdb->lru[locknum], header, link);
expireheader(header, &tlocktype,
dns_expire_lru DNS__DB_FLARG_PASS);
}
NODE_UNLOCK(&qpdb->node_locks[locknum].lock, &nlocktype);
TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
}
static isc_result_t
addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
@@ -3667,15 +3524,16 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
*/
if (isc_mem_isovermem(qpdb->common.mctx)) {
cache_is_overmem = true;
for (size_t i = 0; i < qpdb->node_lock_count; i++) {
isc_async_run(isc_loop_get(qpdb->loopmgr, i),
overmem_async, qpdb);
}
}
if (delegating || newnsec || cache_is_overmem) {
TREE_WRLOCK(&qpdb->tree_lock, &tlocktype);
}
if (cache_is_overmem) {
overmem(qpdb, newheader, &tlocktype DNS__DB_FLARG_PASS);
}
NODE_WRLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype);
if (qpdb->rrsetstats != NULL) {
@@ -3685,10 +3543,6 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
true);
}
if (tlocktype == isc_rwlocktype_write) {
cleanup_dead_nodes(qpdb, qpnode->locknum DNS__DB_FLARG_PASS);
}
expire_ttl_headers(qpdb, qpnode->locknum, &tlocktype, now,
cache_is_overmem DNS__DB_FLARG_PASS);
@@ -3797,22 +3651,6 @@ nodecount(dns_db_t *db, dns_dbtree_t tree) {
return (mu.leaves);
}
static void
setloop(dns_db_t *db, isc_loop_t *loop) {
dns_qpdb_t *qpdb = (dns_qpdb_t *)db;
REQUIRE(VALID_QPDB(qpdb));
RWLOCK(&qpdb->lock, isc_rwlocktype_write);
if (qpdb->loop != NULL) {
isc_loop_detach(&qpdb->loop);
}
if (loop != NULL) {
isc_loop_attach(loop, &qpdb->loop);
}
RWUNLOCK(&qpdb->lock, isc_rwlocktype_write);
}
static isc_result_t
getoriginnode(dns_db_t *db, dns_dbnode_t **nodep DNS__DB_FLARG) {
dns_qpdb_t *qpdb = (dns_qpdb_t *)db;
@@ -3857,10 +3695,12 @@ dns__qpcache_create(isc_mem_t *mctx, const dns_name_t *origin,
void *driverarg ISC_ATTR_UNUSED, dns_db_t **dbp) {
dns_qpdb_t *qpdb = NULL;
isc_mem_t *hmctx = mctx;
isc_loop_t *loop = isc_loop();
int i;
/* This database implementation only supports cache semantics */
REQUIRE(type == dns_dbtype_cache);
REQUIRE(loop != NULL);
qpdb = isc_mem_get(mctx, sizeof(*qpdb));
*qpdb = (dns_qpdb_t){
@@ -3869,6 +3709,7 @@ dns__qpcache_create(isc_mem_t *mctx, const dns_name_t *origin,
.current_serial = 1,
.least_serial = 1,
.next_serial = 2,
.loopmgr = isc_loop_getloopmgr(loop),
};
isc_refcount_init(&qpdb->common.references, 1);
@@ -3886,16 +3727,7 @@ dns__qpcache_create(isc_mem_t *mctx, const dns_name_t *origin,
isc_rwlock_init(&qpdb->lock);
TREE_INITLOCK(&qpdb->tree_lock);
/*
* Initialize node_lock_count in a generic way to support future
* extension which allows the user to specify this value on creation.
* Note that when specified for a cache DB it must be larger than 1
* as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
*/
if (qpdb->node_lock_count == 0) {
qpdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
}
INSIST(qpdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
qpdb->node_lock_count = isc_loopmgr_nloops(qpdb->loopmgr);
qpdb->node_locks = isc_mem_cget(mctx, qpdb->node_lock_count,
sizeof(db_nodelock_t));
@@ -3926,9 +3758,9 @@ dns__qpcache_create(isc_mem_t *mctx, const dns_name_t *origin,
* Create deadnode lists.
*/
qpdb->deadnodes = isc_mem_cget(mctx, qpdb->node_lock_count,
sizeof(dns_qpdatalist_t));
for (i = 0; i < (int)qpdb->node_lock_count; i++) {
ISC_LIST_INIT(qpdb->deadnodes[i]);
sizeof(qpdb->deadnodes[0]));
for (i = 0; i < (int)(qpdb->node_lock_count); i++) {
isc_queue_init(&qpdb->deadnodes[i]);
}
qpdb->active = qpdb->node_lock_count;
@@ -4767,7 +4599,6 @@ static dns_dbmethods_t qpdb_cachemethods = {
.addrdataset = addrdataset,
.deleterdataset = deleterdataset,
.nodecount = nodecount,
.setloop = setloop,
.getoriginnode = getoriginnode,
.getrrsetstats = getrrsetstats,
.setcachestats = setcachestats,

View File

@@ -1647,8 +1647,7 @@ expire_lru_headers(dns_rbtdb_t *rbtdb, unsigned int locknum,
size_t purged = 0;
for (header = ISC_LIST_TAIL(rbtdb->lru[locknum]);
header != NULL && header->last_used <= rbtdb->last_used &&
purged <= purgesize;
header != NULL && purged <= purgesize;
header = ISC_LIST_TAIL(rbtdb->lru[locknum]))
{
size_t header_size = rdataset_size(header);
@@ -1681,50 +1680,18 @@ expire_lru_headers(dns_rbtdb_t *rbtdb, unsigned int locknum,
*/
void
dns__cacherbt_overmem(dns_rbtdb_t *rbtdb, dns_slabheader_t *newheader,
uint32_t locknum,
isc_rwlocktype_t *tlocktypep DNS__DB_FLARG) {
uint32_t locknum_start = rbtdb->lru_sweep++ % rbtdb->node_lock_count;
uint32_t locknum = locknum_start;
/* Size of added data, possible node and possible ENT node. */
size_t purgesize =
rdataset_size(newheader) +
2 * dns__rbtnode_getsize(RBTDB_HEADERNODE(newheader));
size_t purged = 0;
isc_stdtime_t min_last_used = 0;
size_t max_passes = 8;
again:
do {
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
NODE_WRLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype);
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
NODE_WRLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype);
purged += expire_lru_headers(rbtdb, locknum, tlocktypep,
purgesize - purged DNS__DB_FLARG_PASS);
purged += expire_lru_headers(rbtdb, locknum, tlocktypep,
purgesize -
purged DNS__DB_FLARG_PASS);
/*
* Work out the oldest remaining last_used values of the list
* tails as we walk across the array of lru lists.
*/
dns_slabheader_t *header = ISC_LIST_TAIL(rbtdb->lru[locknum]);
if (header != NULL &&
(min_last_used == 0 || header->last_used < min_last_used))
{
min_last_used = header->last_used;
}
NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype);
locknum = (locknum + 1) % rbtdb->node_lock_count;
} while (locknum != locknum_start && purged <= purgesize);
/*
* Update rbtdb->last_used if we have walked all the list tails and have
* not freed the required amount of memory.
*/
if (purged < purgesize) {
if (min_last_used != 0) {
rbtdb->last_used = min_last_used;
if (max_passes-- > 0) {
goto again;
}
}
}
NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype);
}

View File

@@ -2938,8 +2938,6 @@ find_header:
idx = RBTDB_HEADERNODE(newheader)->locknum;
if (IS_CACHE(rbtdb)) {
if (ZEROTTL(newheader)) {
newheader->last_used =
rbtdb->last_used + 1;
ISC_LIST_APPEND(rbtdb->lru[idx],
newheader, link);
} else {
@@ -2984,8 +2982,6 @@ find_header:
isc_heap_insert(rbtdb->heaps[idx], newheader);
newheader->heap = rbtdb->heaps[idx];
if (ZEROTTL(newheader)) {
newheader->last_used =
rbtdb->last_used + 1;
ISC_LIST_APPEND(rbtdb->lru[idx],
newheader, link);
} else {
@@ -3384,7 +3380,7 @@ dns__rbtdb_addrdataset(dns_db_t *db, dns_dbnode_t *node,
}
if (cache_is_overmem) {
dns__cacherbt_overmem(rbtdb, newheader,
dns__cacherbt_overmem(rbtdb, newheader, rbtnode->locknum,
&tlocktype DNS__DB_FLARG_PASS);
}

View File

@@ -135,17 +135,6 @@ struct dns_rbtdb {
*/
dns_slabheaderlist_t *lru;
/*
* Start point % node_lock_count for next LRU cleanup.
*/
atomic_uint lru_sweep;
/*
* When performing LRU cleaning limit cleaning to headers that were
* last used at or before this.
*/
_Atomic(isc_stdtime_t) last_used;
/*%
* Temporary storage for stale cache nodes and dynamically deleted
* nodes that await being cleaned up.
@@ -475,6 +464,7 @@ dns__cacherbt_expireheader(dns_slabheader_t *header,
dns_expire_t reason DNS__DB_FLARG);
void
dns__cacherbt_overmem(dns_rbtdb_t *rbtdb, dns_slabheader_t *newheader,
uint32_t locknum,
isc_rwlocktype_t *tlocktypep DNS__DB_FLARG);
ISC_LANG_ENDDECLS

View File

@@ -64,6 +64,7 @@ libisc_la_HEADERS = \
include/isc/pause.h \
include/isc/portset.h \
include/isc/proxy2.h \
include/isc/queue.h \
include/isc/quota.h \
include/isc/radix.h \
include/isc/random.h \

103
lib/isc/include/isc/queue.h Normal file
View File

@@ -0,0 +1,103 @@
/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
#include <isc/os.h>
#include <isc/urcu.h>
STATIC_ASSERT(sizeof(struct __cds_wfcq_head) <= ISC_OS_CACHELINE_SIZE,
"size of struct __cds_wfcq_head must be smaller than "
"ISC_OS_CACHELINE_SIZE");
typedef struct isc_queue {
struct __cds_wfcq_head head;
uint8_t __padding[ISC_OS_CACHELINE_SIZE -
sizeof(struct __cds_wfcq_head)];
struct cds_wfcq_tail tail;
} isc_queue_t;
typedef struct cds_wfcq_node isc_queue_node_t;
static inline void
isc_queue_node_init(isc_queue_node_t *node) {
cds_wfcq_node_init(node);
}
static inline void
isc_queue_init(isc_queue_t *queue) {
__cds_wfcq_init(&(queue)->head, &(queue)->tail);
}
static inline void
isc_queue_destroy(isc_queue_t *queue) {
UNUSED(queue);
}
static inline bool
isc_queue_empty(isc_queue_t *queue) {
return (cds_wfcq_empty(&(queue)->head, &(queue)->tail));
}
static inline bool
isc_queue_enqueue(isc_queue_t *queue, isc_queue_node_t *node) {
return (cds_wfcq_enqueue(&(queue)->head, &(queue)->tail, node));
}
#define isc_queue_enqueue_entry(queue, entry, member) \
cds_wfcq_enqueue(&(queue)->head, &(queue)->tail, &((entry)->member))
static inline isc_queue_node_t *
isc_queue_dequeue(isc_queue_t *queue) {
return (__cds_wfcq_dequeue_nonblocking(&(queue)->head, &(queue)->tail));
}
#define isc_queue_entry(ptr, type, member) \
caa_container_of_check_null(ptr, type, member)
#define isc_queue_dequeue_entry(queue, type, member) \
isc_queue_entry(isc_queue_dequeue(queue), type, member)
static inline bool
isc_queue_splice(isc_queue_t *dest, isc_queue_t *src) {
enum cds_wfcq_ret ret = __cds_wfcq_splice_blocking(
&dest->head, &dest->tail, &src->head, &src->tail);
INSIST(ret != CDS_WFCQ_RET_WOULDBLOCK &&
ret != CDS_WFCQ_RET_DEST_NON_EMPTY);
return (ret != CDS_WFCQ_RET_SRC_EMPTY);
}
#define isc_queue_first_entry(queue, type, member) \
isc_queue_entry( \
__cds_wfcq_first_blocking(&(queue)->head, &(queue)->tail), \
type, member)
#define isc_queue_next_entry(queue, node, type, member) \
isc_queue_entry(__cds_wfcq_next_blocking(&(queue)->head, \
&(queue)->tail, node), \
type, member)
#define isc_queue_for_each_entry(queue, pos, member) \
for (pos = isc_queue_first_entry(queue, __typeof__(*pos), member); \
pos != NULL; \
pos = isc_queue_next_entry(queue, &(pos)->member, \
__typeof__(*pos), member))
#define isc_queue_for_each_entry_safe(queue, pos, next, member) \
for (pos = isc_queue_first_entry(queue, __typeof__(*pos), member), \
next = (pos ? isc_queue_next_entry(queue, &(pos)->member, \
__typeof__(*pos), member) \
: NULL); \
pos != NULL; pos = next, \
next = (pos ? isc_queue_next_entry(queue, &(pos)->member, \
__typeof__(*pos), member) \
: NULL))

View File

@@ -39,13 +39,11 @@
*/
/* test multiple calls to dns_db_getoriginnode */
ISC_RUN_TEST_IMPL(getoriginnode) {
ISC_LOOP_TEST_IMPL(getoriginnode) {
dns_db_t *db = NULL;
dns_dbnode_t *node = NULL;
isc_result_t result;
UNUSED(state);
result = dns_db_create(mctx, ZONEDB_DEFAULT, dns_rootname,
dns_dbtype_zone, dns_rdataclass_in, 0, NULL,
&db);
@@ -60,16 +58,15 @@ ISC_RUN_TEST_IMPL(getoriginnode) {
dns_db_detachnode(db, &node);
dns_db_detach(&db);
isc_loopmgr_shutdown(loopmgr);
}
/* test getservestalettl and setservestalettl */
ISC_RUN_TEST_IMPL(getsetservestalettl) {
ISC_LOOP_TEST_IMPL(getsetservestalettl) {
dns_db_t *db = NULL;
isc_result_t result;
dns_ttl_t ttl;
UNUSED(state);
result = dns_db_create(mctx, CACHEDB_DEFAULT, dns_rootname,
dns_dbtype_cache, dns_rdataclass_in, 0, NULL,
&db);
@@ -90,10 +87,11 @@ ISC_RUN_TEST_IMPL(getsetservestalettl) {
assert_int_equal(ttl, 6 * 3600);
dns_db_detach(&db);
isc_loopmgr_shutdown(loopmgr);
}
/* check DNS_DBFIND_STALEOK works */
ISC_RUN_TEST_IMPL(dns_dbfind_staleok) {
ISC_LOOP_TEST_IMPL(dns_dbfind_staleok) {
dns_db_t *db = NULL;
dns_dbnode_t *node = NULL;
dns_fixedname_t example_fixed;
@@ -107,8 +105,6 @@ ISC_RUN_TEST_IMPL(dns_dbfind_staleok) {
isc_result_t result;
unsigned char data[] = { 0x0a, 0x00, 0x00, 0x01 };
UNUSED(state);
result = dns_db_create(mctx, CACHEDB_DEFAULT, dns_rootname,
dns_dbtype_cache, dns_rdataclass_in, 0, NULL,
&db);
@@ -243,15 +239,14 @@ ISC_RUN_TEST_IMPL(dns_dbfind_staleok) {
}
dns_db_detach(&db);
isc_loopmgr_shutdown(loopmgr);
}
/* database class */
ISC_RUN_TEST_IMPL(class) {
ISC_LOOP_TEST_IMPL(class) {
isc_result_t result;
dns_db_t *db = NULL;
UNUSED(state);
result = dns_db_create(mctx, ZONEDB_DEFAULT, dns_rootname,
dns_dbtype_zone, dns_rdataclass_in, 0, NULL,
&db);
@@ -264,15 +259,14 @@ ISC_RUN_TEST_IMPL(class) {
assert_int_equal(dns_db_class(db), dns_rdataclass_in);
dns_db_detach(&db);
isc_loopmgr_shutdown(loopmgr);
}
/* database type */
ISC_RUN_TEST_IMPL(dbtype) {
ISC_LOOP_TEST_IMPL(dbtype) {
isc_result_t result;
dns_db_t *db = NULL;
UNUSED(state);
/* DB has zone semantics */
result = dns_db_create(mctx, ZONEDB_DEFAULT, dns_rootname,
dns_dbtype_zone, dns_rdataclass_in, 0, NULL,
@@ -293,10 +287,11 @@ ISC_RUN_TEST_IMPL(dbtype) {
assert_true(dns_db_iscache(db));
assert_false(dns_db_iszone(db));
dns_db_detach(&db);
isc_loopmgr_shutdown(loopmgr);
}
/* database versions */
ISC_RUN_TEST_IMPL(version) {
ISC_LOOP_TEST_IMPL(version) {
isc_result_t result;
dns_fixedname_t fname, ffound;
dns_name_t *name, *foundname;
@@ -305,8 +300,6 @@ ISC_RUN_TEST_IMPL(version) {
dns_dbnode_t *node = NULL;
dns_rdataset_t rdataset;
UNUSED(state);
result = dns_test_loaddb(&db, dns_dbtype_zone, "test.test",
TESTS_DIR "/testdata/db/data.db");
assert_int_equal(result, ISC_R_SUCCESS);
@@ -360,15 +353,16 @@ ISC_RUN_TEST_IMPL(version) {
dns_db_closeversion(db, &ver, false);
dns_db_detach(&db);
isc_loopmgr_shutdown(loopmgr);
}
ISC_TEST_LIST_START
ISC_TEST_ENTRY(getoriginnode)
ISC_TEST_ENTRY(getsetservestalettl)
ISC_TEST_ENTRY(dns_dbfind_staleok)
ISC_TEST_ENTRY(class)
ISC_TEST_ENTRY(dbtype)
ISC_TEST_ENTRY(version)
ISC_TEST_ENTRY_CUSTOM(getoriginnode, setup_managers, teardown_managers)
ISC_TEST_ENTRY_CUSTOM(getsetservestalettl, setup_managers, teardown_managers)
ISC_TEST_ENTRY_CUSTOM(dns_dbfind_staleok, setup_managers, teardown_managers)
ISC_TEST_ENTRY_CUSTOM(class, setup_managers, teardown_managers)
ISC_TEST_ENTRY_CUSTOM(dbtype, setup_managers, teardown_managers)
ISC_TEST_ENTRY_CUSTOM(version, setup_managers, teardown_managers)
ISC_TEST_LIST_END
ISC_TEST_MAIN

View File

@@ -108,7 +108,7 @@ overmempurge_addrdataset(dns_db_t *db, isc_stdtime_t now, int idx,
dns_db_detachnode(db, &node);
}
ISC_RUN_TEST_IMPL(overmempurge_bigrdata) {
ISC_LOOP_TEST_IMPL(overmempurge_bigrdata) {
size_t maxcache = 2097152U; /* 2MB - same as DNS_CACHE_MINSIZE */
size_t hiwater = maxcache - (maxcache >> 3); /* borrowed from cache.c */
size_t lowater = maxcache - (maxcache >> 2); /* ditto */
@@ -151,9 +151,10 @@ ISC_RUN_TEST_IMPL(overmempurge_bigrdata) {
dns_db_detach(&db);
isc_mem_destroy(&mctx2);
isc_loopmgr_shutdown(loopmgr);
}
ISC_RUN_TEST_IMPL(overmempurge_longname) {
ISC_LOOP_TEST_IMPL(overmempurge_longname) {
size_t maxcache = 2097152U; /* 2MB - same as DNS_CACHE_MINSIZE */
size_t hiwater = maxcache - (maxcache >> 3); /* borrowed from cache.c */
size_t lowater = maxcache - (maxcache >> 2); /* ditto */
@@ -196,11 +197,12 @@ ISC_RUN_TEST_IMPL(overmempurge_longname) {
dns_db_detach(&db);
isc_mem_destroy(&mctx2);
isc_loopmgr_shutdown(loopmgr);
}
ISC_TEST_LIST_START
ISC_TEST_ENTRY(overmempurge_bigrdata)
ISC_TEST_ENTRY(overmempurge_longname)
ISC_TEST_ENTRY_CUSTOM(overmempurge_bigrdata, setup_managers, teardown_managers)
ISC_TEST_ENTRY_CUSTOM(overmempurge_longname, setup_managers, teardown_managers)
ISC_TEST_LIST_END
ISC_TEST_MAIN