Compare commits

...

3 Commits

Author SHA1 Message Date
Ondřej Surý
2db76506d2 fixup! Pick random leaf from RBT to clean in the overmem cleaning 2024-05-22 23:59:53 +02:00
Ondřej Surý
67da99a549 Remove header->last_update field, and rest of the LRU 2024-05-22 23:59:39 +02:00
Ondřej Surý
d20771cb7a Pick random leaf from RBT to clean in the overmem cleaning 2024-05-22 23:54:31 +02:00
3 changed files with 70 additions and 320 deletions

View File

@@ -370,6 +370,9 @@ dns_rbt_addnode(dns_rbt_t *rbt, const dns_name_t *name, dns_rbtnode_t **nodep);
*\li #ISC_R_NOMEMORY Resource Limit: Out of Memory
*/
dns_rbtnode_t *
dns_rbt_findrandomleaf(dns_rbt_t *rbt);
isc_result_t
dns_rbt_findname(dns_rbt_t *rbt, const dns_name_t *name, unsigned int options,
dns_name_t *foundname, void **data);

View File

@@ -23,6 +23,7 @@
#include <isc/mem.h>
#include <isc/once.h>
#include <isc/print.h>
#include <isc/random.h>
#include <isc/refcount.h>
#include <isc/stdio.h>
#include <isc/string.h>
@@ -3046,6 +3047,33 @@ dns_rbtnodechain_next(dns_rbtnodechain_t *chain, dns_name_t *name,
return (result);
}
/*
 * Return a pseudo-randomly chosen leaf node from 'rbt'.
 *
 * One random 32-bit seed is drawn, hashed into the node hash table, and
 * consecutive hash values are probed until a non-empty bucket is found;
 * from that node we descend ->down links to the bottom of the tree of
 * trees.  Note: the selection is NOT uniformly distributed — bucket
 * occupancy and chain heads bias it — which is acceptable for its use
 * in overmem cache cleaning.
 *
 * WARNING: the do/while probe loop has no exit other than finding a
 * node; if the hash table contains no entries this spins forever, so
 * the caller must guarantee the tree is non-empty.
 */
dns_rbtnode_t *
dns_rbt_findrandomleaf(dns_rbt_t *rbt) {
	REQUIRE(VALID_RBT(rbt));
	dns_rbtnode_t *current = NULL;
	uint32_t hashval = isc_random32();
	do {
		/* Snapshot the table index; a rehash may be concurrent. */
		uint8_t hindex = rbt->hindex;
		uint32_t hash = hash_32(hashval, rbt->hashbits[hindex]);
		current = rbt->hashtable[hindex][hash];
		if (current == NULL && rehashing_in_progress(rbt)) {
			/*
			 * NOTE(review): the other table is probed with a hash
			 * reduced by the *current* table's bit count
			 * (rbt->hashbits[hindex] above); confirm the other
			 * table is never smaller, or this could index out of
			 * range — verify against the rehashing scheme.
			 */
			hindex = RBT_HASH_NEXTTABLE(rbt->hindex);
			current = rbt->hashtable[hindex][hash];
		}
		/* Empty bucket: linear-probe the next hash value. */
		hashval++;
	} while (current == NULL);
	/* Walk down the tree-of-trees to the deepest level. */
	while (current->down != NULL) {
		current = current->down;
	}
	return (current);
}
isc_result_t
dns_rbtnodechain_first(dns_rbtnodechain_t *chain, dns_rbt_t *rbt,
dns_name_t *name, dns_name_t *origin)

View File

@@ -204,7 +204,6 @@ typedef struct rdatasetheader {
*/
dns_rbtnode_t *node;
isc_stdtime_t last_used;
ISC_LINK(struct rdatasetheader) link;
unsigned int heap_index;
@@ -483,24 +482,6 @@ struct dns_rbtdb {
*/
uint32_t serve_stale_refresh;
/*
* This is a linked list used to implement the LRU cache. There will
* be node_lock_count linked lists here. Nodes in bucket 1 will be
* placed on the linked list rdatasets[1].
*/
rdatasetheaderlist_t *rdatasets;
/*
* Start point % node_lock_count for next LRU cleanup.
*/
atomic_uint lru_sweep;
/*
* When performing LRU cleaning limit cleaning to headers that were
* last used at or before this.
*/
atomic_uint last_used;
/*%
* Temporary storage for stale cache nodes and dynamically deleted
* nodes that await being cleaned up.
@@ -577,10 +558,6 @@ rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
static isc_result_t
rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
dns_rdataset_t *neg, dns_rdataset_t *negsig);
static bool
need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now);
static void
update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, isc_stdtime_t now);
static void
expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, bool tree_locked,
expire_t reason);
@@ -1142,17 +1119,6 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) {
NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
}
/*
* Clean up LRU / re-signing order lists.
*/
if (rbtdb->rdatasets != NULL) {
for (i = 0; i < rbtdb->node_lock_count; i++) {
INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
}
isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
rbtdb->node_lock_count *
sizeof(rdatasetheaderlist_t));
}
/*
* Clean up dead node buckets.
*/
@@ -1483,10 +1449,6 @@ free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset) {
atomic_load_acquire(&rdataset->attributes), false);
idx = rdataset->node->locknum;
if (ISC_LINK_LINKED(rdataset, link)) {
INSIST(IS_CACHE(rbtdb));
ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
}
if (rdataset->heap_index != 0) {
isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
@@ -4840,27 +4802,6 @@ find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
search->now, locktype,
sigrdataset);
}
if (need_headerupdate(found, search->now) ||
(foundsig != NULL &&
need_headerupdate(foundsig, search->now)))
{
if (locktype != isc_rwlocktype_write) {
NODE_UNLOCK(lock, locktype);
NODE_LOCK(lock, isc_rwlocktype_write);
locktype = isc_rwlocktype_write;
POST(locktype);
}
if (need_headerupdate(found, search->now)) {
update_header(search->rbtdb, found,
search->now);
}
if (foundsig != NULL &&
need_headerupdate(foundsig, search->now))
{
update_header(search->rbtdb, foundsig,
search->now);
}
}
}
node_exit:
@@ -5013,7 +4954,6 @@ cache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
rdatasetheader_t *header, *header_prev, *header_next;
rdatasetheader_t *found, *nsheader;
rdatasetheader_t *foundsig, *nssig, *cnamesig;
rdatasetheader_t *update, *updatesig;
rdatasetheader_t *nsecheader, *nsecsig;
rbtdb_rdatatype_t sigtype, negtype;
@@ -5040,8 +4980,6 @@ cache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
dns_fixedname_init(&search.zonecut_name);
dns_rbtnodechain_init(&search.chain);
search.now = now;
update = NULL;
updatesig = NULL;
RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
@@ -5249,16 +5187,10 @@ cache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
}
bind_rdataset(search.rbtdb, node, nsecheader,
search.now, locktype, rdataset);
if (need_headerupdate(nsecheader, search.now)) {
update = nsecheader;
}
if (nsecsig != NULL) {
bind_rdataset(search.rbtdb, node, nsecsig,
search.now, locktype,
sigrdataset);
if (need_headerupdate(nsecsig, search.now)) {
updatesig = nsecsig;
}
}
result = DNS_R_COVERINGNSEC;
goto node_exit;
@@ -5291,16 +5223,10 @@ cache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
}
bind_rdataset(search.rbtdb, node, nsheader, search.now,
locktype, rdataset);
if (need_headerupdate(nsheader, search.now)) {
update = nsheader;
}
if (nssig != NULL) {
bind_rdataset(search.rbtdb, node, nssig,
search.now, locktype,
sigrdataset);
if (need_headerupdate(nssig, search.now)) {
updatesig = nssig;
}
}
result = DNS_R_DELEGATION;
goto node_exit;
@@ -5352,33 +5278,13 @@ cache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
{
bind_rdataset(search.rbtdb, node, found, search.now, locktype,
rdataset);
if (need_headerupdate(found, search.now)) {
update = found;
}
if (!NEGATIVE(found) && foundsig != NULL) {
bind_rdataset(search.rbtdb, node, foundsig, search.now,
locktype, sigrdataset);
if (need_headerupdate(foundsig, search.now)) {
updatesig = foundsig;
}
}
}
node_exit:
if ((update != NULL || updatesig != NULL) &&
locktype != isc_rwlocktype_write)
{
NODE_UNLOCK(lock, locktype);
NODE_LOCK(lock, isc_rwlocktype_write);
locktype = isc_rwlocktype_write;
POST(locktype);
}
if (update != NULL && need_headerupdate(update, search.now)) {
update_header(search.rbtdb, update, search.now);
}
if (updatesig != NULL && need_headerupdate(updatesig, search.now)) {
update_header(search.rbtdb, updatesig, search.now);
}
NODE_UNLOCK(lock, locktype);
@@ -5545,24 +5451,6 @@ cache_findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options,
locktype, sigrdataset);
}
if (need_headerupdate(found, search.now) ||
(foundsig != NULL && need_headerupdate(foundsig, search.now)))
{
if (locktype != isc_rwlocktype_write) {
NODE_UNLOCK(lock, locktype);
NODE_LOCK(lock, isc_rwlocktype_write);
locktype = isc_rwlocktype_write;
POST(locktype);
}
if (need_headerupdate(found, search.now)) {
update_header(search.rbtdb, found, search.now);
}
if (foundsig != NULL && need_headerupdate(foundsig, search.now))
{
update_header(search.rbtdb, foundsig, search.now);
}
}
NODE_UNLOCK(lock, locktype);
tree_exit:
@@ -6540,9 +6428,6 @@ find_header:
if (header->rdh_ttl > newheader->rdh_ttl) {
set_ttl(rbtdb, header, newheader->rdh_ttl);
}
if (header->last_used != now) {
update_header(rbtdb, header, now);
}
if (header->noqname == NULL &&
newheader->noqname != NULL)
{
@@ -6595,9 +6480,6 @@ find_header:
if (header->rdh_ttl > newheader->rdh_ttl) {
set_ttl(rbtdb, header, newheader->rdh_ttl);
}
if (header->last_used != now) {
update_header(rbtdb, header, now);
}
if (header->noqname == NULL &&
newheader->noqname != NULL)
{
@@ -6624,16 +6506,6 @@ find_header:
newheader->down = NULL;
idx = newheader->node->locknum;
if (IS_CACHE(rbtdb)) {
if (ZEROTTL(newheader)) {
newheader->last_used =
atomic_load(&rbtdb->last_used) +
1;
ISC_LIST_APPEND(rbtdb->rdatasets[idx],
newheader, link);
} else {
ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
newheader, link);
}
INSIST(rbtdb->heaps != NULL);
isc_heap_insert(rbtdb->heaps[idx], newheader);
} else if (RESIGN(newheader)) {
@@ -6668,16 +6540,6 @@ find_header:
if (IS_CACHE(rbtdb)) {
INSIST(rbtdb->heaps != NULL);
isc_heap_insert(rbtdb->heaps[idx], newheader);
if (ZEROTTL(newheader)) {
newheader->last_used =
atomic_load(&rbtdb->last_used) +
1;
ISC_LIST_APPEND(rbtdb->rdatasets[idx],
newheader, link);
} else {
ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
newheader, link);
}
} else if (RESIGN(newheader)) {
resign_insert(rbtdb, idx, newheader);
resign_delete(rbtdb, rbtversion, header);
@@ -6725,13 +6587,6 @@ find_header:
idx = newheader->node->locknum;
if (IS_CACHE(rbtdb)) {
isc_heap_insert(rbtdb->heaps[idx], newheader);
if (ZEROTTL(newheader)) {
ISC_LIST_APPEND(rbtdb->rdatasets[idx],
newheader, link);
} else {
ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
newheader, link);
}
} else if (RESIGN(newheader)) {
resign_insert(rbtdb, idx, newheader);
resign_delete(rbtdb, rbtversion, header);
@@ -7001,7 +6856,6 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
atomic_init(&newheader->count,
atomic_fetch_add_relaxed(&init_count, 1));
newheader->trust = rdataset->trust;
newheader->last_used = now;
newheader->node = rbtnode;
if (rbtversion != NULL) {
newheader->serial = rbtversion->serial;
@@ -7204,7 +7058,6 @@ subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
newheader->closest = NULL;
atomic_init(&newheader->count,
atomic_fetch_add_relaxed(&init_count, 1));
newheader->last_used = 0;
newheader->node = rbtnode;
if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
RDATASET_ATTR_SET(newheader, RDATASET_ATTR_RESIGN);
@@ -7315,7 +7168,6 @@ subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
newheader->node = rbtnode;
newheader->resign = 0;
newheader->resign_lsb = 0;
newheader->last_used = 0;
} else {
free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
goto unlock;
@@ -7409,7 +7261,6 @@ deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
newheader->serial = 0;
}
atomic_init(&newheader->count, 0);
newheader->last_used = 0;
newheader->node = rbtnode;
nodefullname(db, node, nodename);
@@ -7587,7 +7438,6 @@ loading_addrdataset(void *arg, const dns_name_t *name,
newheader->closest = NULL;
atomic_init(&newheader->count,
atomic_fetch_add_relaxed(&init_count, 1));
newheader->last_used = 0;
newheader->node = node;
setownercase(newheader, name);
@@ -8358,14 +8208,6 @@ dns_rbtdb_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type,
if (result != ISC_R_SUCCESS) {
goto cleanup_node_locks;
}
rbtdb->rdatasets = isc_mem_get(
mctx,
rbtdb->node_lock_count * sizeof(rdatasetheaderlist_t));
for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
ISC_LIST_INIT(rbtdb->rdatasets[i]);
}
} else {
rbtdb->rdatasets = NULL;
}
/*
@@ -10256,168 +10098,6 @@ no_glue:
/* UNREACHABLE */
}
/*%
* Routines for LRU-based cache management.
*/
/*%
* See if a given cache entry that is being reused needs to be updated
* in the LRU-list. From the LRU management point of view, this function is
* expected to return true for almost all cases. When used with threads,
* however, this may cause a non-negligible performance penalty because a
* writer lock will have to be acquired before updating the list.
* If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
* function returns true if the entry has not been updated for some period of
* time. We differentiate the NS or glue address case and the others since
* experiments have shown that the former tends to be accessed relatively
* infrequently and the cost of cache miss is higher (e.g., a missing NS records
* may cause external queries at a higher level zone, involving more
* transactions).
*
* Caller must hold the node (read or write) lock.
*/
/*%
 * Decide whether a reused cache entry should have its LRU position
 * refreshed.  Entries flagged nonexistent, ancient or zero-TTL never
 * need an update.  With DNS_RBTDB_LIMITLRUUPDATE enabled, updates are
 * rate-limited: NS records and glue address records use the (longer)
 * DNS_RBTDB_LRUUPDATE_GLUE interval since they are accessed rarely but
 * are expensive to lose; everything else uses
 * DNS_RBTDB_LRUUPDATE_REGULAR.  Without it, always update.
 *
 * Caller must hold the node (read or write) lock.
 */
static bool
need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
	if (RDATASET_ATTR_GET(header, (RDATASET_ATTR_NONEXISTENT |
				       RDATASET_ATTR_ANCIENT |
				       RDATASET_ATTR_ZEROTTL)) != 0)
	{
		return (false);
	}

#if DNS_RBTDB_LIMITLRUUPDATE
	bool infrequent = (header->type == dns_rdatatype_ns);
	if (!infrequent && header->trust == dns_trust_glue) {
		infrequent = (header->type == dns_rdatatype_a ||
			      header->type == dns_rdatatype_aaaa);
	}
	if (infrequent) {
		/* NS/glue: update only every DNS_RBTDB_LRUUPDATE_GLUE s. */
		return (header->last_used + DNS_RBTDB_LRUUPDATE_GLUE <= now);
	}
	/* Everything else: every DNS_RBTDB_LRUUPDATE_REGULAR s. */
	return (header->last_used + DNS_RBTDB_LRUUPDATE_REGULAR <= now);
#else
	UNUSED(now);
	return (true);
#endif /* if DNS_RBTDB_LIMITLRUUPDATE */
}
/*%
* Update the timestamp of a given cache entry and move it to the head
* of the corresponding LRU list.
*
* Caller must hold the node (write) lock.
*
* Note that the we do NOT touch the heap here, as the TTL has not changed.
*/
/*%
 * Refresh a cache entry's timestamp and move it to the head of its
 * bucket's LRU list.
 *
 * Caller must hold the node (write) lock.
 *
 * The heap is deliberately not touched: the TTL has not changed.
 */
static void
update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, isc_stdtime_t now) {
	rdatasetheaderlist_t *lru = &rbtdb->rdatasets[header->node->locknum];

	INSIST(IS_CACHE(rbtdb));
	/* To be checked: can we really assume this? XXXMLG */
	INSIST(ISC_LINK_LINKED(header, link));

	ISC_LIST_UNLINK(*lru, header, link);
	header->last_used = now;
	ISC_LIST_PREPEND(*lru, header, link);
}
/*%
 * Expire entries from the tail of one bucket's LRU list until either the
 * list is exhausted, the tail entry is newer than rbtdb->last_used, or at
 * least 'purgesize' bytes have been purged.  Returns the number of bytes
 * purged.  Caller must hold the node write lock for 'locknum'.
 */
static size_t
expire_lru_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, size_t purgesize,
		   bool tree_locked) {
	size_t purged = 0;

	while (purged <= purgesize) {
		rdatasetheader_t *tail =
			ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
		/* Re-load the atomic each pass: other threads may raise it. */
		if (tail == NULL ||
		    tail->last_used > atomic_load(&rbtdb->last_used))
		{
			break;
		}
		/*
		 * Unlink the entry at this point to avoid checking it
		 * again even if it's currently used by someone else and
		 * cannot be purged at this moment.  This entry won't be
		 * referenced any more (so unlinking is safe) since the
		 * TTL will be reset to 0.
		 */
		ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], tail, link);
		/* Account the size before expire_header() may free it. */
		size_t tail_size = rdataset_size(tail);
		expire_header(rbtdb, tail, tree_locked, expire_lru);
		purged += tail_size;
	}

	return (purged);
}
/*%
* Purge some stale (i.e. unused for some period - LRU based cleaning) cache
* entries under the overmem condition. To recover from this condition quickly,
* we cleanup entries up to the size of newly added rdata (passed as purgesize).
*
* The LRU lists tails are processed in LRU order to the nearest second.
*
* A write lock on the tree must be held.
*/
static void
overmem_purge(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
	      bool tree_locked) {
	/*
	 * Rotate the starting bucket atomically so concurrent/successive
	 * purges spread their work across the lock buckets.
	 */
	uint32_t locknum_start = atomic_fetch_add(&rbtdb->lru_sweep, 1) %
				 rbtdb->node_lock_count;
	uint32_t locknum = locknum_start;
	/* Size of added data, possible node and possible ENT node. */
	size_t purgesize = rdataset_size(newheader) +
			   2 * dns__rbtnode_getsize(newheader->node);
	size_t purged = 0;
	isc_stdtime_t min_last_used = 0;
	/* Cap on extra full sweeps; post-decrement below allows 8 retries. */
	size_t max_passes = 8;
again:
	do {
		NODE_LOCK(&rbtdb->node_locks[locknum].lock,
			  isc_rwlocktype_write);
		purged += expire_lru_headers(rbtdb, locknum, purgesize - purged,
					     tree_locked);
		/*
		 * Work out the oldest remaining last_used values of the list
		 * tails as we walk across the array of lru lists.
		 */
		rdatasetheader_t *header =
			ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
		if (header != NULL &&
		    (min_last_used == 0 || header->last_used < min_last_used))
		{
			min_last_used = header->last_used;
		}
		NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
			    isc_rwlocktype_write);
		locknum = (locknum + 1) % rbtdb->node_lock_count;
	} while (locknum != locknum_start && purged <= purgesize);
	/*
	 * Update rbtdb->last_used if we have walked all the list tails and have
	 * not freed the required amount of memory.
	 */
	if (purged < purgesize) {
		if (min_last_used != 0) {
			/*
			 * Raising last_used lets the next sweep expire
			 * entries that were previously too "recent".
			 * NOTE(review): min_last_used is not reset between
			 * passes, so later passes reuse the first pass's
			 * minimum — confirm this is intended.
			 */
			atomic_store(&rbtdb->last_used, min_last_used);
			if (max_passes-- > 0) {
				goto again;
			}
		}
	}
}
static void
expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, bool tree_locked,
expire_t reason) {
@@ -10460,6 +10140,45 @@ expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, bool tree_locked,
}
}
/*%
* Purge some random cache entries under the overmem condition. To recover from
* this condition quickly, we cleanup entries up to the size of newly added
* rdata (passed as purgesize).
*
* A write lock on the tree must be held.
*/
static void
overmem_purge(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
	      bool tree_locked) {
	/* Size of added data, possible node and possible ENT node. */
	size_t purgesize = rdataset_size(newheader) +
			   2 * dns__rbtnode_getsize(newheader->node);
	size_t purged = 0;
	/*
	 * Bound the number of attempts: a randomly chosen leaf may carry no
	 * data (node->data == NULL), in which case 'purged' does not advance
	 * and the original unbounded do/while could spin forever.  Give up
	 * after a fixed number of passes and let a later addition retry.
	 */
	size_t max_passes = 32;

	do {
		/* Pick a random victim leaf and expire everything on it. */
		dns_rbtnode_t *node = dns_rbt_findrandomleaf(rbtdb->tree);
		uint32_t locknum = node->locknum;
		rdatasetheader_t *header = NULL;
		rdatasetheader_t *header_next = NULL;

		NODE_LOCK(&rbtdb->node_locks[locknum].lock,
			  isc_rwlocktype_write);
		for (header = node->data; header != NULL; header = header_next)
		{
			/* Save the link: expire_header() may free 'header'. */
			header_next = header->next;
			size_t header_size = rdataset_size(header);
			expire_header(rbtdb, header, tree_locked, expire_lru);
			purged += header_size;
		}
		NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
			    isc_rwlocktype_write);
	} while (purged <= purgesize && max_passes-- > 0);
}
/*
* Caller must be holding the node write lock.
*/