From 5a85135c1e18c44c4c3dd654f75c132f9c98cb4e Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Fri, 12 May 2023 23:01:01 -0700 Subject: [PATCH] split out cache-specific functions move cache-specific functions from rbtdb.c to rbt-cachedb.c. --- lib/dns/Makefile.am | 1 + lib/dns/rbt-cachedb.c | 1693 +++++++++++++++++++++++++++++++++++++++ lib/dns/rbt-zonedb.c | 98 +-- lib/dns/rbtdb.c | 1776 +++-------------------------------------- lib/dns/rbtdb_p.h | 168 +++- 5 files changed, 1950 insertions(+), 1786 deletions(-) create mode 100644 lib/dns/rbt-cachedb.c diff --git a/lib/dns/Makefile.am b/lib/dns/Makefile.am index f3cc99cd0a..9e0eeeb9e1 100644 --- a/lib/dns/Makefile.am +++ b/lib/dns/Makefile.am @@ -211,6 +211,7 @@ libdns_la_SOURCES = \ qp.c \ qp_p.h \ rbt.c \ + rbt-cachedb.c \ rbt-zonedb.c \ rbtdb_p.h \ rbtdb.c \ diff --git a/lib/dns/rbt-cachedb.c b/lib/dns/rbt-cachedb.c new file mode 100644 index 0000000000..d57eb2a6cc --- /dev/null +++ b/lib/dns/rbt-cachedb.c @@ -0,0 +1,1693 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * SPDX-License-Identifier: MPL-2.0 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +/*! 
\file */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rbtdb_p.h" + +#define CHECK(op) \ + do { \ + result = (op); \ + if (result != ISC_R_SUCCESS) \ + goto failure; \ + } while (0) + +/*% + * Whether to rate-limit updating the LRU to avoid possible thread contention. + * Updating LRU requires write locking, so we don't do it every time the + * record is touched - only after some time passes. + */ +#ifndef DNS_RBTDB_LIMITLRUUPDATE +#define DNS_RBTDB_LIMITLRUUPDATE 1 +#endif + +/*% Time after which we update LRU for glue records, 5 minutes */ +#define DNS_RBTDB_LRUUPDATE_GLUE 300 +/*% Time after which we update LRU for all other records, 10 minutes */ +#define DNS_RBTDB_LRUUPDATE_REGULAR 600 + +#define EXISTS(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_NONEXISTENT) == 0) +#define NONEXISTENT(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_NONEXISTENT) != 0) +#define NXDOMAIN(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_NXDOMAIN) != 0) +#define STALE(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_STALE) != 0) +#define NEGATIVE(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_NEGATIVE) != 0) +#define ZEROTTL(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_ZEROTTL) != 0) +#define ANCIENT(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_ANCIENT) != 0) +#define STATCOUNT(header) \ + 
((atomic_load_acquire(&(header)->attributes) & \
+	  DNS_SLABHEADERATTR_STATCOUNT) != 0)
+
+/*%
+ * Stale TTL that applies to 'header': 0 for NXDOMAIN headers, otherwise
+ * the serve_stale_ttl configured on 'rbtdb'.  Both arguments are
+ * parenthesized so that any pointer expression can be passed safely.
+ */
+#define STALE_TTL(header, rbtdb) \
+	(NXDOMAIN(header) ? 0 : (rbtdb)->common.serve_stale_ttl)
+
+/*%
+ * A header is active while its TTL lies in the future, or is exactly
+ * 'now' for a header carrying the ZEROTTL attribute.
+ */
+#define ACTIVE(header, now) \
+	(((header)->ttl > (now)) || ((header)->ttl == (now) && ZEROTTL(header)))
+
+/*% True if 'rbtdb' has a non-zero serve_stale_ttl. */
+#define KEEPSTALE(rbtdb) ((rbtdb)->common.serve_stale_ttl > 0)
+
+/*%
+ * Routines for LRU-based cache management.
+ */
+
+/*%
+ * See if a given cache entry that is being reused needs to be updated
+ * in the LRU-list.  From the LRU management point of view, this function is
+ * expected to return true for almost all cases.  When used with threads,
+ * however, this may cause a non-negligible performance penalty because a
+ * writer lock will have to be acquired before updating the list.
+ * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
+ * function returns true if the entry has not been updated for some period of
+ * time.  We differentiate the NS or glue address case and the others since
+ * experiments have shown that the former tends to be accessed relatively
+ * infrequently and the cost of cache miss is higher (e.g., a missing NS record
+ * may cause external queries at a higher level zone, involving more
+ * transactions).
+ *
+ * 'header' is the slab header being reused; 'now' is the current time.
+ *
+ * Returns false unconditionally for headers marked NONEXISTENT, ANCIENT
+ * or ZEROTTL; otherwise returns true when the header's last_used time is
+ * at least the applicable interval in the past (or always, when
+ * DNS_RBTDB_LIMITLRUUPDATE is 0).
+ *
+ * Caller must hold the node (read or write) lock.
+ */
+static bool
+need_headerupdate(dns_slabheader_t *header, isc_stdtime_t now) {
+	if (DNS_SLABHEADER_GETATTR(header, (DNS_SLABHEADERATTR_NONEXISTENT |
+					    DNS_SLABHEADERATTR_ANCIENT |
+					    DNS_SLABHEADERATTR_ZEROTTL)) != 0)
+	{
+		return (false);
+	}
+
+#if DNS_RBTDB_LIMITLRUUPDATE
+	if (header->type == dns_rdatatype_ns ||
+	    (header->trust == dns_trust_glue &&
+	     (header->type == dns_rdatatype_a ||
+	      header->type == dns_rdatatype_aaaa)))
+	{
+		/*
+		 * NS and glue address records are updated if at least
+		 * DNS_RBTDB_LRUUPDATE_GLUE seconds have passed since the
+		 * previous update time.
+		 */
+		return (header->last_used + DNS_RBTDB_LRUUPDATE_GLUE <= now);
+	}
+
+	/*
+	 * Other records are updated if DNS_RBTDB_LRUUPDATE_REGULAR seconds
+	 * have passed.
+	 */
+	return (header->last_used + DNS_RBTDB_LRUUPDATE_REGULAR <= now);
+#else
+	UNUSED(now);
+
+	return (true);
+#endif /* if DNS_RBTDB_LIMITLRUUPDATE */
+}
+
+/*%
+ * Update the timestamp of a given cache entry and move it to the head
+ * of the corresponding LRU list.
+ *
+ * 'rbtdb' must be a cache database and 'header' must currently be linked
+ * on an LRU list; both conditions are asserted.  The list used is the one
+ * indexed by the lock number of the node owning 'header'.
+ *
+ * Caller must hold the node (write) lock.
+ *
+ * Note that we do NOT touch the heap here, as the TTL has not changed.
+ */
+static void
+update_header(dns_rbtdb_t *rbtdb, dns_slabheader_t *header, isc_stdtime_t now) {
+	INSIST(IS_CACHE(rbtdb));
+
+	/* To be checked: can we really assume this? XXXMLG */
+	INSIST(ISC_LINK_LINKED(header, link));
+
+	ISC_LIST_UNLINK(rbtdb->lru[HEADER_NODE(header)->locknum], header, link);
+	header->last_used = now;
+	ISC_LIST_PREPEND(rbtdb->lru[HEADER_NODE(header)->locknum], header,
+			 link);
+}
+
+/*
+ * Locking
+ *
+ * If a routine is going to lock more than one lock in this module, then
+ * the locking must be done in the following order:
+ *
+ *      Tree Lock
+ *
+ *      Node Lock       (Only one from the set may be locked at one time by
+ *                       any caller)
+ *
+ *      Database Lock
+ *
+ * Failure to follow this hierarchy can result in deadlock.
+ */
+
+/*
+ * Deleting Nodes
+ *
+ * For zone databases the node for the origin of the zone MUST NOT be deleted.
+ */ + +/* + * DB Routines + */ + +static void +update_cachestats(dns_rbtdb_t *rbtdb, isc_result_t result) { + INSIST(IS_CACHE(rbtdb)); + + if (rbtdb->cachestats == NULL) { + return; + } + + switch (result) { + case DNS_R_COVERINGNSEC: + isc_stats_increment(rbtdb->cachestats, + dns_cachestatscounter_coveringnsec); + FALLTHROUGH; + case ISC_R_SUCCESS: + case DNS_R_CNAME: + case DNS_R_DNAME: + case DNS_R_DELEGATION: + case DNS_R_NCACHENXDOMAIN: + case DNS_R_NCACHENXRRSET: + isc_stats_increment(rbtdb->cachestats, + dns_cachestatscounter_hits); + break; + default: + isc_stats_increment(rbtdb->cachestats, + dns_cachestatscounter_misses); + } +} + +static void +clean_stale_headers(dns_slabheader_t *top) { + dns_slabheader_t *d = NULL, *down_next = NULL; + + for (d = top->down; d != NULL; d = down_next) { + down_next = d->down; + dns_slabheader_destroy(&d); + } + top->down = NULL; +} + +static isc_result_t +setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep, + dns_name_t *foundname, dns_rdataset_t *rdataset, + dns_rdataset_t *sigrdataset DNS__DB_FLARG) { + dns_name_t *zcname = NULL; + dns_typepair_t type; + dns_rbtnode_t *node = NULL; + + REQUIRE(search != NULL); + REQUIRE(search->zonecut != NULL); + REQUIRE(search->zonecut_header != NULL); + + /* + * The caller MUST NOT be holding any node locks. + */ + + node = search->zonecut; + type = search->zonecut_header->type; + + /* + * If we have to set foundname, we do it before anything else. + * If we were to set foundname after we had set nodep or bound the + * rdataset, then we'd have to undo that work if dns_name_copy() + * failed. By setting foundname first, there's nothing to undo if + * we have trouble. 
+ */ + if (foundname != NULL && search->copy_name) { + zcname = dns_fixedname_name(&search->zonecut_name); + dns_name_copy(zcname, foundname); + } + if (nodep != NULL) { + /* + * Note that we don't have to increment the node's reference + * count here because we're going to use the reference we + * already have in the search block. + */ + *nodep = node; + search->need_cleanup = false; + } + if (rdataset != NULL) { + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + NODE_RDLOCK(&(search->rbtdb->node_locks[node->locknum].lock), + &nlocktype); + dns__rbtdb_bindrdataset(search->rbtdb, node, + search->zonecut_header, search->now, + isc_rwlocktype_read, + rdataset DNS__DB_FLARG_PASS); + if (sigrdataset != NULL && search->zonecut_sigheader != NULL) { + dns__rbtdb_bindrdataset( + search->rbtdb, node, search->zonecut_sigheader, + search->now, isc_rwlocktype_read, + sigrdataset DNS__DB_FLARG_PASS); + } + NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock), + &nlocktype); + } + + if (type == dns_rdatatype_dname) { + return (DNS_R_DNAME); + } + return (DNS_R_DELEGATION); +} + +static bool +check_stale_header(dns_rbtnode_t *node, dns_slabheader_t *header, + isc_rwlocktype_t *nlocktypep, isc_rwlock_t *lock, + rbtdb_search_t *search, dns_slabheader_t **header_prev) { + if (!ACTIVE(header, search->now)) { + dns_ttl_t stale = header->ttl + + STALE_TTL(header, search->rbtdb); + /* + * If this data is in the stale window keep it and if + * DNS_DBFIND_STALEOK is not set we tell the caller to + * skip this record. We skip the records with ZEROTTL + * (these records should not be cached anyway). 
+ */ + + DNS_SLABHEADER_CLRATTR(header, DNS_SLABHEADERATTR_STALE_WINDOW); + if (!ZEROTTL(header) && KEEPSTALE(search->rbtdb) && + stale > search->now) + { + dns__rbtdb_mark(header, DNS_SLABHEADERATTR_STALE); + *header_prev = header; + /* + * If DNS_DBFIND_STALESTART is set then it means we + * failed to resolve the name during recursion, in + * this case we mark the time in which the refresh + * failed. + */ + if ((search->options & DNS_DBFIND_STALESTART) != 0) { + atomic_store_release( + &header->last_refresh_fail_ts, + search->now); + } else if ((search->options & + DNS_DBFIND_STALEENABLED) != 0 && + search->now < + (atomic_load_acquire( + &header->last_refresh_fail_ts) + + search->rbtdb->serve_stale_refresh)) + { + /* + * If we are within interval between last + * refresh failure time + 'stale-refresh-time', + * then don't skip this stale entry but use it + * instead. + */ + DNS_SLABHEADER_SETATTR( + header, + DNS_SLABHEADERATTR_STALE_WINDOW); + return (false); + } else if ((search->options & + DNS_DBFIND_STALETIMEOUT) != 0) + { + /* + * We want stale RRset due to timeout, so we + * don't skip it. + */ + return (false); + } + return ((search->options & DNS_DBFIND_STALEOK) == 0); + } + + /* + * This rdataset is stale. If no one else is using the + * node, we can clean it up right now, otherwise we mark + * it as ancient, and the node as dirty, so it will get + * cleaned up later. + */ + if ((header->ttl < search->now - RBTDB_VIRTUAL) && + (*nlocktypep == isc_rwlocktype_write || + NODE_TRYUPGRADE(lock, nlocktypep) == ISC_R_SUCCESS)) + { + /* + * We update the node's status only when we can + * get write access; otherwise, we leave others + * to this work. Periodical cleaning will + * eventually take the job as the last resort. + * We won't downgrade the lock, since other + * rdatasets are probably stale, too. 
+ */ + + if (isc_refcount_current(&node->references) == 0) { + /* + * header->down can be non-NULL if the + * refcount has just decremented to 0 + * but dns__rbtdb_decref() has not + * performed clean_cache_node(), in + * which case we need to purge the stale + * headers first. + */ + clean_stale_headers(header); + if (*header_prev != NULL) { + (*header_prev)->next = header->next; + } else { + node->data = header->next; + } + dns_slabheader_destroy(&header); + } else { + dns__rbtdb_mark(header, + DNS_SLABHEADERATTR_ANCIENT); + HEADER_NODE(header)->dirty = 1; + *header_prev = header; + } + } else { + *header_prev = header; + } + return (true); + } + return (false); +} + +static isc_result_t +cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, + void *arg DNS__DB_FLARG) { + rbtdb_search_t *search = arg; + dns_slabheader_t *header = NULL; + dns_slabheader_t *header_prev = NULL, *header_next = NULL; + dns_slabheader_t *dname_header = NULL, *sigdname_header = NULL; + isc_result_t result; + isc_rwlock_t *lock = NULL; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + REQUIRE(search->zonecut == NULL); + + /* + * Keep compiler silent. + */ + UNUSED(name); + + lock = &(search->rbtdb->node_locks[node->locknum].lock); + NODE_RDLOCK(lock, &nlocktype); + + /* + * Look for a DNAME or RRSIG DNAME rdataset. + */ + for (header = node->data; header != NULL; header = header_next) { + header_next = header->next; + if (check_stale_header(node, header, &nlocktype, lock, search, + &header_prev)) + { + /* Do nothing. 
*/ + } else if (header->type == dns_rdatatype_dname && + EXISTS(header) && !ANCIENT(header)) + { + dname_header = header; + header_prev = header; + } else if (header->type == RBTDB_RDATATYPE_SIGDNAME && + EXISTS(header) && !ANCIENT(header)) + { + sigdname_header = header; + header_prev = header; + } else { + header_prev = header; + } + } + + if (dname_header != NULL && + (!DNS_TRUST_PENDING(dname_header->trust) || + (search->options & DNS_DBFIND_PENDINGOK) != 0)) + { + /* + * We increment the reference count on node to ensure that + * search->zonecut_header will still be valid later. + */ + dns__rbtdb_newref(search->rbtdb, node, + nlocktype DNS__DB_FLARG_PASS); + search->zonecut = node; + search->zonecut_header = dname_header; + search->zonecut_sigheader = sigdname_header; + search->need_cleanup = true; + result = DNS_R_PARTIALMATCH; + } else { + result = DNS_R_CONTINUE; + } + + NODE_UNLOCK(lock, &nlocktype); + + return (result); +} + +static isc_result_t +find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node, + dns_dbnode_t **nodep, dns_name_t *foundname, + dns_rdataset_t *rdataset, + dns_rdataset_t *sigrdataset DNS__DB_FLARG) { + unsigned int i; + isc_result_t result = ISC_R_NOTFOUND; + dns_name_t name; + dns_rbtdb_t *rbtdb = NULL; + bool done; + + /* + * Caller must be holding the tree lock. + */ + + rbtdb = search->rbtdb; + i = search->chain.level_matches; + done = false; + do { + dns_slabheader_t *header = NULL; + dns_slabheader_t *header_prev = NULL, *header_next = NULL; + dns_slabheader_t *found = NULL, *foundsig = NULL; + isc_rwlock_t *lock = &rbtdb->node_locks[node->locknum].lock; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + NODE_RDLOCK(lock, &nlocktype); + + /* + * Look for NS and RRSIG NS rdatasets. + */ + for (header = node->data; header != NULL; header = header_next) + { + header_next = header->next; + if (check_stale_header(node, header, &nlocktype, lock, + search, &header_prev)) + { + /* Do nothing. 
*/ + } else if (EXISTS(header) && !ANCIENT(header)) { + /* + * We've found an extant rdataset. See if + * we're interested in it. + */ + if (header->type == dns_rdatatype_ns) { + found = header; + if (foundsig != NULL) { + break; + } + } else if (header->type == + RBTDB_RDATATYPE_SIGNS) + { + foundsig = header; + if (found != NULL) { + break; + } + } + header_prev = header; + } else { + header_prev = header; + } + } + + if (found != NULL) { + /* + * If we have to set foundname, we do it before + * anything else. If we were to set foundname after + * we had set nodep or bound the rdataset, then we'd + * have to undo that work if dns_name_concatenate() + * failed. By setting foundname first, there's + * nothing to undo if we have trouble. + */ + if (foundname != NULL) { + dns_name_init(&name, NULL); + dns_rbt_namefromnode(node, &name); + dns_name_copy(&name, foundname); + while (i > 0) { + dns_rbtnode_t *level_node = + search->chain.levels[--i]; + dns_name_init(&name, NULL); + dns_rbt_namefromnode(level_node, &name); + result = dns_name_concatenate( + foundname, &name, foundname, + NULL); + if (result != ISC_R_SUCCESS) { + if (nodep != NULL) { + *nodep = NULL; + } + goto node_exit; + } + } + } + result = DNS_R_DELEGATION; + if (nodep != NULL) { + dns__rbtdb_newref(search->rbtdb, node, + nlocktype DNS__DB_FLARG_PASS); + *nodep = node; + } + dns__rbtdb_bindrdataset(search->rbtdb, node, found, + search->now, nlocktype, + rdataset DNS__DB_FLARG_PASS); + if (foundsig != NULL) { + dns__rbtdb_bindrdataset( + search->rbtdb, node, foundsig, + search->now, nlocktype, + sigrdataset DNS__DB_FLARG_PASS); + } + if (need_headerupdate(found, search->now) || + (foundsig != NULL && + need_headerupdate(foundsig, search->now))) + { + if (nlocktype != isc_rwlocktype_write) { + NODE_FORCEUPGRADE(lock, &nlocktype); + POST(nlocktype); + } + if (need_headerupdate(found, search->now)) { + update_header(search->rbtdb, found, + search->now); + } + if (foundsig != NULL && + 
need_headerupdate(foundsig, search->now))
+				{
+					update_header(search->rbtdb, foundsig,
+						      search->now);
+				}
+			}
+		}
+
+	node_exit:
+		NODE_UNLOCK(lock, &nlocktype);
+
+		if (found == NULL && i > 0) {
+			i--;
+			node = search->chain.levels[i];
+		} else {
+			done = true;
+		}
+	} while (!done);
+
+	return (result);
+}
+
+/*
+ * Look for a potentially covering NSEC in the cache where `name`
+ * is known not to exist.  This uses the auxiliary NSEC tree to find
+ * the potential NSEC owner, then looks that owner up in the main tree
+ * and scans its headers for an NSEC rdataset and its RRSIG.
+ *
+ * If an NSEC is found, 'rdataset' is bound to it, 'sigrdataset' is bound
+ * to the RRSIG(NSEC) when one is present, the owner name is copied to
+ * 'foundname', and DNS_R_COVERINGNSEC is returned.  When 'nodep' is
+ * non-NULL a new reference to the node is taken and stored in '*nodep';
+ * a NULL 'nodep' is accepted, as in the other lookup routines here.
+ *
+ * Otherwise, return ISC_R_NOTFOUND and leave the output arguments alone.
+ *
+ * Rdatasets are bound using 'now' as the current time.
+ */
+static isc_result_t
+find_coveringnsec(rbtdb_search_t *search, const dns_name_t *name,
+		  dns_dbnode_t **nodep, isc_stdtime_t now,
+		  dns_name_t *foundname, dns_rdataset_t *rdataset,
+		  dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
+	dns_fixedname_t fprefix, forigin, ftarget, fixed;
+	dns_name_t *prefix = NULL, *origin = NULL;
+	dns_name_t *target = NULL, *fname = NULL;
+	dns_rbtnode_t *node = NULL;
+	dns_rbtnodechain_t chain;
+	isc_result_t result;
+	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
+	isc_rwlock_t *lock = NULL;
+	dns_typepair_t matchtype, sigmatchtype;
+	dns_slabheader_t *found = NULL, *foundsig = NULL;
+	dns_slabheader_t *header = NULL;
+	dns_slabheader_t *header_next = NULL, *header_prev = NULL;
+
+	/*
+	 * Look for the node in the auxiliary tree.
+	 */
+	dns_rbtnodechain_init(&chain);
+	target = dns_fixedname_initname(&ftarget);
+	result = dns_rbt_findnode(search->rbtdb->nsec, name, target, &node,
+				  &chain, DNS_RBTFIND_EMPTYDATA, NULL, NULL);
+	if (result != DNS_R_PARTIALMATCH) {
+		dns_rbtnodechain_reset(&chain);
+		return (ISC_R_NOTFOUND);
+	}
+
+	prefix = dns_fixedname_initname(&fprefix);
+	origin = dns_fixedname_initname(&forigin);
+	target = dns_fixedname_initname(&ftarget);
+	fname = dns_fixedname_initname(&fixed);
+
+	matchtype = DNS_TYPEPAIR_VALUE(dns_rdatatype_nsec, 0);
+	sigmatchtype = DNS_TYPEPAIR_VALUE(dns_rdatatype_rrsig,
+					  dns_rdatatype_nsec);
+
+	/*
+	 * Extract predecessor from chain.
+	 */
+	result = dns_rbtnodechain_current(&chain, prefix, origin, NULL);
+	dns_rbtnodechain_reset(&chain);
+	if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN) {
+		return (ISC_R_NOTFOUND);
+	}
+
+	result = dns_name_concatenate(prefix, origin, target, NULL);
+	if (result != ISC_R_SUCCESS) {
+		return (ISC_R_NOTFOUND);
+	}
+
+	/*
+	 * Lookup the predecessor in the main tree.
+	 */
+	node = NULL;
+	result = dns_rbt_findnode(search->rbtdb->tree, target, fname, &node,
+				  NULL, DNS_RBTFIND_EMPTYDATA, NULL, NULL);
+	if (result != ISC_R_SUCCESS) {
+		return (ISC_R_NOTFOUND);
+	}
+
+	lock = &(search->rbtdb->node_locks[node->locknum].lock);
+	NODE_RDLOCK(lock, &nlocktype);
+	for (header = node->data; header != NULL; header = header_next) {
+		header_next = header->next;
+		if (check_stale_header(node, header, &nlocktype, lock, search,
+				       &header_prev))
+		{
+			continue;
+		}
+		if (NONEXISTENT(header) || DNS_TYPEPAIR_TYPE(header->type) == 0)
+		{
+			header_prev = header;
+			continue;
+		}
+		if (header->type == matchtype) {
+			found = header;
+			if (foundsig != NULL) {
+				break;
+			}
+		} else if (header->type == sigmatchtype) {
+			foundsig = header;
+			if (found != NULL) {
+				break;
+			}
+		}
+		header_prev = header;
+	}
+	if (found != NULL) {
+		dns__rbtdb_bindrdataset(search->rbtdb, node, found, now,
+					nlocktype, rdataset DNS__DB_FLARG_PASS);
+		if (foundsig != NULL) {
+			dns__rbtdb_bindrdataset(search->rbtdb, node, foundsig,
+						now, nlocktype,
+						sigrdataset DNS__DB_FLARG_PASS);
+		}
+		if (nodep != NULL) {
+			dns__rbtdb_newref(search->rbtdb, node,
+					  nlocktype DNS__DB_FLARG_PASS);
+			*nodep = node;
+		}
+		dns_name_copy(fname, foundname);
+		result = DNS_R_COVERINGNSEC;
+	} else {
+		result = ISC_R_NOTFOUND;
+	}
+	NODE_UNLOCK(lock, &nlocktype);
+	return (result);
+}
+
+static isc_result_t
+cache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
+	   dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
+	   dns_dbnode_t **nodep, dns_name_t *foundname,
+	   dns_rdataset_t *rdataset,
+	   dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
+	dns_rbtnode_t *node = NULL;
+	isc_result_t result;
+	rbtdb_search_t search;
+	bool cname_ok = true;
+	bool found_noqname = false;
+	bool all_negative = true;
+	bool empty_node;
+	isc_rwlock_t *lock = NULL;
+	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
+	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
+	dns_slabheader_t *header = NULL;
+
dns_slabheader_t *header_prev = NULL, *header_next = NULL; + dns_slabheader_t *found = NULL, *nsheader = NULL; + dns_slabheader_t *foundsig = NULL, *nssig = NULL, *cnamesig = NULL; + dns_slabheader_t *update = NULL, *updatesig = NULL; + dns_slabheader_t *nsecheader = NULL, *nsecsig = NULL; + dns_typepair_t sigtype, negtype; + + UNUSED(version); + + REQUIRE(VALID_RBTDB((dns_rbtdb_t *)db)); + REQUIRE(version == NULL); + + if (now == 0) { + now = isc_stdtime_now(); + } + + search = (rbtdb_search_t){ + .rbtdb = (dns_rbtdb_t *)db, + .serial = 1, + .options = options, + .now = now, + }; + dns_fixedname_init(&search.zonecut_name); + dns_rbtnodechain_init(&search.chain); + + TREE_RDLOCK(&search.rbtdb->tree_lock, &tlocktype); + + /* + * Search down from the root of the tree. If, while going down, we + * encounter a callback node, cache_zonecut_callback() will search the + * rdatasets at the zone cut for a DNAME rdataset. + */ + result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node, + &search.chain, DNS_RBTFIND_EMPTYDATA, + cache_zonecut_callback, &search); + + if (result == DNS_R_PARTIALMATCH) { + /* + * If dns_rbt_findnode discovered a covering DNAME skip + * looking for a covering NSEC. 
+ */ + if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 && + (search.zonecut_header == NULL || + search.zonecut_header->type != dns_rdatatype_dname)) + { + result = find_coveringnsec( + &search, name, nodep, now, foundname, rdataset, + sigrdataset DNS__DB_FLARG_PASS); + if (result == DNS_R_COVERINGNSEC) { + goto tree_exit; + } + } + if (search.zonecut != NULL) { + result = setup_delegation( + &search, nodep, foundname, rdataset, + sigrdataset DNS__DB_FLARG_PASS); + goto tree_exit; + } else { + find_ns: + result = find_deepest_zonecut( + &search, node, nodep, foundname, rdataset, + sigrdataset DNS__DB_FLARG_PASS); + goto tree_exit; + } + } else if (result != ISC_R_SUCCESS) { + goto tree_exit; + } + + /* + * Certain DNSSEC types are not subject to CNAME matching + * (RFC4035, section 2.5 and RFC3007). + * + * We don't check for RRSIG, because we don't store RRSIG records + * directly. + */ + if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) { + cname_ok = false; + } + + /* + * We now go looking for rdata... + */ + + lock = &(search.rbtdb->node_locks[node->locknum].lock); + NODE_RDLOCK(lock, &nlocktype); + + /* + * These pointers need to be reset here in case we did + * 'goto find_ns' from somewhere below. + */ + found = NULL; + foundsig = NULL; + sigtype = DNS_TYPEPAIR_VALUE(dns_rdatatype_rrsig, type); + negtype = DNS_TYPEPAIR_VALUE(0, type); + nsheader = NULL; + nsecheader = NULL; + nssig = NULL; + nsecsig = NULL; + cnamesig = NULL; + empty_node = true; + header_prev = NULL; + for (header = node->data; header != NULL; header = header_next) { + header_next = header->next; + if (check_stale_header(node, header, &nlocktype, lock, &search, + &header_prev)) + { + /* Do nothing. */ + } else if (EXISTS(header) && !ANCIENT(header)) { + /* + * We now know that there is at least one active + * non-stale rdataset at this node. 
+ */ + empty_node = false; + if (header->noqname != NULL && + header->trust == dns_trust_secure) + { + found_noqname = true; + } + if (!NEGATIVE(header)) { + all_negative = false; + } + + /* + * If we found a type we were looking for, remember + * it. + */ + if (header->type == type || + (type == dns_rdatatype_any && + DNS_TYPEPAIR_TYPE(header->type) != 0) || + (cname_ok && header->type == dns_rdatatype_cname)) + { + /* + * We've found the answer. + */ + found = header; + if (header->type == dns_rdatatype_cname && + cname_ok && cnamesig != NULL) + { + /* + * If we've already got the + * CNAME RRSIG, use it. + */ + foundsig = cnamesig; + } + } else if (header->type == sigtype) { + /* + * We've found the RRSIG rdataset for our + * target type. Remember it. + */ + foundsig = header; + } else if (header->type == RBTDB_RDATATYPE_NCACHEANY || + header->type == negtype) + { + /* + * We've found a negative cache entry. + */ + found = header; + } else if (header->type == dns_rdatatype_ns) { + /* + * Remember a NS rdataset even if we're + * not specifically looking for it, because + * we might need it later. + */ + nsheader = header; + } else if (header->type == RBTDB_RDATATYPE_SIGNS) { + /* + * If we need the NS rdataset, we'll also + * need its signature. + */ + nssig = header; + } else if (header->type == dns_rdatatype_nsec) { + nsecheader = header; + } else if (header->type == RBTDB_RDATATYPE_SIGNSEC) { + nsecsig = header; + } else if (cname_ok && + header->type == RBTDB_RDATATYPE_SIGCNAME) + { + /* + * If we get a CNAME match, we'll also need + * its signature. + */ + cnamesig = header; + } + header_prev = header; + } else { + header_prev = header; + } + } + + if (empty_node) { + /* + * We have an exact match for the name, but there are no + * extant rdatasets. That means that this node doesn't + * meaningfully exist, and that we really have a partial match. 
+ */ + NODE_UNLOCK(lock, &nlocktype); + if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) { + result = find_coveringnsec( + &search, name, nodep, now, foundname, rdataset, + sigrdataset DNS__DB_FLARG_PASS); + if (result == DNS_R_COVERINGNSEC) { + goto tree_exit; + } + } + goto find_ns; + } + + /* + * If we didn't find what we were looking for... + */ + if (found == NULL || + (DNS_TRUST_ADDITIONAL(found->trust) && + ((options & DNS_DBFIND_ADDITIONALOK) == 0)) || + (found->trust == dns_trust_glue && + ((options & DNS_DBFIND_GLUEOK) == 0)) || + (DNS_TRUST_PENDING(found->trust) && + ((options & DNS_DBFIND_PENDINGOK) == 0))) + { + /* + * Return covering NODATA NSEC record. + */ + if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 && + nsecheader != NULL) + { + if (nodep != NULL) { + dns__rbtdb_newref(search.rbtdb, node, + nlocktype DNS__DB_FLARG_PASS); + *nodep = node; + } + dns__rbtdb_bindrdataset(search.rbtdb, node, nsecheader, + search.now, nlocktype, + rdataset DNS__DB_FLARG_PASS); + if (need_headerupdate(nsecheader, search.now)) { + update = nsecheader; + } + if (nsecsig != NULL) { + dns__rbtdb_bindrdataset( + search.rbtdb, node, nsecsig, search.now, + nlocktype, + sigrdataset DNS__DB_FLARG_PASS); + if (need_headerupdate(nsecsig, search.now)) { + updatesig = nsecsig; + } + } + result = DNS_R_COVERINGNSEC; + goto node_exit; + } + + /* + * This name was from a wild card. Look for a covering NSEC. + */ + if (found == NULL && (found_noqname || all_negative) && + (search.options & DNS_DBFIND_COVERINGNSEC) != 0) + { + NODE_UNLOCK(lock, &nlocktype); + result = find_coveringnsec( + &search, name, nodep, now, foundname, rdataset, + sigrdataset DNS__DB_FLARG_PASS); + if (result == DNS_R_COVERINGNSEC) { + goto tree_exit; + } + goto find_ns; + } + + /* + * If there is an NS rdataset at this node, then this is the + * deepest zone cut. 
+ */ + if (nsheader != NULL) { + if (nodep != NULL) { + dns__rbtdb_newref(search.rbtdb, node, + nlocktype DNS__DB_FLARG_PASS); + *nodep = node; + } + dns__rbtdb_bindrdataset(search.rbtdb, node, nsheader, + search.now, nlocktype, + rdataset DNS__DB_FLARG_PASS); + if (need_headerupdate(nsheader, search.now)) { + update = nsheader; + } + if (nssig != NULL) { + dns__rbtdb_bindrdataset( + search.rbtdb, node, nssig, search.now, + nlocktype, + sigrdataset DNS__DB_FLARG_PASS); + if (need_headerupdate(nssig, search.now)) { + updatesig = nssig; + } + } + result = DNS_R_DELEGATION; + goto node_exit; + } + + /* + * Go find the deepest zone cut. + */ + NODE_UNLOCK(lock, &nlocktype); + goto find_ns; + } + + /* + * We found what we were looking for, or we found a CNAME. + */ + + if (nodep != NULL) { + dns__rbtdb_newref(search.rbtdb, node, + nlocktype DNS__DB_FLARG_PASS); + *nodep = node; + } + + if (NEGATIVE(found)) { + /* + * We found a negative cache entry. + */ + if (NXDOMAIN(found)) { + result = DNS_R_NCACHENXDOMAIN; + } else { + result = DNS_R_NCACHENXRRSET; + } + } else if (type != found->type && type != dns_rdatatype_any && + found->type == dns_rdatatype_cname) + { + /* + * We weren't doing an ANY query and we found a CNAME instead + * of the type we were looking for, so we need to indicate + * that result to the caller. + */ + result = DNS_R_CNAME; + } else { + /* + * An ordinary successful query! 
+ */ + result = ISC_R_SUCCESS; + } + + if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN || + result == DNS_R_NCACHENXRRSET) + { + dns__rbtdb_bindrdataset(search.rbtdb, node, found, search.now, + nlocktype, rdataset DNS__DB_FLARG_PASS); + if (need_headerupdate(found, search.now)) { + update = found; + } + if (!NEGATIVE(found) && foundsig != NULL) { + dns__rbtdb_bindrdataset(search.rbtdb, node, foundsig, + search.now, nlocktype, + sigrdataset DNS__DB_FLARG_PASS); + if (need_headerupdate(foundsig, search.now)) { + updatesig = foundsig; + } + } + } + +node_exit: + if ((update != NULL || updatesig != NULL) && + nlocktype != isc_rwlocktype_write) + { + NODE_FORCEUPGRADE(lock, &nlocktype); + POST(nlocktype); + } + if (update != NULL && need_headerupdate(update, search.now)) { + update_header(search.rbtdb, update, search.now); + } + if (updatesig != NULL && need_headerupdate(updatesig, search.now)) { + update_header(search.rbtdb, updatesig, search.now); + } + + NODE_UNLOCK(lock, &nlocktype); + +tree_exit: + TREE_UNLOCK(&search.rbtdb->tree_lock, &tlocktype); + + /* + * If we found a zonecut but aren't going to use it, we have to + * let go of it. 
+ */ + if (search.need_cleanup) { + node = search.zonecut; + INSIST(node != NULL); + lock = &(search.rbtdb->node_locks[node->locknum].lock); + + NODE_RDLOCK(lock, &nlocktype); + dns__rbtdb_decref(search.rbtdb, node, 0, &nlocktype, &tlocktype, + true, false DNS__DB_FLARG_PASS); + NODE_UNLOCK(lock, &nlocktype); + INSIST(tlocktype == isc_rwlocktype_none); + } + + dns_rbtnodechain_reset(&search.chain); + + update_cachestats(search.rbtdb, result); + return (result); +} + +static isc_result_t +cache_findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options, + isc_stdtime_t now, dns_dbnode_t **nodep, + dns_name_t *foundname, dns_name_t *dcname, + dns_rdataset_t *rdataset, + dns_rdataset_t *sigrdataset DNS__DB_FLARG) { + dns_rbtnode_t *node = NULL; + isc_rwlock_t *lock = NULL; + isc_result_t result; + rbtdb_search_t search; + dns_slabheader_t *header = NULL; + dns_slabheader_t *header_prev = NULL, *header_next = NULL; + dns_slabheader_t *found = NULL, *foundsig = NULL; + unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA; + isc_rwlocktype_t tlocktype = isc_rwlocktype_none; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + bool dcnull = (dcname == NULL); + + REQUIRE(VALID_RBTDB((dns_rbtdb_t *)db)); + + if (now == 0) { + now = isc_stdtime_now(); + } + + search = (rbtdb_search_t){ + .rbtdb = (dns_rbtdb_t *)db, + .serial = 1, + .options = options, + .now = now, + }; + dns_fixedname_init(&search.zonecut_name); + dns_rbtnodechain_init(&search.chain); + + if (dcnull) { + dcname = foundname; + } + + if ((options & DNS_DBFIND_NOEXACT) != 0) { + rbtoptions |= DNS_RBTFIND_NOEXACT; + } + + TREE_RDLOCK(&search.rbtdb->tree_lock, &tlocktype); + + /* + * Search down from the root of the tree. 
+ */ + result = dns_rbt_findnode(search.rbtdb->tree, name, dcname, &node, + &search.chain, rbtoptions, NULL, &search); + + if (result == DNS_R_PARTIALMATCH) { + result = find_deepest_zonecut(&search, node, nodep, foundname, + rdataset, + sigrdataset DNS__DB_FLARG_PASS); + goto tree_exit; + } else if (result != ISC_R_SUCCESS) { + goto tree_exit; + } else if (!dcnull) { + dns_name_copy(dcname, foundname); + } + + /* + * We now go looking for an NS rdataset at the node. + */ + + lock = &(search.rbtdb->node_locks[node->locknum].lock); + NODE_RDLOCK(lock, &nlocktype); + + for (header = node->data; header != NULL; header = header_next) { + header_next = header->next; + if (check_stale_header(node, header, &nlocktype, lock, &search, + &header_prev)) + { + /* + * The function dns_rbt_findnode found us a matching + * node for 'name' and stored the result in 'dcname'. + * This is the deepest known zonecut in our database. + * However, this node may be stale and if serve-stale + * is not enabled (in other words 'stale-answer-enable' + * is set to no), this node may not be used as a + * zonecut we know about. If so, find the deepest + * zonecut from this node up and return that instead. + */ + NODE_UNLOCK(lock, &nlocktype); + result = find_deepest_zonecut( + &search, node, nodep, foundname, rdataset, + sigrdataset DNS__DB_FLARG_PASS); + dns_name_copy(foundname, dcname); + goto tree_exit; + } else if (EXISTS(header) && !ANCIENT(header)) { + /* + * If we found a type we were looking for, remember + * it. + */ + if (header->type == dns_rdatatype_ns) { + /* + * Remember an NS rdataset even if we're + * not specifically looking for it, because + * we might need it later. + */ + found = header; + } else if (header->type == RBTDB_RDATATYPE_SIGNS) { + /* + * If we need the NS rdataset, we'll also + * need its signature. + */ + foundsig = header; + } + header_prev = header; + } else { + header_prev = header; + } + } + + if (found == NULL) { + /* + * No NS records here.
+ */ + NODE_UNLOCK(lock, &nlocktype); + result = find_deepest_zonecut(&search, node, nodep, foundname, + rdataset, + sigrdataset DNS__DB_FLARG_PASS); + goto tree_exit; + } + + if (nodep != NULL) { + dns__rbtdb_newref(search.rbtdb, node, + nlocktype DNS__DB_FLARG_PASS); + *nodep = node; + } + + dns__rbtdb_bindrdataset(search.rbtdb, node, found, search.now, + nlocktype, rdataset DNS__DB_FLARG_PASS); + if (foundsig != NULL) { + dns__rbtdb_bindrdataset(search.rbtdb, node, foundsig, + search.now, nlocktype, + sigrdataset DNS__DB_FLARG_PASS); + } + + if (need_headerupdate(found, search.now) || + (foundsig != NULL && need_headerupdate(foundsig, search.now))) + { + if (nlocktype != isc_rwlocktype_write) { + NODE_FORCEUPGRADE(lock, &nlocktype); + POST(nlocktype); + } + if (need_headerupdate(found, search.now)) { + update_header(search.rbtdb, found, search.now); + } + if (foundsig != NULL && need_headerupdate(foundsig, search.now)) + { + update_header(search.rbtdb, foundsig, search.now); + } + } + + NODE_UNLOCK(lock, &nlocktype); + +tree_exit: + TREE_UNLOCK(&search.rbtdb->tree_lock, &tlocktype); + + INSIST(!search.need_cleanup); + + dns_rbtnodechain_reset(&search.chain); + + if (result == DNS_R_DELEGATION) { + result = ISC_R_SUCCESS; + } + + return (result); +} + +static isc_result_t +cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, + dns_rdatatype_t type, dns_rdatatype_t covers, + isc_stdtime_t now, dns_rdataset_t *rdataset, + dns_rdataset_t *sigrdataset DNS__DB_FLARG) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; + dns_slabheader_t *header = NULL, *header_next = NULL; + dns_slabheader_t *found = NULL, *foundsig = NULL; + dns_typepair_t matchtype, sigmatchtype, negtype; + isc_result_t result; + isc_rwlock_t *lock = NULL; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(type != dns_rdatatype_any); + + UNUSED(version); + + result = ISC_R_SUCCESS; + + if 
(now == 0) { + now = isc_stdtime_now(); + } + + lock = &rbtdb->node_locks[rbtnode->locknum].lock; + NODE_RDLOCK(lock, &nlocktype); + + matchtype = DNS_TYPEPAIR_VALUE(type, covers); + negtype = DNS_TYPEPAIR_VALUE(0, type); + if (covers == 0) { + sigmatchtype = DNS_TYPEPAIR_VALUE(dns_rdatatype_rrsig, type); + } else { + sigmatchtype = 0; + } + + for (header = rbtnode->data; header != NULL; header = header_next) { + header_next = header->next; + if (!ACTIVE(header, now)) { + if ((header->ttl + STALE_TTL(header, rbtdb) < + now - RBTDB_VIRTUAL) && + (nlocktype == isc_rwlocktype_write || + NODE_TRYUPGRADE(lock, &nlocktype) == + ISC_R_SUCCESS)) + { + /* + * We update the node's status only when we + * can get write access. + * + * We don't check if refcurrent(rbtnode) == 0 + * and try to free like we do in cache_find(), + * because refcurrent(rbtnode) must be + * non-zero. This is so because 'node' is an + * argument to the function. + */ + dns__rbtdb_mark(header, + DNS_SLABHEADERATTR_ANCIENT); + HEADER_NODE(header)->dirty = 1; + } + } else if (EXISTS(header) && !ANCIENT(header)) { + if (header->type == matchtype) { + found = header; + } else if (header->type == RBTDB_RDATATYPE_NCACHEANY || + header->type == negtype) + { + found = header; + } else if (header->type == sigmatchtype) { + foundsig = header; + } + } + } + if (found != NULL) { + dns__rbtdb_bindrdataset(rbtdb, rbtnode, found, now, nlocktype, + rdataset DNS__DB_FLARG_PASS); + if (!NEGATIVE(found) && foundsig != NULL) { + dns__rbtdb_bindrdataset(rbtdb, rbtnode, foundsig, now, + nlocktype, + sigrdataset DNS__DB_FLARG_PASS); + } + } + + NODE_UNLOCK(lock, &nlocktype); + + if (found == NULL) { + return (ISC_R_NOTFOUND); + } + + if (NEGATIVE(found)) { + /* + * We found a negative cache entry. 
+ */ + if (NXDOMAIN(found)) { + result = DNS_R_NCACHENXDOMAIN; + } else { + result = DNS_R_NCACHENXRRSET; + } + } + + update_cachestats(rbtdb, result); + + return (result); +} + +static size_t +hashsize(dns_db_t *db) { + dns_rbtdb_t *rbtdb = NULL; + size_t size; + isc_rwlocktype_t tlocktype = isc_rwlocktype_none; + + rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + + TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype); + size = dns_rbt_hashsize(rbtdb->tree); + TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); + + return (size); +} + +static isc_result_t +setcachestats(dns_db_t *db, isc_stats_t *stats) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(IS_CACHE(rbtdb)); /* current restriction */ + REQUIRE(stats != NULL); + + isc_stats_attach(stats, &rbtdb->cachestats); + return (ISC_R_SUCCESS); +} + +static dns_stats_t * +getrrsetstats(dns_db_t *db) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(IS_CACHE(rbtdb)); /* current restriction */ + + return (rbtdb->rrsetstats); +} + +static isc_result_t +setservestalettl(dns_db_t *db, dns_ttl_t ttl) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(IS_CACHE(rbtdb)); + + /* currently no bounds checking. 0 means disable. */ + rbtdb->common.serve_stale_ttl = ttl; + return (ISC_R_SUCCESS); +} + +static isc_result_t +getservestalettl(dns_db_t *db, dns_ttl_t *ttl) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(IS_CACHE(rbtdb)); + + *ttl = rbtdb->common.serve_stale_ttl; + return (ISC_R_SUCCESS); +} + +static isc_result_t +setservestalerefresh(dns_db_t *db, uint32_t interval) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(IS_CACHE(rbtdb)); + + /* currently no bounds checking. 0 means disable. 
*/ + rbtdb->serve_stale_refresh = interval; + return (ISC_R_SUCCESS); +} + +static isc_result_t +getservestalerefresh(dns_db_t *db, uint32_t *interval) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(IS_CACHE(rbtdb)); + + *interval = rbtdb->serve_stale_refresh; + return (ISC_R_SUCCESS); +} + +static void +expiredata(dns_db_t *db, dns_dbnode_t *node, void *data) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; + dns_slabheader_t *header = data; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + isc_rwlocktype_t tlocktype = isc_rwlocktype_none; + + NODE_WRLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); + dns__cachedb_expireheader(header, &tlocktype, + dns_expire_flush DNS__DB_FLARG_PASS); + NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); + INSIST(tlocktype == isc_rwlocktype_none); +} + +dns_dbmethods_t dns__rbtdb_cachemethods = { + .destroy = dns__rbtdb_destroy, + .currentversion = dns__rbtdb_currentversion, + .newversion = dns__rbtdb_newversion, + .attachversion = dns__rbtdb_attachversion, + .closeversion = dns__rbtdb_closeversion, + .findnode = dns__rbtdb_findnode, + .find = cache_find, + .findzonecut = cache_findzonecut, + .attachnode = dns__rbtdb_attachnode, + .detachnode = dns__rbtdb_detachnode, + .createiterator = dns__rbtdb_createiterator, + .findrdataset = cache_findrdataset, + .allrdatasets = dns__rbtdb_allrdatasets, + .addrdataset = dns__rbtdb_addrdataset, + .subtractrdataset = dns__rbtdb_subtractrdataset, + .deleterdataset = dns__rbtdb_deleterdataset, + .nodecount = dns__rbtdb_nodecount, + .setloop = dns__rbtdb_setloop, + .getoriginnode = dns__rbtdb_getoriginnode, + .getrrsetstats = getrrsetstats, + .setcachestats = setcachestats, + .hashsize = hashsize, + .setservestalettl = setservestalettl, + .getservestalettl = getservestalettl, + .setservestalerefresh = setservestalerefresh, + .getservestalerefresh = getservestalerefresh, + .locknode 
= dns__rbtdb_locknode, + .unlocknode = dns__rbtdb_unlocknode, + .expiredata = expiredata, + .deletedata = dns__rbtdb_deletedata, +}; + +/* + * Caller must hold the node (write) lock. + */ +void +dns__cachedb_expireheader(dns_slabheader_t *header, + isc_rwlocktype_t *tlocktypep, + dns_expire_t reason DNS__DB_FLARG) { + dns__rbtdb_setttl(header, 0); + dns__rbtdb_mark(header, DNS_SLABHEADERATTR_ANCIENT); + HEADER_NODE(header)->dirty = 1; + + if (isc_refcount_current(&HEADER_NODE(header)->references) == 0) { + isc_rwlocktype_t nlocktype = isc_rwlocktype_write; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)header->db; + + /* + * If no one else is using the node, we can clean it up now. + * We first need to gain a new reference to the node to meet a + * requirement of dns__rbtdb_decref(). + */ + dns__rbtdb_newref(rbtdb, HEADER_NODE(header), + nlocktype DNS__DB_FLARG_PASS); + dns__rbtdb_decref(rbtdb, HEADER_NODE(header), 0, &nlocktype, + tlocktypep, true, false DNS__DB_FLARG_PASS); + + if (rbtdb->cachestats == NULL) { + return; + } + + switch (reason) { + case dns_expire_ttl: + isc_stats_increment(rbtdb->cachestats, + dns_cachestatscounter_deletettl); + break; + case dns_expire_lru: + isc_stats_increment(rbtdb->cachestats, + dns_cachestatscounter_deletelru); + break; + default: + break; + } + } +} + +static size_t +rdataset_size(dns_slabheader_t *header) { + if (!NONEXISTENT(header)) { + return (dns_rdataslab_size((unsigned char *)header, + sizeof(*header))); + } + + return (sizeof(*header)); +} + +static size_t +expire_lru_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, + isc_rwlocktype_t *tlocktypep, + size_t purgesize DNS__DB_FLARG) { + dns_slabheader_t *header = NULL, *header_prev = NULL; + size_t purged = 0; + + for (header = ISC_LIST_TAIL(rbtdb->lru[locknum]); + header != NULL && purged <= purgesize; header = header_prev) + { + size_t header_size = rdataset_size(header); + header_prev = ISC_LIST_PREV(header, link); + + /* + * Unlink the entry at this point to avoid 
checking it + * again even if it's currently used by someone else and + * cannot be purged at this moment. This entry won't be + * referenced any more (so unlinking is safe) since the + * TTL was reset to 0. + */ + ISC_LIST_UNLINK(rbtdb->lru[locknum], header, link); + dns__cachedb_expireheader(header, tlocktypep, + dns_expire_lru DNS__DB_FLARG_PASS); + purged += header_size; + } + + return (purged); +} + +/*% + * Purge some expired and/or stale (i.e. unused for some period) cache entries + * due to an overmem condition. To recover from this condition quickly, + * we clean up entries up to the size of newly added rdata that triggered + * the overmem; this is accessible via newheader. + * + * This process is triggered while adding a new entry, and we specifically + * avoid purging entries in the same LRU bucket as the one to which the new + * entry will belong. Otherwise, we might purge entries of the same name + * of different RR types while adding RRsets from a single response + * (consider the case where we're adding A and AAAA glue records of the + * same NS name).
+ */ +void +dns__cachedb_overmem(dns_rbtdb_t *rbtdb, dns_slabheader_t *newheader, + unsigned int locknum_start, + isc_rwlocktype_t *tlocktypep DNS__DB_FLARG) { + unsigned int locknum; + size_t purgesize = rdataset_size(newheader); + size_t purged = 0; + + for (locknum = (locknum_start + 1) % rbtdb->node_lock_count; + locknum != locknum_start && purged <= purgesize; + locknum = (locknum + 1) % rbtdb->node_lock_count) + { + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + NODE_WRLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); + + purged += expire_lru_headers(rbtdb, locknum, tlocktypep, + purgesize - + purged DNS__DB_FLARG_PASS); + + NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); + } +} diff --git a/lib/dns/rbt-zonedb.c b/lib/dns/rbt-zonedb.c index 023e284c2b..0fd838bc5b 100644 --- a/lib/dns/rbt-zonedb.c +++ b/lib/dns/rbt-zonedb.c @@ -89,44 +89,6 @@ #define RBTDB_ATTR_LOADED 0x01 #define RBTDB_ATTR_LOADING 0x02 -/* - * Caller must be holding the node lock. - */ -static void -new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, - isc_rwlocktype_t locktype DNS__DB_FLARG) { - uint_fast32_t refs; - - if (locktype == isc_rwlocktype_write && ISC_LINK_LINKED(node, deadlink)) - { - ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum], node, - deadlink); - } - - refs = isc_refcount_increment0(&node->references); -#if DNS_DB_NODETRACE - fprintf(stderr, "incr:node:%s:%s:%u:%p->references = %" PRIuFAST32 "\n", - func, file, line, node, refs + 1); -#else - UNUSED(refs); -#endif - - if (refs == 0) { - /* this is the first reference to the node */ - refs = isc_refcount_increment0( - &rbtdb->node_locks[node->locknum].references); -#if DNS_DB_NODETRACE - fprintf(stderr, - "incr:nodelock:%s:%s:%u:%p:%p->references = " - "%" PRIuFAST32 "\n", - func, file, line, node, - &rbtdb->node_locks[node->locknum], refs + 1); -#else - UNUSED(refs); -#endif - } -} - static isc_result_t findnsec3node(dns_db_t *db, const dns_name_t *name, bool create, dns_dbnode_t **nodep 
DNS__DB_FLARG) { @@ -236,8 +198,8 @@ zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, * We increment the reference count on node to ensure that * search->zonecut_header will still be valid later. */ - new_reference(search->rbtdb, node, - isc_rwlocktype_read DNS__DB_FLARG_PASS); + dns__rbtdb_newref(search->rbtdb, node, + isc_rwlocktype_read DNS__DB_FLARG_PASS); search->zonecut = node; search->zonecut_header = found; search->need_cleanup = true; @@ -963,7 +925,7 @@ again: foundname, NULL); if (result == ISC_R_SUCCESS) { if (nodep != NULL) { - new_reference( + dns__rbtdb_newref( search->rbtdb, node, isc_rwlocktype_read DNS__DB_FLARG_PASS); @@ -1256,8 +1218,8 @@ found: * ensure that search->zonecut_header will * still be valid later. */ - new_reference(search.rbtdb, node, - nlocktype DNS__DB_FLARG_PASS); + dns__rbtdb_newref(search.rbtdb, node, + nlocktype DNS__DB_FLARG_PASS); search.zonecut = node; search.zonecut_header = header; search.zonecut_sigheader = NULL; @@ -1431,8 +1393,8 @@ found: goto tree_exit; } if (nodep != NULL) { - new_reference(search.rbtdb, node, - nlocktype DNS__DB_FLARG_PASS); + dns__rbtdb_newref(search.rbtdb, node, + nlocktype DNS__DB_FLARG_PASS); *nodep = node; } if ((search.rbtversion->secure && @@ -1502,8 +1464,8 @@ found: if (nodep != NULL) { if (!at_zonecut) { - new_reference(search.rbtdb, node, - nlocktype DNS__DB_FLARG_PASS); + dns__rbtdb_newref(search.rbtdb, node, + nlocktype DNS__DB_FLARG_PASS); } else { search.need_cleanup = false; } @@ -2156,21 +2118,6 @@ setgluecachestats(dns_db_t *db, isc_stats_t *stats) { return (ISC_R_SUCCESS); } -static void -deletedata(dns_db_t *db ISC_ATTR_UNUSED, dns_dbnode_t *node ISC_ATTR_UNUSED, - void *data) { - dns_slabheader_t *header = data; - - if (header->heap != NULL && header->heap_index != 0) { - isc_heap_delete(header->heap, header->heap_index); - } - header->heap_index = 0; - - if (header->glue_list) { - dns__rbtdb_freeglue(header->glue_list); - } -} - static dns_glue_t * 
new_gluelist(isc_mem_t *mctx, dns_name_t *name) { dns_glue_t *glue = isc_mem_getx(mctx, sizeof(*glue), ISC_MEM_ZERO); @@ -2504,7 +2451,7 @@ dns_dbmethods_t dns__rbtdb_zonemethods = { .locknode = dns__rbtdb_locknode, .unlocknode = dns__rbtdb_unlocknode, .addglue = addglue, - .deletedata = deletedata, + .deletedata = dns__rbtdb_deletedata, }; void @@ -2529,28 +2476,14 @@ dns__zonedb_resigndelete(dns_rbtdb_t *rbtdb, dns_rbtdb_version_t *version, header->heap_index); header->heap_index = 0; if (version != NULL) { - new_reference(rbtdb, HEADER_NODE(header), - isc_rwlocktype_write DNS__DB_FLARG_PASS); + dns__rbtdb_newref( + rbtdb, HEADER_NODE(header), + isc_rwlocktype_write DNS__DB_FLARG_PASS); ISC_LIST_APPEND(version->resigned_list, header, link); } } } -/* - * Add the necessary magic for the wildcard name 'name' - * to be found in 'rbtdb'. - * - * In order for wildcard matching to work correctly in - * zone_find(), we must ensure that a node for the wildcarding - * level exists in the database, and has its 'find_callback' - * and 'wild' bits set. - * - * E.g. if the wildcard name is "*.sub.example." then we - * must ensure that "sub.example." exists and is marked as - * a wildcard level. - * - * tree_lock(write) must be held. - */ isc_result_t dns__zonedb_wildcardmagic(dns_rbtdb_t *rbtdb, const dns_name_t *name, bool lock) { @@ -2584,9 +2517,6 @@ dns__zonedb_wildcardmagic(dns_rbtdb_t *rbtdb, const dns_name_t *name, return (ISC_R_SUCCESS); } -/* - * tree_lock(write) must be held. 
- */ isc_result_t dns__zonedb_addwildcards(dns_rbtdb_t *rbtdb, const dns_name_t *name, bool lock) { @@ -2600,7 +2530,7 @@ dns__zonedb_addwildcards(dns_rbtdb_t *rbtdb, const dns_name_t *name, l = dns_name_countlabels(&rbtdb->common.origin); i = l + 1; while (i < n) { - dns_rbtnode_t *node = NULL; /* dummy */ + dns_rbtnode_t *node = NULL; dns_name_getlabelsequence(name, n - i, i, &foundname); if (dns_name_iswildcard(&foundname)) { result = dns__zonedb_wildcardmagic(rbtdb, &foundname, diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c index 260273faad..674d0b5771 100644 --- a/lib/dns/rbtdb.c +++ b/lib/dns/rbtdb.c @@ -70,26 +70,6 @@ goto failure; \ } while (0) -/*% - * Whether to rate-limit updating the LRU to avoid possible thread contention. - * Updating LRU requires write locking, so we don't do it every time the - * record is touched - only after some time passes. - */ -#ifndef DNS_RBTDB_LIMITLRUUPDATE -#define DNS_RBTDB_LIMITLRUUPDATE 1 -#endif - -/*% Time after which we update LRU for glue records, 5 minutes */ -#define DNS_RBTDB_LRUUPDATE_GLUE 300 -/*% Time after which we update LRU for all other records, 10 minutes */ -#define DNS_RBTDB_LRUUPDATE_REGULAR 600 - -/* - * Allow clients with a virtual time of up to 5 minutes in the past to see - * records that would have otherwise have expired. - */ -#define RBTDB_VIRTUAL 300 - #define EXISTS(header) \ ((atomic_load_acquire(&(header)->attributes) & \ DNS_SLABHEADERATTR_NONEXISTENT) == 0) @@ -157,7 +137,7 @@ * The default value should work well for most environments, but this can * also be configurable at compilation time via the * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than - * 1 due to the assumption of overmem_purge(). + * 1 due to the assumption of dns__cachedb_overmem(). 
*/ #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1 @@ -171,13 +151,6 @@ static void delete_callback(void *data, void *arg); -static bool -need_headerupdate(dns_slabheader_t *header, isc_stdtime_t now); -static void -update_header(dns_rbtdb_t *rbtdb, dns_slabheader_t *header, isc_stdtime_t now); -static void -overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, size_t purgesize, - isc_rwlocktype_t *tlocktypep DNS__DB_FLARG); static void prune_tree(void *arg); static void @@ -296,41 +269,6 @@ static atomic_uint_fast16_t init_count = 0; * DB Routines */ -static void -free_rbtdb_callback(void *arg) { - dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)arg; - - free_rbtdb(rbtdb, true); -} - -static void -update_cachestats(dns_rbtdb_t *rbtdb, isc_result_t result) { - INSIST(IS_CACHE(rbtdb)); - - if (rbtdb->cachestats == NULL) { - return; - } - - switch (result) { - case DNS_R_COVERINGNSEC: - isc_stats_increment(rbtdb->cachestats, - dns_cachestatscounter_coveringnsec); - FALLTHROUGH; - case ISC_R_SUCCESS: - case DNS_R_CNAME: - case DNS_R_DNAME: - case DNS_R_DELEGATION: - case DNS_R_NCACHENXDOMAIN: - case DNS_R_NCACHENXRRSET: - isc_stats_increment(rbtdb->cachestats, - dns_cachestatscounter_hits); - break; - default: - isc_stats_increment(rbtdb->cachestats, - dns_cachestatscounter_misses); - } -} - static void update_rrsetstats(dns_stats_t *stats, const dns_typepair_t htype, const uint_least16_t hattributes, const bool increment) { @@ -372,8 +310,8 @@ update_rrsetstats(dns_stats_t *stats, const dns_typepair_t htype, } } -static void -set_ttl(dns_slabheader_t *header, dns_ttl_t newttl) { +void +dns__rbtdb_setttl(dns_slabheader_t *header, dns_ttl_t newttl) { dns_ttl_t oldttl = header->ttl; header->ttl = newttl; @@ -489,6 +427,13 @@ adjust_quantum(unsigned int old, isc_time_t *start) { return (nodes); } +static void +free_rbtdb_callback(void *arg) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)arg; + + free_rbtdb(rbtdb, true); +} + static void 
free_rbtdb(dns_rbtdb_t *rbtdb, bool log) { unsigned int i; @@ -833,23 +778,6 @@ add_changed(dns_slabheader_t *header, return (changed); } -static void -free_proof(isc_mem_t *mctx, dns_proof_t **noqname) { - if (dns_name_dynamic(&(*noqname)->name)) { - dns_name_free(&(*noqname)->name, mctx); - } - if ((*noqname)->neg != NULL) { - isc_mem_put(mctx, (*noqname)->neg, - dns_rdataslab_size((*noqname)->neg, 0)); - } - if ((*noqname)->negsig != NULL) { - isc_mem_put(mctx, (*noqname)->negsig, - dns_rdataslab_size((*noqname)->negsig, 0)); - } - isc_mem_put(mctx, *noqname, sizeof(**noqname)); - *noqname = NULL; -} - static void rollback_node(dns_rbtnode_t *node, uint32_t serial) { dns_slabheader_t *header = NULL, *dcurrent = NULL; @@ -885,8 +813,8 @@ rollback_node(dns_rbtnode_t *node, uint32_t serial) { } } -static void -mark_header_ancient(dns_slabheader_t *header) { +void +dns__rbtdb_mark(dns_slabheader_t *header, uint_least16_t flag) { uint_least16_t attributes = atomic_load_acquire(&header->attributes); uint_least16_t newattributes = 0; dns_stats_t *stats = NULL; @@ -895,54 +823,29 @@ mark_header_ancient(dns_slabheader_t *header) { * If we are already ancient there is nothing to do. */ do { - if ((attributes & DNS_SLABHEADERATTR_ANCIENT) != 0) { + if ((attributes & flag) != 0) { return; } - newattributes = attributes | DNS_SLABHEADERATTR_ANCIENT; + newattributes = attributes | flag; } while (!atomic_compare_exchange_weak_acq_rel( &header->attributes, &attributes, newattributes)); /* - * Decrement the stats counter for the appropriate RRtype. - * If the STALE attribute is set, this will decrement the - * stale type counter, otherwise it decrements the active - * stats type counter. + * Decrement and increment the stats counter for the appropriate + * RRtype. */ stats = dns_db_getrrsetstats(header->db); - update_rrsetstats(stats, header->type, attributes, false); - HEADER_NODE(header)->dirty = 1; - - /* Increment the stats counter for the ancient RRtype. 
*/ - update_rrsetstats(stats, header->type, newattributes, true); + if (stats != NULL) { + update_rrsetstats(stats, header->type, attributes, false); + update_rrsetstats(stats, header->type, newattributes, true); + } } static void -mark_header_stale(dns_rbtdb_t *rbtdb, dns_slabheader_t *header) { - uint_least16_t attributes = atomic_load_acquire(&header->attributes); - uint_least16_t newattributes = 0; - - INSIST((attributes & DNS_SLABHEADERATTR_ZEROTTL) == 0); - - /* - * If we are already stale there is nothing to do. - */ - do { - if ((attributes & DNS_SLABHEADERATTR_STALE) != 0) { - return; - } - newattributes = attributes | DNS_SLABHEADERATTR_STALE; - } while (!atomic_compare_exchange_weak_acq_rel( - &header->attributes, &attributes, newattributes)); - - /* Decrement the stats counter for the appropriate RRtype. - * If the ANCIENT attribute is set (although it is very - * unlikely that an RRset goes from ANCIENT to STALE), this - * will decrement the ancient stale type counter, otherwise it - * decrements the active stats type counter. - */ - - update_rrsetstats(rbtdb->rrsetstats, header->type, attributes, false); - update_rrsetstats(rbtdb->rrsetstats, header->type, newattributes, true); +mark_ancient(dns_slabheader_t *header) { + dns__rbtdb_setttl(header, 0); + dns__rbtdb_mark(header, DNS_SLABHEADERATTR_ANCIENT); + HEADER_NODE(header)->dirty = 1; } static void @@ -1199,9 +1102,9 @@ delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) { /* * Caller must be holding the node lock. 
*/ -static void -new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, - isc_rwlocktype_t locktype DNS__DB_FLARG) { +void +dns__rbtdb_newref(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, + isc_rwlocktype_t locktype DNS__DB_FLARG) { uint_fast32_t refs; if (locktype == isc_rwlocktype_write && ISC_LINK_LINKED(node, deadlink)) @@ -1250,7 +1153,7 @@ send_to_prune_tree(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, *prune = (prune_t){ .node = node }; dns_db_attach((dns_db_t *)rbtdb, &prune->db); - new_reference(rbtdb, node, locktype DNS__DB_FLARG_PASS); + dns__rbtdb_newref(rbtdb, node, locktype DNS__DB_FLARG_PASS); isc_async_run(rbtdb->loop, prune_tree, prune); } @@ -1355,7 +1258,7 @@ reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, } } - new_reference(rbtdb, node, nlocktype DNS__DB_FLARG_PASS); + dns__rbtdb_newref(rbtdb, node, nlocktype DNS__DB_FLARG_PASS); NODE_UNLOCK(nodelock, &nlocktype); } @@ -1599,8 +1502,8 @@ prune_tree(void *arg) { ISC_LIST_UNLINK(rbtdb->deadnodes[locknum], parent, deadlink); } - new_reference(rbtdb, parent, - nlocktype DNS__DB_FILELINE); + dns__rbtdb_newref(rbtdb, parent, + nlocktype DNS__DB_FILELINE); } else { parent = NULL; } @@ -2198,7 +2101,7 @@ dns__rbtdb_bindrdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, return; } - new_reference(rbtdb, node, locktype DNS__DB_FLARG_PASS); + dns__rbtdb_newref(rbtdb, node, locktype DNS__DB_FLARG_PASS); INSIST(rdataset->methods == NULL); /* We must be disassociated. */ @@ -2292,1082 +2195,6 @@ dns__rbtdb_bindrdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, } } -static isc_result_t -setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep, - dns_name_t *foundname, dns_rdataset_t *rdataset, - dns_rdataset_t *sigrdataset DNS__DB_FLARG) { - dns_name_t *zcname = NULL; - dns_typepair_t type; - dns_rbtnode_t *node = NULL; - - REQUIRE(search != NULL); - REQUIRE(search->zonecut != NULL); - REQUIRE(search->zonecut_header != NULL); - - /* - * The caller MUST NOT be holding any node locks. 
- */ - - node = search->zonecut; - type = search->zonecut_header->type; - - /* - * If we have to set foundname, we do it before anything else. - * If we were to set foundname after we had set nodep or bound the - * rdataset, then we'd have to undo that work if dns_name_copy() - * failed. By setting foundname first, there's nothing to undo if - * we have trouble. - */ - if (foundname != NULL && search->copy_name) { - zcname = dns_fixedname_name(&search->zonecut_name); - dns_name_copy(zcname, foundname); - } - if (nodep != NULL) { - /* - * Note that we don't have to increment the node's reference - * count here because we're going to use the reference we - * already have in the search block. - */ - *nodep = node; - search->need_cleanup = false; - } - if (rdataset != NULL) { - isc_rwlocktype_t nlocktype = isc_rwlocktype_none; - NODE_RDLOCK(&(search->rbtdb->node_locks[node->locknum].lock), - &nlocktype); - dns__rbtdb_bindrdataset(search->rbtdb, node, - search->zonecut_header, search->now, - isc_rwlocktype_read, - rdataset DNS__DB_FLARG_PASS); - if (sigrdataset != NULL && search->zonecut_sigheader != NULL) { - dns__rbtdb_bindrdataset( - search->rbtdb, node, search->zonecut_sigheader, - search->now, isc_rwlocktype_read, - sigrdataset DNS__DB_FLARG_PASS); - } - NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock), - &nlocktype); - } - - if (type == dns_rdatatype_dname) { - return (DNS_R_DNAME); - } - return (DNS_R_DELEGATION); -} - -static bool -check_stale_header(dns_rbtnode_t *node, dns_slabheader_t *header, - isc_rwlocktype_t *nlocktypep, isc_rwlock_t *lock, - rbtdb_search_t *search, dns_slabheader_t **header_prev) { - if (!ACTIVE(header, search->now)) { - dns_ttl_t stale = header->ttl + - STALE_TTL(header, search->rbtdb); - /* - * If this data is in the stale window keep it and if - * DNS_DBFIND_STALEOK is not set we tell the caller to - * skip this record. We skip the records with ZEROTTL - * (these records should not be cached anyway). 
- */ - - DNS_SLABHEADER_CLRATTR(header, DNS_SLABHEADERATTR_STALE_WINDOW); - if (!ZEROTTL(header) && KEEPSTALE(search->rbtdb) && - stale > search->now) - { - mark_header_stale(search->rbtdb, header); - *header_prev = header; - /* - * If DNS_DBFIND_STALESTART is set then it means we - * failed to resolve the name during recursion, in - * this case we mark the time in which the refresh - * failed. - */ - if ((search->options & DNS_DBFIND_STALESTART) != 0) { - atomic_store_release( - &header->last_refresh_fail_ts, - search->now); - } else if ((search->options & - DNS_DBFIND_STALEENABLED) != 0 && - search->now < - (atomic_load_acquire( - &header->last_refresh_fail_ts) + - search->rbtdb->serve_stale_refresh)) - { - /* - * If we are within interval between last - * refresh failure time + 'stale-refresh-time', - * then don't skip this stale entry but use it - * instead. - */ - DNS_SLABHEADER_SETATTR( - header, - DNS_SLABHEADERATTR_STALE_WINDOW); - return (false); - } else if ((search->options & - DNS_DBFIND_STALETIMEOUT) != 0) - { - /* - * We want stale RRset due to timeout, so we - * don't skip it. - */ - return (false); - } - return ((search->options & DNS_DBFIND_STALEOK) == 0); - } - - /* - * This rdataset is stale. If no one else is using the - * node, we can clean it up right now, otherwise we mark - * it as ancient, and the node as dirty, so it will get - * cleaned up later. - */ - if ((header->ttl < search->now - RBTDB_VIRTUAL) && - (*nlocktypep == isc_rwlocktype_write || - NODE_TRYUPGRADE(lock, nlocktypep) == ISC_R_SUCCESS)) - { - /* - * We update the node's status only when we can - * get write access; otherwise, we leave others - * to this work. Periodical cleaning will - * eventually take the job as the last resort. - * We won't downgrade the lock, since other - * rdatasets are probably stale, too. 
- */ - - if (isc_refcount_current(&node->references) == 0) { - /* - * header->down can be non-NULL if the - * refcount has just decremented to 0 - * but dns__rbtdb_decref() has not - * performed clean_cache_node(), in - * which case we need to purge the stale - * headers first. - */ - clean_stale_headers(header); - if (*header_prev != NULL) { - (*header_prev)->next = header->next; - } else { - node->data = header->next; - } - dns_slabheader_destroy(&header); - } else { - mark_header_ancient(header); - *header_prev = header; - } - } else { - *header_prev = header; - } - return (true); - } - return (false); -} - -static isc_result_t -cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, - void *arg DNS__DB_FLARG) { - rbtdb_search_t *search = arg; - dns_slabheader_t *header = NULL; - dns_slabheader_t *header_prev = NULL, *header_next = NULL; - dns_slabheader_t *dname_header = NULL, *sigdname_header = NULL; - isc_result_t result; - isc_rwlock_t *lock = NULL; - isc_rwlocktype_t nlocktype = isc_rwlocktype_none; - - REQUIRE(search->zonecut == NULL); - - /* - * Keep compiler silent. - */ - UNUSED(name); - - lock = &(search->rbtdb->node_locks[node->locknum].lock); - NODE_RDLOCK(lock, &nlocktype); - - /* - * Look for a DNAME or RRSIG DNAME rdataset. - */ - for (header = node->data; header != NULL; header = header_next) { - header_next = header->next; - if (check_stale_header(node, header, &nlocktype, lock, search, - &header_prev)) - { - /* Do nothing. 
*/ - } else if (header->type == dns_rdatatype_dname && - EXISTS(header) && !ANCIENT(header)) - { - dname_header = header; - header_prev = header; - } else if (header->type == RBTDB_RDATATYPE_SIGDNAME && - EXISTS(header) && !ANCIENT(header)) - { - sigdname_header = header; - header_prev = header; - } else { - header_prev = header; - } - } - - if (dname_header != NULL && - (!DNS_TRUST_PENDING(dname_header->trust) || - (search->options & DNS_DBFIND_PENDINGOK) != 0)) - { - /* - * We increment the reference count on node to ensure that - * search->zonecut_header will still be valid later. - */ - new_reference(search->rbtdb, node, - nlocktype DNS__DB_FLARG_PASS); - search->zonecut = node; - search->zonecut_header = dname_header; - search->zonecut_sigheader = sigdname_header; - search->need_cleanup = true; - result = DNS_R_PARTIALMATCH; - } else { - result = DNS_R_CONTINUE; - } - - NODE_UNLOCK(lock, &nlocktype); - - return (result); -} - -static isc_result_t -find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node, - dns_dbnode_t **nodep, dns_name_t *foundname, - dns_rdataset_t *rdataset, - dns_rdataset_t *sigrdataset DNS__DB_FLARG) { - unsigned int i; - isc_result_t result = ISC_R_NOTFOUND; - dns_name_t name; - dns_rbtdb_t *rbtdb = NULL; - bool done; - - /* - * Caller must be holding the tree lock. - */ - - rbtdb = search->rbtdb; - i = search->chain.level_matches; - done = false; - do { - dns_slabheader_t *header = NULL; - dns_slabheader_t *header_prev = NULL, *header_next = NULL; - dns_slabheader_t *found = NULL, *foundsig = NULL; - isc_rwlock_t *lock = &rbtdb->node_locks[node->locknum].lock; - isc_rwlocktype_t nlocktype = isc_rwlocktype_none; - - NODE_RDLOCK(lock, &nlocktype); - - /* - * Look for NS and RRSIG NS rdatasets. - */ - for (header = node->data; header != NULL; header = header_next) - { - header_next = header->next; - if (check_stale_header(node, header, &nlocktype, lock, - search, &header_prev)) - { - /* Do nothing. 
*/ - } else if (EXISTS(header) && !ANCIENT(header)) { - /* - * We've found an extant rdataset. See if - * we're interested in it. - */ - if (header->type == dns_rdatatype_ns) { - found = header; - if (foundsig != NULL) { - break; - } - } else if (header->type == - RBTDB_RDATATYPE_SIGNS) - { - foundsig = header; - if (found != NULL) { - break; - } - } - header_prev = header; - } else { - header_prev = header; - } - } - - if (found != NULL) { - /* - * If we have to set foundname, we do it before - * anything else. If we were to set foundname after - * we had set nodep or bound the rdataset, then we'd - * have to undo that work if dns_name_concatenate() - * failed. By setting foundname first, there's - * nothing to undo if we have trouble. - */ - if (foundname != NULL) { - dns_name_init(&name, NULL); - dns_rbt_namefromnode(node, &name); - dns_name_copy(&name, foundname); - while (i > 0) { - dns_rbtnode_t *level_node = - search->chain.levels[--i]; - dns_name_init(&name, NULL); - dns_rbt_namefromnode(level_node, &name); - result = dns_name_concatenate( - foundname, &name, foundname, - NULL); - if (result != ISC_R_SUCCESS) { - if (nodep != NULL) { - *nodep = NULL; - } - goto node_exit; - } - } - } - result = DNS_R_DELEGATION; - if (nodep != NULL) { - new_reference(search->rbtdb, node, - nlocktype DNS__DB_FLARG_PASS); - *nodep = node; - } - dns__rbtdb_bindrdataset(search->rbtdb, node, found, - search->now, nlocktype, - rdataset DNS__DB_FLARG_PASS); - if (foundsig != NULL) { - dns__rbtdb_bindrdataset( - search->rbtdb, node, foundsig, - search->now, nlocktype, - sigrdataset DNS__DB_FLARG_PASS); - } - if (need_headerupdate(found, search->now) || - (foundsig != NULL && - need_headerupdate(foundsig, search->now))) - { - if (nlocktype != isc_rwlocktype_write) { - NODE_FORCEUPGRADE(lock, &nlocktype); - POST(nlocktype); - } - if (need_headerupdate(found, search->now)) { - update_header(search->rbtdb, found, - search->now); - } - if (foundsig != NULL && - 
need_headerupdate(foundsig, search->now)) - { - update_header(search->rbtdb, foundsig, - search->now); - } - } - } - - node_exit: - NODE_UNLOCK(lock, &nlocktype); - - if (found == NULL && i > 0) { - i--; - node = search->chain.levels[i]; - } else { - done = true; - } - } while (!done); - - return (result); -} - -/* - * Look for a potentially covering NSEC in the cache where `name` - * is known not to exist. This uses the auxiliary NSEC tree to find - * the potential NSEC owner. If found, we update 'foundname', 'nodep', - * 'rdataset' and 'sigrdataset', and return DNS_R_COVERINGNSEC. - * Otherwise, return ISC_R_NOTFOUND. - */ -static isc_result_t -find_coveringnsec(rbtdb_search_t *search, const dns_name_t *name, - dns_dbnode_t **nodep, isc_stdtime_t now, - dns_name_t *foundname, dns_rdataset_t *rdataset, - dns_rdataset_t *sigrdataset DNS__DB_FLARG) { - dns_fixedname_t fprefix, forigin, ftarget, fixed; - dns_name_t *prefix = NULL, *origin = NULL; - dns_name_t *target = NULL, *fname = NULL; - dns_rbtnode_t *node = NULL; - dns_rbtnodechain_t chain; - isc_result_t result; - isc_rwlocktype_t nlocktype = isc_rwlocktype_none; - isc_rwlock_t *lock = NULL; - dns_typepair_t matchtype, sigmatchtype; - dns_slabheader_t *found = NULL, *foundsig = NULL; - dns_slabheader_t *header = NULL; - dns_slabheader_t *header_next = NULL, *header_prev = NULL; - - /* - * Look for the node in the auxilary tree. 
- */ - dns_rbtnodechain_init(&chain); - target = dns_fixedname_initname(&ftarget); - result = dns_rbt_findnode(search->rbtdb->nsec, name, target, &node, - &chain, DNS_RBTFIND_EMPTYDATA, NULL, NULL); - if (result != DNS_R_PARTIALMATCH) { - dns_rbtnodechain_reset(&chain); - return (ISC_R_NOTFOUND); - } - - prefix = dns_fixedname_initname(&fprefix); - origin = dns_fixedname_initname(&forigin); - target = dns_fixedname_initname(&ftarget); - fname = dns_fixedname_initname(&fixed); - - matchtype = DNS_TYPEPAIR_VALUE(dns_rdatatype_nsec, 0); - sigmatchtype = DNS_TYPEPAIR_VALUE(dns_rdatatype_rrsig, - dns_rdatatype_nsec); - - /* - * Extract predecessor from chain. - */ - result = dns_rbtnodechain_current(&chain, prefix, origin, NULL); - dns_rbtnodechain_reset(&chain); - if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN) { - return (ISC_R_NOTFOUND); - } - - result = dns_name_concatenate(prefix, origin, target, NULL); - if (result != ISC_R_SUCCESS) { - return (ISC_R_NOTFOUND); - } - - /* - * Lookup the predecessor in the main tree. 
- */ - node = NULL; - result = dns_rbt_findnode(search->rbtdb->tree, target, fname, &node, - NULL, DNS_RBTFIND_EMPTYDATA, NULL, NULL); - if (result != ISC_R_SUCCESS) { - return (ISC_R_NOTFOUND); - } - - lock = &(search->rbtdb->node_locks[node->locknum].lock); - NODE_RDLOCK(lock, &nlocktype); - for (header = node->data; header != NULL; header = header_next) { - header_next = header->next; - if (check_stale_header(node, header, &nlocktype, lock, search, - &header_prev)) - { - continue; - } - if (NONEXISTENT(header) || DNS_TYPEPAIR_TYPE(header->type) == 0) - { - header_prev = header; - continue; - } - if (header->type == matchtype) { - found = header; - if (foundsig != NULL) { - break; - } - } else if (header->type == sigmatchtype) { - foundsig = header; - if (found != NULL) { - break; - } - } - header_prev = header; - } - if (found != NULL) { - dns__rbtdb_bindrdataset(search->rbtdb, node, found, now, - nlocktype, rdataset DNS__DB_FLARG_PASS); - if (foundsig != NULL) { - dns__rbtdb_bindrdataset(search->rbtdb, node, foundsig, - now, nlocktype, - sigrdataset DNS__DB_FLARG_PASS); - } - new_reference(search->rbtdb, node, - nlocktype DNS__DB_FLARG_PASS); - - dns_name_copy(fname, foundname); - - *nodep = node; - result = DNS_R_COVERINGNSEC; - } else { - result = ISC_R_NOTFOUND; - } - NODE_UNLOCK(lock, &nlocktype); - return (result); -} - -static isc_result_t -cache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version, - dns_rdatatype_t type, unsigned int options, isc_stdtime_t now, - dns_dbnode_t **nodep, dns_name_t *foundname, - dns_rdataset_t *rdataset, - dns_rdataset_t *sigrdataset DNS__DB_FLARG) { - dns_rbtnode_t *node = NULL; - isc_result_t result; - rbtdb_search_t search; - bool cname_ok = true; - bool found_noqname = false; - bool all_negative = true; - bool empty_node; - isc_rwlock_t *lock = NULL; - isc_rwlocktype_t tlocktype = isc_rwlocktype_none; - isc_rwlocktype_t nlocktype = isc_rwlocktype_none; - dns_slabheader_t *header = NULL; - 
dns_slabheader_t *header_prev = NULL, *header_next = NULL; - dns_slabheader_t *found = NULL, *nsheader = NULL; - dns_slabheader_t *foundsig = NULL, *nssig = NULL, *cnamesig = NULL; - dns_slabheader_t *update = NULL, *updatesig = NULL; - dns_slabheader_t *nsecheader = NULL, *nsecsig = NULL; - dns_typepair_t sigtype, negtype; - - UNUSED(version); - - REQUIRE(VALID_RBTDB((dns_rbtdb_t *)db)); - REQUIRE(version == NULL); - - if (now == 0) { - now = isc_stdtime_now(); - } - - search = (rbtdb_search_t){ - .rbtdb = (dns_rbtdb_t *)db, - .serial = 1, - .options = options, - .now = now, - }; - dns_fixedname_init(&search.zonecut_name); - dns_rbtnodechain_init(&search.chain); - - TREE_RDLOCK(&search.rbtdb->tree_lock, &tlocktype); - - /* - * Search down from the root of the tree. If, while going down, we - * encounter a callback node, cache_zonecut_callback() will search the - * rdatasets at the zone cut for a DNAME rdataset. - */ - result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node, - &search.chain, DNS_RBTFIND_EMPTYDATA, - cache_zonecut_callback, &search); - - if (result == DNS_R_PARTIALMATCH) { - /* - * If dns_rbt_findnode discovered a covering DNAME skip - * looking for a covering NSEC. 
- */ - if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 && - (search.zonecut_header == NULL || - search.zonecut_header->type != dns_rdatatype_dname)) - { - result = find_coveringnsec( - &search, name, nodep, now, foundname, rdataset, - sigrdataset DNS__DB_FLARG_PASS); - if (result == DNS_R_COVERINGNSEC) { - goto tree_exit; - } - } - if (search.zonecut != NULL) { - result = setup_delegation( - &search, nodep, foundname, rdataset, - sigrdataset DNS__DB_FLARG_PASS); - goto tree_exit; - } else { - find_ns: - result = find_deepest_zonecut( - &search, node, nodep, foundname, rdataset, - sigrdataset DNS__DB_FLARG_PASS); - goto tree_exit; - } - } else if (result != ISC_R_SUCCESS) { - goto tree_exit; - } - - /* - * Certain DNSSEC types are not subject to CNAME matching - * (RFC4035, section 2.5 and RFC3007). - * - * We don't check for RRSIG, because we don't store RRSIG records - * directly. - */ - if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) { - cname_ok = false; - } - - /* - * We now go looking for rdata... - */ - - lock = &(search.rbtdb->node_locks[node->locknum].lock); - NODE_RDLOCK(lock, &nlocktype); - - /* - * These pointers need to be reset here in case we did - * 'goto find_ns' from somewhere below. - */ - found = NULL; - foundsig = NULL; - sigtype = DNS_TYPEPAIR_VALUE(dns_rdatatype_rrsig, type); - negtype = DNS_TYPEPAIR_VALUE(0, type); - nsheader = NULL; - nsecheader = NULL; - nssig = NULL; - nsecsig = NULL; - cnamesig = NULL; - empty_node = true; - header_prev = NULL; - for (header = node->data; header != NULL; header = header_next) { - header_next = header->next; - if (check_stale_header(node, header, &nlocktype, lock, &search, - &header_prev)) - { - /* Do nothing. */ - } else if (EXISTS(header) && !ANCIENT(header)) { - /* - * We now know that there is at least one active - * non-stale rdataset at this node. 
- */ - empty_node = false; - if (header->noqname != NULL && - header->trust == dns_trust_secure) - { - found_noqname = true; - } - if (!NEGATIVE(header)) { - all_negative = false; - } - - /* - * If we found a type we were looking for, remember - * it. - */ - if (header->type == type || - (type == dns_rdatatype_any && - DNS_TYPEPAIR_TYPE(header->type) != 0) || - (cname_ok && header->type == dns_rdatatype_cname)) - { - /* - * We've found the answer. - */ - found = header; - if (header->type == dns_rdatatype_cname && - cname_ok && cnamesig != NULL) - { - /* - * If we've already got the - * CNAME RRSIG, use it. - */ - foundsig = cnamesig; - } - } else if (header->type == sigtype) { - /* - * We've found the RRSIG rdataset for our - * target type. Remember it. - */ - foundsig = header; - } else if (header->type == RBTDB_RDATATYPE_NCACHEANY || - header->type == negtype) - { - /* - * We've found a negative cache entry. - */ - found = header; - } else if (header->type == dns_rdatatype_ns) { - /* - * Remember a NS rdataset even if we're - * not specifically looking for it, because - * we might need it later. - */ - nsheader = header; - } else if (header->type == RBTDB_RDATATYPE_SIGNS) { - /* - * If we need the NS rdataset, we'll also - * need its signature. - */ - nssig = header; - } else if (header->type == dns_rdatatype_nsec) { - nsecheader = header; - } else if (header->type == RBTDB_RDATATYPE_SIGNSEC) { - nsecsig = header; - } else if (cname_ok && - header->type == RBTDB_RDATATYPE_SIGCNAME) - { - /* - * If we get a CNAME match, we'll also need - * its signature. - */ - cnamesig = header; - } - header_prev = header; - } else { - header_prev = header; - } - } - - if (empty_node) { - /* - * We have an exact match for the name, but there are no - * extant rdatasets. That means that this node doesn't - * meaningfully exist, and that we really have a partial match. 
- */ - NODE_UNLOCK(lock, &nlocktype); - if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) { - result = find_coveringnsec( - &search, name, nodep, now, foundname, rdataset, - sigrdataset DNS__DB_FLARG_PASS); - if (result == DNS_R_COVERINGNSEC) { - goto tree_exit; - } - } - goto find_ns; - } - - /* - * If we didn't find what we were looking for... - */ - if (found == NULL || - (DNS_TRUST_ADDITIONAL(found->trust) && - ((options & DNS_DBFIND_ADDITIONALOK) == 0)) || - (found->trust == dns_trust_glue && - ((options & DNS_DBFIND_GLUEOK) == 0)) || - (DNS_TRUST_PENDING(found->trust) && - ((options & DNS_DBFIND_PENDINGOK) == 0))) - { - /* - * Return covering NODATA NSEC record. - */ - if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 && - nsecheader != NULL) - { - if (nodep != NULL) { - new_reference(search.rbtdb, node, - nlocktype DNS__DB_FLARG_PASS); - *nodep = node; - } - dns__rbtdb_bindrdataset(search.rbtdb, node, nsecheader, - search.now, nlocktype, - rdataset DNS__DB_FLARG_PASS); - if (need_headerupdate(nsecheader, search.now)) { - update = nsecheader; - } - if (nsecsig != NULL) { - dns__rbtdb_bindrdataset( - search.rbtdb, node, nsecsig, search.now, - nlocktype, - sigrdataset DNS__DB_FLARG_PASS); - if (need_headerupdate(nsecsig, search.now)) { - updatesig = nsecsig; - } - } - result = DNS_R_COVERINGNSEC; - goto node_exit; - } - - /* - * This name was from a wild card. Look for a covering NSEC. - */ - if (found == NULL && (found_noqname || all_negative) && - (search.options & DNS_DBFIND_COVERINGNSEC) != 0) - { - NODE_UNLOCK(lock, &nlocktype); - result = find_coveringnsec( - &search, name, nodep, now, foundname, rdataset, - sigrdataset DNS__DB_FLARG_PASS); - if (result == DNS_R_COVERINGNSEC) { - goto tree_exit; - } - goto find_ns; - } - - /* - * If there is an NS rdataset at this node, then this is the - * deepest zone cut. 
- */ - if (nsheader != NULL) { - if (nodep != NULL) { - new_reference(search.rbtdb, node, - nlocktype DNS__DB_FLARG_PASS); - *nodep = node; - } - dns__rbtdb_bindrdataset(search.rbtdb, node, nsheader, - search.now, nlocktype, - rdataset DNS__DB_FLARG_PASS); - if (need_headerupdate(nsheader, search.now)) { - update = nsheader; - } - if (nssig != NULL) { - dns__rbtdb_bindrdataset( - search.rbtdb, node, nssig, search.now, - nlocktype, - sigrdataset DNS__DB_FLARG_PASS); - if (need_headerupdate(nssig, search.now)) { - updatesig = nssig; - } - } - result = DNS_R_DELEGATION; - goto node_exit; - } - - /* - * Go find the deepest zone cut. - */ - NODE_UNLOCK(lock, &nlocktype); - goto find_ns; - } - - /* - * We found what we were looking for, or we found a CNAME. - */ - - if (nodep != NULL) { - new_reference(search.rbtdb, node, nlocktype DNS__DB_FLARG_PASS); - *nodep = node; - } - - if (NEGATIVE(found)) { - /* - * We found a negative cache entry. - */ - if (NXDOMAIN(found)) { - result = DNS_R_NCACHENXDOMAIN; - } else { - result = DNS_R_NCACHENXRRSET; - } - } else if (type != found->type && type != dns_rdatatype_any && - found->type == dns_rdatatype_cname) - { - /* - * We weren't doing an ANY query and we found a CNAME instead - * of the type we were looking for, so we need to indicate - * that result to the caller. - */ - result = DNS_R_CNAME; - } else { - /* - * An ordinary successful query! 
- */ - result = ISC_R_SUCCESS; - } - - if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN || - result == DNS_R_NCACHENXRRSET) - { - dns__rbtdb_bindrdataset(search.rbtdb, node, found, search.now, - nlocktype, rdataset DNS__DB_FLARG_PASS); - if (need_headerupdate(found, search.now)) { - update = found; - } - if (!NEGATIVE(found) && foundsig != NULL) { - dns__rbtdb_bindrdataset(search.rbtdb, node, foundsig, - search.now, nlocktype, - sigrdataset DNS__DB_FLARG_PASS); - if (need_headerupdate(foundsig, search.now)) { - updatesig = foundsig; - } - } - } - -node_exit: - if ((update != NULL || updatesig != NULL) && - nlocktype != isc_rwlocktype_write) - { - NODE_FORCEUPGRADE(lock, &nlocktype); - POST(nlocktype); - } - if (update != NULL && need_headerupdate(update, search.now)) { - update_header(search.rbtdb, update, search.now); - } - if (updatesig != NULL && need_headerupdate(updatesig, search.now)) { - update_header(search.rbtdb, updatesig, search.now); - } - - NODE_UNLOCK(lock, &nlocktype); - -tree_exit: - TREE_UNLOCK(&search.rbtdb->tree_lock, &tlocktype); - - /* - * If we found a zonecut but aren't going to use it, we have to - * let go of it. 
- */ - if (search.need_cleanup) { - node = search.zonecut; - INSIST(node != NULL); - lock = &(search.rbtdb->node_locks[node->locknum].lock); - - NODE_RDLOCK(lock, &nlocktype); - dns__rbtdb_decref(search.rbtdb, node, 0, &nlocktype, &tlocktype, - true, false DNS__DB_FLARG_PASS); - NODE_UNLOCK(lock, &nlocktype); - INSIST(tlocktype == isc_rwlocktype_none); - } - - dns_rbtnodechain_reset(&search.chain); - - update_cachestats(search.rbtdb, result); - return (result); -} - -static isc_result_t -cache_findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options, - isc_stdtime_t now, dns_dbnode_t **nodep, - dns_name_t *foundname, dns_name_t *dcname, - dns_rdataset_t *rdataset, - dns_rdataset_t *sigrdataset DNS__DB_FLARG) { - dns_rbtnode_t *node = NULL; - isc_rwlock_t *lock = NULL; - isc_result_t result; - rbtdb_search_t search; - dns_slabheader_t *header = NULL; - dns_slabheader_t *header_prev = NULL, *header_next = NULL; - dns_slabheader_t *found = NULL, *foundsig = NULL; - unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA; - isc_rwlocktype_t tlocktype = isc_rwlocktype_none; - isc_rwlocktype_t nlocktype = isc_rwlocktype_none; - bool dcnull = (dcname == NULL); - - REQUIRE(VALID_RBTDB((dns_rbtdb_t *)db)); - - if (now == 0) { - now = isc_stdtime_now(); - } - - search = (rbtdb_search_t){ - .rbtdb = (dns_rbtdb_t *)db, - .serial = 1, - .options = options, - .now = now, - }; - dns_fixedname_init(&search.zonecut_name); - dns_rbtnodechain_init(&search.chain); - - if (dcnull) { - dcname = foundname; - } - - if ((options & DNS_DBFIND_NOEXACT) != 0) { - rbtoptions |= DNS_RBTFIND_NOEXACT; - } - - TREE_RDLOCK(&search.rbtdb->tree_lock, &tlocktype); - - /* - * Search down from the root of the tree. 
- */ - result = dns_rbt_findnode(search.rbtdb->tree, name, dcname, &node, - &search.chain, rbtoptions, NULL, &search); - - if (result == DNS_R_PARTIALMATCH) { - result = find_deepest_zonecut(&search, node, nodep, foundname, - rdataset, - sigrdataset DNS__DB_FLARG_PASS); - goto tree_exit; - } else if (result != ISC_R_SUCCESS) { - goto tree_exit; - } else if (!dcnull) { - dns_name_copy(dcname, foundname); - } - - /* - * We now go looking for an NS rdataset at the node. - */ - - lock = &(search.rbtdb->node_locks[node->locknum].lock); - NODE_RDLOCK(lock, &nlocktype); - - for (header = node->data; header != NULL; header = header_next) { - header_next = header->next; - if (check_stale_header(node, header, &nlocktype, lock, &search, - &header_prev)) - { - /* - * The function dns_rbt_findnode found us the a matching - * node for 'name' and stored the result in 'dcname'. - * This is the deepest known zonecut in our database. - * However, this node may be stale and if serve-stale - * is not enabled (in other words 'stale-answer-enable' - * is set to no), this node may not be used as a - * zonecut we know about. If so, find the deepest - * zonecut from this node up and return that instead. - */ - NODE_UNLOCK(lock, &nlocktype); - result = find_deepest_zonecut( - &search, node, nodep, foundname, rdataset, - sigrdataset DNS__DB_FLARG_PASS); - dns_name_copy(foundname, dcname); - goto tree_exit; - } else if (EXISTS(header) && !ANCIENT(header)) { - /* - * If we found a type we were looking for, remember - * it. - */ - if (header->type == dns_rdatatype_ns) { - /* - * Remember a NS rdataset even if we're - * not specifically looking for it, because - * we might need it later. - */ - found = header; - } else if (header->type == RBTDB_RDATATYPE_SIGNS) { - /* - * If we need the NS rdataset, we'll also - * need its signature. - */ - foundsig = header; - } - header_prev = header; - } else { - header_prev = header; - } - } - - if (found == NULL) { - /* - * No NS records here. 
- */ - NODE_UNLOCK(lock, &nlocktype); - result = find_deepest_zonecut(&search, node, nodep, foundname, - rdataset, - sigrdataset DNS__DB_FLARG_PASS); - goto tree_exit; - } - - if (nodep != NULL) { - new_reference(search.rbtdb, node, nlocktype DNS__DB_FLARG_PASS); - *nodep = node; - } - - dns__rbtdb_bindrdataset(search.rbtdb, node, found, search.now, - nlocktype, rdataset DNS__DB_FLARG_PASS); - if (foundsig != NULL) { - dns__rbtdb_bindrdataset(search.rbtdb, node, foundsig, - search.now, nlocktype, - sigrdataset DNS__DB_FLARG_PASS); - } - - if (need_headerupdate(found, search.now) || - (foundsig != NULL && need_headerupdate(foundsig, search.now))) - { - if (nlocktype != isc_rwlocktype_write) { - NODE_FORCEUPGRADE(lock, &nlocktype); - POST(nlocktype); - } - if (need_headerupdate(found, search.now)) { - update_header(search.rbtdb, found, search.now); - } - if (foundsig != NULL && need_headerupdate(foundsig, search.now)) - { - update_header(search.rbtdb, foundsig, search.now); - } - } - - NODE_UNLOCK(lock, &nlocktype); - -tree_exit: - TREE_UNLOCK(&search.rbtdb->tree_lock, &tlocktype); - - INSIST(!search.need_cleanup); - - dns_rbtnodechain_reset(&search.chain); - - if (result == DNS_R_DELEGATION) { - result = ISC_R_SUCCESS; - } - - return (result); -} - void dns__rbtdb_attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp DNS__DB_FLARG) { @@ -3480,109 +2307,6 @@ dns__rbtdb_createiterator(dns_db_t *db, unsigned int options, return (ISC_R_SUCCESS); } -static isc_result_t -cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, - dns_rdatatype_t type, dns_rdatatype_t covers, - isc_stdtime_t now, dns_rdataset_t *rdataset, - dns_rdataset_t *sigrdataset DNS__DB_FLARG) { - dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; - dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; - dns_slabheader_t *header = NULL, *header_next = NULL; - dns_slabheader_t *found = NULL, *foundsig = NULL; - dns_typepair_t matchtype, sigmatchtype, negtype; - isc_result_t result; 
- isc_rwlock_t *lock = NULL; - isc_rwlocktype_t nlocktype = isc_rwlocktype_none; - - REQUIRE(VALID_RBTDB(rbtdb)); - REQUIRE(type != dns_rdatatype_any); - - UNUSED(version); - - result = ISC_R_SUCCESS; - - if (now == 0) { - now = isc_stdtime_now(); - } - - lock = &rbtdb->node_locks[rbtnode->locknum].lock; - NODE_RDLOCK(lock, &nlocktype); - - matchtype = DNS_TYPEPAIR_VALUE(type, covers); - negtype = DNS_TYPEPAIR_VALUE(0, type); - if (covers == 0) { - sigmatchtype = DNS_TYPEPAIR_VALUE(dns_rdatatype_rrsig, type); - } else { - sigmatchtype = 0; - } - - for (header = rbtnode->data; header != NULL; header = header_next) { - header_next = header->next; - if (!ACTIVE(header, now)) { - if ((header->ttl + STALE_TTL(header, rbtdb) < - now - RBTDB_VIRTUAL) && - (nlocktype == isc_rwlocktype_write || - NODE_TRYUPGRADE(lock, &nlocktype) == - ISC_R_SUCCESS)) - { - /* - * We update the node's status only when we - * can get write access. - */ - - /* - * We don't check if refcurrent(rbtnode) == 0 - * and try to free like we do in cache_find(), - * because refcurrent(rbtnode) must be - * non-zero. This is so because 'node' is an - * argument to the function. - */ - mark_header_ancient(header); - } - } else if (EXISTS(header) && !ANCIENT(header)) { - if (header->type == matchtype) { - found = header; - } else if (header->type == RBTDB_RDATATYPE_NCACHEANY || - header->type == negtype) - { - found = header; - } else if (header->type == sigmatchtype) { - foundsig = header; - } - } - } - if (found != NULL) { - dns__rbtdb_bindrdataset(rbtdb, rbtnode, found, now, nlocktype, - rdataset DNS__DB_FLARG_PASS); - if (!NEGATIVE(found) && foundsig != NULL) { - dns__rbtdb_bindrdataset(rbtdb, rbtnode, foundsig, now, - nlocktype, - sigrdataset DNS__DB_FLARG_PASS); - } - } - - NODE_UNLOCK(lock, &nlocktype); - - if (found == NULL) { - return (ISC_R_NOTFOUND); - } - - if (NEGATIVE(found)) { - /* - * We found a negative cache entry. 
- */ - if (NXDOMAIN(found)) { - result = DNS_R_NCACHENXDOMAIN; - } else { - result = DNS_R_NCACHENXRRSET; - } - } - - update_cachestats(rbtdb, result); - - return (result); -} - isc_result_t dns__rbtdb_allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, unsigned int options, @@ -3748,9 +2472,6 @@ update_recordsandxfrsize(bool add, dns_rbtdb_version_t *rbtversion, RWUNLOCK(&rbtversion->rwlock, isc_rwlocktype_write); } -/* - * write lock on rbtnode must be held. - */ isc_result_t dns__rbtdb_add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, const dns_name_t *nodename, dns_rbtdb_version_t *rbtversion, @@ -3769,14 +2490,6 @@ dns__rbtdb_add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, dns_trust_t trust; int idx; - /* - * Add an dns_slabheader_t to a node. - */ - - /* - * Caller must be holding the node lock. - */ - if ((options & DNS_DBADD_MERGE) != 0) { REQUIRE(rbtversion != NULL); merge = true; @@ -3826,8 +2539,7 @@ dns__rbtdb_add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, topheader != NULL; topheader = topheader->next) { - set_ttl(topheader, 0); - mark_header_ancient(topheader); + mark_ancient(topheader); } goto find_header; } @@ -3890,8 +2602,7 @@ dns__rbtdb_add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, * The new rdataset is better. Expire the * ncache entry. */ - set_ttl(topheader, 0); - mark_header_ancient(topheader); + mark_ancient(topheader); topheader = NULL; goto find_header; } @@ -4042,7 +2753,7 @@ find_header: * older one. */ if (header->ttl > newheader->ttl) { - set_ttl(header, newheader->ttl); + dns__rbtdb_setttl(header, newheader->ttl); } if (header->noqname == NULL && newheader->noqname != NULL) @@ -4096,7 +2807,7 @@ find_header: * older one. 
*/ if (header->ttl > newheader->ttl) { - set_ttl(header, newheader->ttl); + dns__rbtdb_setttl(header, newheader->ttl); } if (header->noqname == NULL && newheader->noqname != NULL) @@ -4194,11 +2905,9 @@ find_header: changed->dirty = true; } if (rbtversion == NULL) { - set_ttl(header, 0); - mark_header_ancient(header); + mark_ancient(header); if (sigheader != NULL) { - set_ttl(sigheader, 0); - mark_header_ancient(sigheader); + mark_ancient(sigheader); } } if (rbtversion != NULL && !header_nx) { @@ -4389,16 +3098,6 @@ cleanup: return (result); } -static size_t -rdataset_size(dns_slabheader_t *header) { - if (!NONEXISTENT(header)) { - return (dns_rdataslab_size((unsigned char *)header, - sizeof(*header))); - } - - return (sizeof(*header)); -} - isc_result_t dns__rbtdb_addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, isc_stdtime_t now, @@ -4468,7 +3167,7 @@ dns__rbtdb_addrdataset(dns_db_t *db, dns_dbnode_t *node, }; dns_slabheader_reset(newheader, db, node); - set_ttl(newheader, rdataset->ttl + now); + dns__rbtdb_setttl(newheader, rdataset->ttl + now); if (rdataset->ttl == 0U) { DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_ZEROTTL); } @@ -4562,8 +3261,8 @@ dns__rbtdb_addrdataset(dns_db_t *db, dns_dbnode_t *node, } if (cache_is_overmem) { - overmem_purge(rbtdb, rbtnode->locknum, rdataset_size(newheader), - &tlocktype DNS__DB_FLARG_PASS); + dns__cachedb_overmem(rbtdb, newheader, rbtnode->locknum, + &tlocktype DNS__DB_FLARG_PASS); } NODE_WRLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); @@ -4582,19 +3281,12 @@ dns__rbtdb_addrdataset(dns_db_t *db, dns_dbnode_t *node, } header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1); - if (header != NULL) { - dns_ttl_t ttl = header->ttl; - - /* Only account for stale TTL if cache is not overmem */ - if (!cache_is_overmem) { - ttl += STALE_TTL(header, rbtdb); - } - - if (ttl < now - RBTDB_VIRTUAL) { - dns__rbtdb_expireheader( - header, &tlocktype, - dns_expire_ttl DNS__DB_FLARG_PASS); 
- } + if (header != NULL && header->ttl + STALE_TTL(header, rbtdb) < + now - RBTDB_VIRTUAL) + { + dns__cachedb_expireheader( + header, &tlocktype, + dns_expire_ttl DNS__DB_FLARG_PASS); } /* @@ -4693,7 +3385,7 @@ dns__rbtdb_subtractrdataset(dns_db_t *db, dns_dbnode_t *node, newheader = (dns_slabheader_t *)region.base; dns_slabheader_reset(newheader, db, node); - set_ttl(newheader, rdataset->ttl); + dns__rbtdb_setttl(newheader, rdataset->ttl); newheader->type = DNS_TYPEPAIR_VALUE(rdataset->type, rdataset->covers); atomic_init(&newheader->attributes, 0); newheader->serial = rbtversion->serial; @@ -4796,7 +3488,7 @@ dns__rbtdb_subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_slabheader_destroy(&newheader); newheader = dns_slabheader_new((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode); - set_ttl(newheader, 0); + dns__rbtdb_setttl(newheader, 0); newheader->type = topheader->type; atomic_init(&newheader->attributes, DNS_SLABHEADERATTR_NONEXISTENT); @@ -4894,7 +3586,7 @@ dns__rbtdb_deleterdataset(dns_db_t *db, dns_dbnode_t *node, newheader = dns_slabheader_new(db, node); newheader->type = DNS_TYPEPAIR_VALUE(type, covers); - set_ttl(newheader, 0); + dns__rbtdb_setttl(newheader, 0); atomic_init(&newheader->attributes, DNS_SLABHEADERATTR_NONEXISTENT); if (rbtversion != NULL) { newheader->serial = rbtversion->serial; @@ -4969,23 +3661,6 @@ dns__rbtdb_nodecount(dns_db_t *db, dns_dbtree_t tree) { return (count); } -static size_t -hashsize(dns_db_t *db) { - dns_rbtdb_t *rbtdb = NULL; - size_t size; - isc_rwlocktype_t tlocktype = isc_rwlocktype_none; - - rbtdb = (dns_rbtdb_t *)db; - - REQUIRE(VALID_RBTDB(rbtdb)); - - TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype); - size = dns_rbt_hashsize(rbtdb->tree); - TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); - - return (size); -} - void dns__rbtdb_setloop(dns_db_t *db, isc_loop_t *loop) { dns_rbtdb_t *rbtdb = NULL; @@ -5016,8 +3691,8 @@ dns__rbtdb_getoriginnode(dns_db_t *db, dns_dbnode_t **nodep DNS__DB_FLARG) { /* Note that the access to 
origin_node doesn't require a DB lock */ onode = (dns_rbtnode_t *)rbtdb->origin_node; if (onode != NULL) { - new_reference(rbtdb, onode, - isc_rwlocktype_none DNS__DB_FLARG_PASS); + dns__rbtdb_newref(rbtdb, onode, + isc_rwlocktype_none DNS__DB_FLARG_PASS); *nodep = rbtdb->origin_node; } else { INSIST(IS_CACHE(rbtdb)); @@ -5043,28 +3718,6 @@ dns__rbtdb_unlocknode(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t type) { RWUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, type); } -static isc_result_t -setcachestats(dns_db_t *db, isc_stats_t *stats) { - dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; - - REQUIRE(VALID_RBTDB(rbtdb)); - REQUIRE(IS_CACHE(rbtdb)); /* current restriction */ - REQUIRE(stats != NULL); - - isc_stats_attach(stats, &rbtdb->cachestats); - return (ISC_R_SUCCESS); -} - -static dns_stats_t * -getrrsetstats(dns_db_t *db) { - dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; - - REQUIRE(VALID_RBTDB(rbtdb)); - REQUIRE(IS_CACHE(rbtdb)); /* current restriction */ - - return (rbtdb->rrsetstats); -} - isc_result_t dns__rbtdb_nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name) { dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; @@ -5083,128 +3736,6 @@ dns__rbtdb_nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name) { return (result); } -static isc_result_t -setservestalettl(dns_db_t *db, dns_ttl_t ttl) { - dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; - - REQUIRE(VALID_RBTDB(rbtdb)); - REQUIRE(IS_CACHE(rbtdb)); - - /* currently no bounds checking. 0 means disable. 
*/ - rbtdb->common.serve_stale_ttl = ttl; - return (ISC_R_SUCCESS); -} - -static isc_result_t -getservestalettl(dns_db_t *db, dns_ttl_t *ttl) { - dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; - - REQUIRE(VALID_RBTDB(rbtdb)); - REQUIRE(IS_CACHE(rbtdb)); - - *ttl = rbtdb->common.serve_stale_ttl; - return (ISC_R_SUCCESS); -} - -static isc_result_t -setservestalerefresh(dns_db_t *db, uint32_t interval) { - dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; - - REQUIRE(VALID_RBTDB(rbtdb)); - REQUIRE(IS_CACHE(rbtdb)); - - /* currently no bounds checking. 0 means disable. */ - rbtdb->serve_stale_refresh = interval; - return (ISC_R_SUCCESS); -} - -static isc_result_t -getservestalerefresh(dns_db_t *db, uint32_t *interval) { - dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; - - REQUIRE(VALID_RBTDB(rbtdb)); - REQUIRE(IS_CACHE(rbtdb)); - - *interval = rbtdb->serve_stale_refresh; - return (ISC_R_SUCCESS); -} - -static void -expiredata(dns_db_t *db, dns_dbnode_t *node, void *data) { - dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; - dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; - dns_slabheader_t *header = data; - isc_rwlocktype_t nlocktype = isc_rwlocktype_none; - isc_rwlocktype_t tlocktype = isc_rwlocktype_none; - - NODE_WRLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); - dns__rbtdb_expireheader(header, &tlocktype, - dns_expire_flush DNS__DB_FLARG_PASS); - NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); - INSIST(tlocktype == isc_rwlocktype_none); -} - -static void -deletedata(dns_db_t *db ISC_ATTR_UNUSED, dns_dbnode_t *node ISC_ATTR_UNUSED, - void *data) { - dns_slabheader_t *header = data; - dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)header->db; - - update_rrsetstats(rbtdb->rrsetstats, header->type, - atomic_load_acquire(&header->attributes), false); - - if (ISC_LINK_LINKED(header, link)) { - int idx = HEADER_NODE(header)->locknum; - INSIST(IS_CACHE(rbtdb)); - ISC_LIST_UNLINK(rbtdb->lru[idx], header, link); - } - - if (header->heap != NULL && header->heap_index != 0) { - 
isc_heap_delete(header->heap, header->heap_index); - } - header->heap_index = 0; - - if (header->noqname != NULL) { - free_proof(db->mctx, &header->noqname); - } - if (header->closest != NULL) { - free_proof(db->mctx, &header->closest); - } -} - -dns_dbmethods_t dns__rbtdb_cachemethods = { - .destroy = dns__rbtdb_destroy, - .currentversion = dns__rbtdb_currentversion, - .newversion = dns__rbtdb_newversion, - .attachversion = dns__rbtdb_attachversion, - .closeversion = dns__rbtdb_closeversion, - .findnode = dns__rbtdb_findnode, - .find = cache_find, - .findzonecut = cache_findzonecut, - .attachnode = dns__rbtdb_attachnode, - .detachnode = dns__rbtdb_detachnode, - .createiterator = dns__rbtdb_createiterator, - .findrdataset = cache_findrdataset, - .allrdatasets = dns__rbtdb_allrdatasets, - .addrdataset = dns__rbtdb_addrdataset, - .subtractrdataset = dns__rbtdb_subtractrdataset, - .deleterdataset = dns__rbtdb_deleterdataset, - .nodecount = dns__rbtdb_nodecount, - .setloop = dns__rbtdb_setloop, - .getoriginnode = dns__rbtdb_getoriginnode, - .getrrsetstats = getrrsetstats, - .setcachestats = setcachestats, - .hashsize = hashsize, - .setservestalettl = setservestalettl, - .getservestalettl = getservestalettl, - .setservestalerefresh = setservestalerefresh, - .getservestalerefresh = getservestalerefresh, - .locknode = dns__rbtdb_locknode, - .unlocknode = dns__rbtdb_unlocknode, - .expiredata = expiredata, - .deletedata = deletedata, -}; - isc_result_t dns__rbtdb_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type, dns_rdataclass_t rdclass, unsigned int argc, char *argv[], @@ -6050,7 +4581,7 @@ dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep, result = ISC_R_SUCCESS; } - new_reference(rbtdb, node, isc_rwlocktype_none DNS__DB_FLARG_PASS); + dns__rbtdb_newref(rbtdb, node, isc_rwlocktype_none DNS__DB_FLARG_PASS); *nodep = rbtdbiter->node; @@ -6155,181 +4686,54 @@ free_gluetable(dns_rbtdb_version_t *rbtversion) { rcu_read_unlock(); } -/*% - * 
Routines for LRU-based cache management. - */ - -/*% - * See if a given cache entry that is being reused needs to be updated - * in the LRU-list. From the LRU management point of view, this function is - * expected to return true for almost all cases. When used with threads, - * however, this may cause a non-negligible performance penalty because a - * writer lock will have to be acquired before updating the list. - * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this - * function returns true if the entry has not been updated for some period of - * time. We differentiate the NS or glue address case and the others since - * experiments have shown that the former tends to be accessed relatively - * infrequently and the cost of cache miss is higher (e.g., a missing NS records - * may cause external queries at a higher level zone, involving more - * transactions). - * - * Caller must hold the node (read or write) lock. - */ -static bool -need_headerupdate(dns_slabheader_t *header, isc_stdtime_t now) { - if (DNS_SLABHEADER_GETATTR(header, (DNS_SLABHEADERATTR_NONEXISTENT | - DNS_SLABHEADERATTR_ANCIENT | - DNS_SLABHEADERATTR_ZEROTTL)) != 0) - { - return (false); - } - -#if DNS_RBTDB_LIMITLRUUPDATE - if (header->type == dns_rdatatype_ns || - (header->trust == dns_trust_glue && - (header->type == dns_rdatatype_a || - header->type == dns_rdatatype_aaaa))) - { - /* - * Glue records are updated if at least DNS_RBTDB_LRUUPDATE_GLUE - * seconds have passed since the previous update time. - */ - return (header->last_used + DNS_RBTDB_LRUUPDATE_GLUE <= now); - } - - /* - * Other records are updated if DNS_RBTDB_LRUUPDATE_REGULAR seconds - * have passed. - */ - return (header->last_used + DNS_RBTDB_LRUUPDATE_REGULAR <= now); -#else - UNUSED(now); - - return (true); -#endif /* if DNS_RBTDB_LIMITLRUUPDATE */ -} - -/*% - * Update the timestamp of a given cache entry and move it to the head - * of the corresponding LRU list. 
- * - * Caller must hold the node (write) lock. - * - * Note that the we do NOT touch the heap here, as the TTL has not changed. - */ static void -update_header(dns_rbtdb_t *rbtdb, dns_slabheader_t *header, isc_stdtime_t now) { - INSIST(IS_CACHE(rbtdb)); - - /* To be checked: can we really assume this? XXXMLG */ - INSIST(ISC_LINK_LINKED(header, link)); - - ISC_LIST_UNLINK(rbtdb->lru[HEADER_NODE(header)->locknum], header, link); - header->last_used = now; - ISC_LIST_PREPEND(rbtdb->lru[HEADER_NODE(header)->locknum], header, - link); -} - -static size_t -expire_lru_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, - isc_rwlocktype_t *tlocktypep, - size_t purgesize DNS__DB_FLARG) { - dns_slabheader_t *header = NULL, *header_prev = NULL; - size_t purged = 0; - - for (header = ISC_LIST_TAIL(rbtdb->lru[locknum]); - header != NULL && purged <= purgesize; header = header_prev) - { - size_t header_size = rdataset_size(header); - header_prev = ISC_LIST_PREV(header, link); - - /* - * Unlink the entry at this point to avoid checking it - * again even if it's currently used someone else and - * cannot be purged at this moment. This entry won't be - * referenced any more (so unlinking is safe) since the - * TTL was reset to 0. - */ - ISC_LIST_UNLINK(rbtdb->lru[locknum], header, link); - dns__rbtdb_expireheader(header, tlocktypep, - dns_expire_lru DNS__DB_FLARG_PASS); - purged += header_size; +free_proof(isc_mem_t *mctx, dns_proof_t **noqname) { + if (dns_name_dynamic(&(*noqname)->name)) { + dns_name_free(&(*noqname)->name, mctx); } - - return (purged); -} - -/*% - * Purge some expired and/or stale (i.e. unused for some period) cache entries - * under an overmem condition. To recover from this condition quickly, - * we cleanup entries up to the size of newly added rdata (passed as purgesize). - * - * This process is triggered while adding a new entry, and we specifically - * avoid purging entries in the same LRU bucket as the one to which the new - * entry will belong. 
Otherwise, we might purge entries of the same name - * of different RR types while adding RRsets from a single response - * (consider the case where we're adding A and AAAA glue records of the - * same NS name). - */ -static void -overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, size_t purgesize, - isc_rwlocktype_t *tlocktypep DNS__DB_FLARG) { - unsigned int locknum; - size_t purged = 0; - - for (locknum = (locknum_start + 1) % rbtdb->node_lock_count; - locknum != locknum_start && purged <= purgesize; - locknum = (locknum + 1) % rbtdb->node_lock_count) - { - isc_rwlocktype_t nlocktype = isc_rwlocktype_none; - NODE_WRLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); - - purged += expire_lru_headers(rbtdb, locknum, tlocktypep, - purgesize - - purged DNS__DB_FLARG_PASS); - - NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); + if ((*noqname)->neg != NULL) { + isc_mem_put(mctx, (*noqname)->neg, + dns_rdataslab_size((*noqname)->neg, 0)); } + if ((*noqname)->negsig != NULL) { + isc_mem_put(mctx, (*noqname)->negsig, + dns_rdataslab_size((*noqname)->negsig, 0)); + } + isc_mem_put(mctx, *noqname, sizeof(**noqname)); + *noqname = NULL; } -/* - * Caller must hold the node (write) lock. - */ void -dns__rbtdb_expireheader(dns_slabheader_t *header, isc_rwlocktype_t *tlocktypep, - dns_expire_t reason DNS__DB_FLARG) { - set_ttl(header, 0); - mark_header_ancient(header); +dns__rbtdb_deletedata(dns_db_t *db ISC_ATTR_UNUSED, + dns_dbnode_t *node ISC_ATTR_UNUSED, void *data) { + dns_slabheader_t *header = data; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)header->db; - if (isc_refcount_current(&HEADER_NODE(header)->references) == 0) { - isc_rwlocktype_t nlocktype = isc_rwlocktype_write; - dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)header->db; + if (header->heap != NULL && header->heap_index != 0) { + isc_heap_delete(header->heap, header->heap_index); + } + header->heap_index = 0; - /* - * If no one else is using the node, we can clean it up now. 
- * We first need to gain a new reference to the node to meet a - * requirement of dns__rbtdb_decref(). - */ - new_reference(rbtdb, HEADER_NODE(header), - nlocktype DNS__DB_FLARG_PASS); - dns__rbtdb_decref(rbtdb, HEADER_NODE(header), 0, &nlocktype, - tlocktypep, true, false DNS__DB_FLARG_PASS); + if (IS_CACHE(rbtdb)) { + update_rrsetstats(rbtdb->rrsetstats, header->type, + atomic_load_acquire(&header->attributes), + false); - if (rbtdb->cachestats == NULL) { - return; + if (ISC_LINK_LINKED(header, link)) { + int idx = HEADER_NODE(header)->locknum; + INSIST(IS_CACHE(rbtdb)); + ISC_LIST_UNLINK(rbtdb->lru[idx], header, link); } - switch (reason) { - case dns_expire_ttl: - isc_stats_increment(rbtdb->cachestats, - dns_cachestatscounter_deletettl); - break; - case dns_expire_lru: - isc_stats_increment(rbtdb->cachestats, - dns_cachestatscounter_deletelru); - break; - default: - break; + if (header->noqname != NULL) { + free_proof(db->mctx, &header->noqname); + } + if (header->closest != NULL) { + free_proof(db->mctx, &header->closest); + } + } else { + if (header->glue_list) { + dns__rbtdb_freeglue(header->glue_list); } } } diff --git a/lib/dns/rbtdb_p.h b/lib/dns/rbtdb_p.h index f7bebfc988..ddc98fe7f2 100644 --- a/lib/dns/rbtdb_p.h +++ b/lib/dns/rbtdb_p.h @@ -148,6 +148,12 @@ #define IS_STUB(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0) #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0) +/* + * Allow clients with a virtual time of up to 5 minutes in the past to see + * records that would otherwise have expired.
+ */ +#define RBTDB_VIRTUAL 300 + /***** ***** Module Info *****/ @@ -336,11 +342,15 @@ typedef struct { extern dns_dbmethods_t dns__rbtdb_zonemethods; extern dns_dbmethods_t dns__rbtdb_cachemethods; +/* + * Common DB implementation methods shared by both cache and zone RBT + * databases: + */ + isc_result_t dns__rbtdb_create(isc_mem_t *mctx, const dns_name_t *base, dns_dbtype_t type, dns_rdataclass_t rdclass, unsigned int argc, char *argv[], void *driverarg, dns_db_t **dbp); - /*%< * Create a new database of type "rbt". Called via dns_db_create(); * see documentation for that function for more details. @@ -356,20 +366,26 @@ dns__rbtdb_create(isc_mem_t *mctx, const dns_name_t *base, dns_dbtype_t type, void dns__rbtdb_destroy(dns_db_t *arg); +/*%< + * Implement dns_db_destroy() for RBT databases, see documentation + * for that function for more details. + */ void dns__rbtdb_currentversion(dns_db_t *db, dns_dbversion_t **versionp); - isc_result_t dns__rbtdb_newversion(dns_db_t *db, dns_dbversion_t **versionp); - void dns__rbtdb_attachversion(dns_db_t *db, dns_dbversion_t *source, dns_dbversion_t **targetp); - void dns__rbtdb_closeversion(dns_db_t *db, dns_dbversion_t **versionp, bool commit DNS__DB_FLARG); +/*%< + * Implement the dns_db_currentversion(), _newversion(), + * _attachversion() and _closeversion() methods for RBT databases; + * see documentation of those functions for more details. + */ isc_result_t dns__rbtdb_findnode(dns_db_t *db, const dns_name_t *name, bool create, @@ -378,62 +394,106 @@ isc_result_t dns__rbtdb_findnodeintree(dns_rbtdb_t *rbtdb, dns_rbt_t *tree, const dns_name_t *name, bool create, dns_dbnode_t **nodep DNS__DB_FLARG); +/*%< + * Implement the dns_db_findnode() and _findnodeintree() methods for + * RBT databases; see documentation of those functions for more details. 
+ */ + void dns__rbtdb_attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp DNS__DB_FLARG); void dns__rbtdb_detachnode(dns_db_t *db, dns_dbnode_t **targetp DNS__DB_FLARG); +/*%< + * Implement the dns_db_attachnode() and _detachnode() methods for + * RBT databases; see documentation of those functions for more details. + */ isc_result_t dns__rbtdb_createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp); +/*%< + * Implement dns_db_createiterator() for RBT databases; see documentation of + * that function for more details. + */ isc_result_t dns__rbtdb_allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, unsigned int options, isc_stdtime_t now, dns_rdatasetiter_t **iteratorp DNS__DB_FLARG); - +/*%< + * Implement dns_db_allrdatasets() for RBT databases; see documentation of + * that function for more details. + */ isc_result_t dns__rbtdb_addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options, dns_rdataset_t *addedrdataset DNS__DB_FLARG); - isc_result_t dns__rbtdb_subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, dns_rdataset_t *rdataset, unsigned int options, dns_rdataset_t *newrdataset DNS__DB_FLARG); - isc_result_t dns__rbtdb_deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, dns_rdatatype_t type, dns_rdatatype_t covers DNS__DB_FLARG); +/*%< + * Implement the dns_db_addrdataset(), _subtractrdataset() and + * _deleterdataset() methods for RBT databases; see documentation of + * those functions for more details. + */ unsigned int dns__rbtdb_nodecount(dns_db_t *db, dns_dbtree_t tree); +/*%< + * Implement dns_db_nodecount() for RBT databases; see documentation of + * that function for more details. + */ void dns__rbtdb_setloop(dns_db_t *db, isc_loop_t *loop); +/*%< + * Implement dns_db_setloop() for RBT databases; see documentation of + * that function for more details. 
+ */ isc_result_t dns__rbtdb_getoriginnode(dns_db_t *db, dns_dbnode_t **nodep DNS__DB_FLARG); +/*%< + * Implement dns_db_getoriginnode() for RBT databases; see documentation of + * that function for more details. + */ void -dns__rbtdb_bindrdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, - dns_slabheader_t *header, isc_stdtime_t now, - isc_rwlocktype_t locktype, - dns_rdataset_t *rdataset DNS__DB_FLARG); - -void -dns__rbtdb_expireheader(dns_slabheader_t *header, isc_rwlocktype_t *tlocktypep, - dns_expire_t reason DNS__DB_FLARG); +dns__rbtdb_deletedata(dns_db_t *db ISC_ATTR_UNUSED, + dns_dbnode_t *node ISC_ATTR_UNUSED, void *data); +/*%< + * Implement dns_db_deletedata() for RBT databases; see documentation of + * that function for more details. + */ void dns__rbtdb_locknode(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t type); void dns__rbtdb_unlocknode(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t type); +/*%< + * Implement the dns_db_locknode() and _unlocknode() methods for + * RBT databases; see documentation of those functions for more details. + */ + +/*% + * Functions used for the RBT implementation which are defined and + * used in rbtdb.c but may also be called from rbt-zonedb.c or + * rbt-cachedb.c: + */ +void +dns__rbtdb_bindrdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, + dns_slabheader_t *header, isc_stdtime_t now, + isc_rwlocktype_t locktype, + dns_rdataset_t *rdataset DNS__DB_FLARG); isc_result_t dns__rbtdb_nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name); @@ -441,21 +501,64 @@ dns__rbtdb_nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name); void dns__rbtdb_freeglue(dns_glue_t *glue_list); +void +dns__rbtdb_newref(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, + isc_rwlocktype_t locktype DNS__DB_FLARG); +/*%< + * Increment the reference counter to a node in an RBT database. + * If the caller holds a node lock then its lock type is specified + * as 'locktype'. 
If the node is write-locked, then the node can + * be removed from the dead nodes list. If not, the list can be + * cleaned up later. + */ + bool dns__rbtdb_decref(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, uint32_t least_serial, isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep, bool tryupgrade, bool pruning DNS__DB_FLARG); +/*%< + * Decrement the reference counter to a node in an RBT database. + * 'nlocktypep' and 'tlocktypep' are pointers to the current status + * of the node lock and tree lock. + * + * If references go to 0, the node will be cleaned up, which may + * necessitate upgrading the locks. + */ isc_result_t dns__rbtdb_add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, const dns_name_t *nodename, dns_rbtdb_version_t *rbtversion, dns_slabheader_t *newheader, unsigned int options, bool loading, dns_rdataset_t *addedrdataset, isc_stdtime_t now DNS__DB_FLARG); +/*%< + * Add a slab header 'newheader' to a node in an RBT database. + * The caller must have the node write-locked. + */ void dns__rbtdb_setsecure(dns_db_t *db, dns_rbtdb_version_t *version, dns_dbnode_t *origin); +/*%< + * Update the secure status for an RBT database version 'version'. + * The version will be marked secure if it is fully signed and + * contains a complete NSEC/NSEC3 chain. + */ + +void +dns__rbtdb_mark(dns_slabheader_t *header, uint_least16_t flag); +/*%< + * Set attribute 'flag' in a slab header 'header' - for example, + * DNS_SLABHEADERATTR_STALE or DNS_SLABHEADERATTR_ANCIENT - and, + * in a cache database, update the rrset stats accordingly. + */ + +void +dns__rbtdb_setttl(dns_slabheader_t *header, dns_ttl_t newttl); +/*%< + * Set the TTL in a slab header 'header'. In a cache database, + * also update the TTL heap accordingly. + */ /* * Functions specific to zone databases that are also called from rbtdb.c.
@@ -466,12 +569,45 @@ dns__zonedb_resigninsert(dns_rbtdb_t *rbtdb, int idx, void dns__zonedb_resigndelete(dns_rbtdb_t *rbtdb, dns_rbtdb_version_t *version, dns_slabheader_t *header DNS__DB_FLARG); +/*%< + * Insert/delete a node from the zone database's resigning heap. + */ isc_result_t dns__zonedb_wildcardmagic(dns_rbtdb_t *rbtdb, const dns_name_t *name, bool lock); - +/*%< + * Add the necessary magic for the wildcard name 'name' + * to be found in 'rbtdb'. + * + * In order for wildcard matching to work correctly in + * zone_find(), we must ensure that a node for the wildcarding + * level exists in the database, and has its 'find_callback' + * and 'wild' bits set. + * + * E.g. if the wildcard name is "*.sub.example." then we + * must ensure that "sub.example." exists and is marked as + * a wildcard level. + * + * The tree must be write-locked. + */ isc_result_t dns__zonedb_addwildcards(dns_rbtdb_t *rbtdb, const dns_name_t *name, bool lock); +/*%< + * If 'name' is or contains a wildcard name, create a node for it in the + * database. The tree must be write-locked. + */ + +/* + * Cache-specific functions that are called from rbtdb.c + */ +void +dns__cachedb_expireheader(dns_slabheader_t *header, + isc_rwlocktype_t *tlocktypep, + dns_expire_t reason DNS__DB_FLARG); +void +dns__cachedb_overmem(dns_rbtdb_t *rbtdb, dns_slabheader_t *newheader, + unsigned int locknum_start, + isc_rwlocktype_t *tlocktypep DNS__DB_FLARG); ISC_LANG_ENDDECLS