From 21faf44ef7754e6f482786ca7433d3fa57c0a222 Mon Sep 17 00:00:00 2001 From: Aram Sargsyan Date: Thu, 17 Nov 2022 13:48:36 +0000 Subject: [PATCH 1/3] Add serve-stale CNAME check with stale-answer-client-timeout off Prime the cache with the following records: shortttl.cname.example. 1 IN CNAME longttl.target.example. longttl.target.example. 600 IN A 10.53.0.2 Wait for the CNAME record to expire, disable the authoritative server, and query 'shortttl.cname.example' again, expecting a stale answer. --- bin/tests/system/serve-stale/ans2/ans.pl | 24 ++++++++ bin/tests/system/serve-stale/tests.sh | 74 ++++++++++++++++++++++++ 2 files changed, 98 insertions(+) diff --git a/bin/tests/system/serve-stale/ans2/ans.pl b/bin/tests/system/serve-stale/ans2/ans.pl index 0af055a4fb..f14ae67fe8 100644 --- a/bin/tests/system/serve-stale/ans2/ans.pl +++ b/bin/tests/system/serve-stale/ans2/ans.pl @@ -58,6 +58,8 @@ my $CAA = "othertype.example 2 IN CAA 0 issue \"ca1.example.net\""; my $negSOA = "example 2 IN SOA . . 0 0 0 0 300"; my $CNAME = "cname.example 7 IN CNAME target.example"; my $TARGET = "target.example 9 IN A $localaddr"; +my $SHORTCNAME = "shortttl.cname.example 1 IN CNAME longttl.target.example"; +my $LONGTARGET = "longttl.target.example 600 IN A $localaddr"; sub reply_handler { my ($qname, $qclass, $qtype) = @_; @@ -166,6 +168,28 @@ sub reply_handler { push @auth, $rr; } $rcode = "NOERROR"; + } elsif ($qname eq "shortttl.cname.example") { + if ($qtype eq "A") { + my $rr = new Net::DNS::RR($SHORTCNAME); + push @ans, $rr; + } else { + my $rr = new Net::DNS::RR($negSOA); + push @auth, $rr; + } + $rcode = "NOERROR"; + } elsif ($qname eq "longttl.target.example") { + if ($slow_response) { + print " Sleeping 3 seconds\n"; + sleep(3); + } + if ($qtype eq "A") { + my $rr = new Net::DNS::RR($LONGTARGET); + push @ans, $rr; + } else { + my $rr = new Net::DNS::RR($negSOA); + push @auth, $rr; + } + $rcode = "NOERROR"; } elsif ($qname eq "longttl.example") { if ($qtype eq "TXT") { my $rr = new Net::DNS::RR($LONGTXT); diff --git a/bin/tests/system/serve-stale/tests.sh b/bin/tests/system/serve-stale/tests.sh index b64fc5b48c..63a87a3d58 100755 --- a/bin/tests/system/serve-stale/tests.sh +++ b/bin/tests/system/serve-stale/tests.sh @@ -1829,6 +1829,80 @@ grep "data\.example\..*[12].*IN.*TXT.*A text record with a 2 second ttl" dig.out if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) +############################################################## +# Test for stale-answer-client-timeout off and CNAME record. # +############################################################## +echo_i "test stale-answer-client-timeout (0) and CNAME record" + +n=$((n+1)) +echo_i "prime cache shortttl.cname.example (stale-answer-client-timeout off) ($n)" +ret=0 +$DIG -p ${PORT} @10.53.0.3 shortttl.cname.example A > dig.out.test$n +grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 +grep "ANSWER: 2," dig.out.test$n > /dev/null || ret=1 +grep "shortttl\.cname\.example\..*1.*IN.*CNAME.*longttl\.target\.example\." dig.out.test$n > /dev/null || ret=1 +grep "longttl\.target\.example\..*600.*IN.*A.*10\.53\.0\.2" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Allow RRset to become stale. +sleep 1 + +n=$((n+1)) +echo_i "disable responses from authoritative server ($n)" +ret=0 +$DIG -p ${PORT} @10.53.0.2 txt disable > dig.out.test$n +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "TXT.\"0\"" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +n=$((n+1)) +ret=0 +echo_i "check stale shortttl.cname.example comes from cache (stale-answer-client-timeout off) ($n)" +nextpart ns3/named.run > /dev/null +$DIG -p ${PORT} @10.53.0.3 shortttl.cname.example A > dig.out.test$n +wait_for_log 5 "shortttl.cname.example A resolver failure, stale answer used" ns3/named.run || ret=1 +grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 +grep "EDE: 3 (Stale Answer): (resolver failure)" dig.out.test$n > /dev/null || ret=1 +grep "ANSWER: 2," dig.out.test$n > /dev/null || ret=1 +grep "shortttl\.cname\.example\..*3.*IN.*CNAME.*longttl\.target\.example\." dig.out.test$n > /dev/null || ret=1 +# We can't reliably test the TTL of the longttl.target.example A record. +grep "longttl\.target\.example\..*IN.*A.*10\.53\.0\.2" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +n=$((n+1)) +echo_i "enable responses from authoritative server ($n)" +ret=0 +$DIG -p ${PORT} @10.53.0.2 txt enable > dig.out.test$n +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "TXT.\"1\"" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +n=$((n+1)) +echo_i "check server is alive or restart ($n)" +ret=0 +$RNDCCMD 10.53.0.3 status > rndc.out.test$n 2>&1 || ret=1 +if [ $ret != 0 ]; then + echo_i "failed" + echo_i "restart ns3" + start_server --noclean --restart --port ${PORT} serve-stale ns3 +fi +status=$((status+ret)) + +n=$((n+1)) +echo_i "check server is alive or restart ($n)" +ret=0 +$RNDCCMD 10.53.0.3 status > rndc.out.test$n 2>&1 || ret=1 +if [ $ret != 0 ]; then + echo_i "failed" + echo_i "restart ns3" + start_server --noclean --restart --port ${PORT} serve-stale ns3 +fi +status=$((status+ret)) + ############################################# # Test for stale-answer-client-timeout 0. # ############################################# From 86a80e723fc7b6bdd87174f4a48aa702dbe17c5a Mon Sep 17 00:00:00 2001 From: Matthijs Mekking Date: Thu, 17 Nov 2022 13:52:26 +0000 Subject: [PATCH 2/3] Consider non-stale data when in serve-stale mode With 'stale-answer-enable yes;' and 'stale-answer-client-timeout off;', consider the following situation: A CNAME record and its target record are in the cache, then the CNAME record expires, but the target record is still valid. When a new query for the CNAME record arrives, and the query fails, the stale record is used, and then the query "restarts" to follow the CNAME target. The problem is that the query's multiple stale options (like DNS_DBFIND_STALEOK) are not reset, so 'query_lookup()' treats the restarted query as a lookup following a failed lookup, and returns a SERVFAIL answer when there is no stale data found in the cache, even if there is valid non-stale data there available. With this change, query_lookup() now considers non-stale data in the cache in the first place, and returns it if it is available. --- lib/ns/query.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/ns/query.c b/lib/ns/query.c index ee89b62a17..0c3ee43e1a 100644 --- a/lib/ns/query.c +++ b/lib/ns/query.c @@ -5960,6 +5960,13 @@ query_lookup(query_ctx_t *qctx) { dns_cache_updatestats(qctx->view->cache, result); } + if (dns_rdataset_isassociated(qctx->rdataset) && + dns_rdataset_count(qctx->rdataset) > 0 && !STALE(qctx->rdataset)) + { + /* Found non-stale usable rdataset. */ + goto gotanswer; + } + /* * If DNS_DBFIND_STALEOK is set this means we are dealing with a * lookup following a failed lookup and it is okay to serve a stale @@ -6131,6 +6138,7 @@ query_lookup(query_ctx_t *qctx) { qctx->rdataset->attributes |= DNS_RDATASETATTR_STALE_ADDED; } +gotanswer: result = query_gotanswer(qctx, result); cleanup: From 5b5f3a0ea7acae4e1f92e3bf0c38608d505323d5 Mon Sep 17 00:00:00 2001 From: Aram Sargsyan Date: Thu, 17 Nov 2022 14:21:31 +0000 Subject: [PATCH 3/3] Add a CHANGES note for [GL #3678] --- CHANGES | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGES b/CHANGES index c4caf9ef6b..acbbaf255a 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,8 @@ +6038. [bug] In some serve stale scenarios, like when following an + expired CNAME record, named could return SERVFAIL if the + previous request wasn't successful. Consider non-stale + data when in serve-stale mode. [GL #3678] + 6037. [func] Reject zones which have DS records not at delegation points. [GL #3697]