From 19cf15313ce87f4088f97ad25b2530d02df165ff Mon Sep 17 00:00:00 2001 From: Tony Finch Date: Tue, 12 Apr 2022 11:49:21 +0100 Subject: [PATCH] Avoid timeouts in the notify system test There were two problems in the notify system test when it waited for log messages to appear: the shellcheck refactoring introduced a call to `wait_for_log` with a regex, but `wait_for_log` only supports fixed strings, so it always ran for the full 45 second timeout; and the new test to ensure that notify messages time out failed to reset the nextpart pointer, so if the notify messages timed out before the test ran, it would fail to see them. This change adds a `wait_for_log_re` helper that matches a regex, and uses it where appropriate in the notify system test, which stops the test from waiting longer than necessary; and it resets the nextpart pointer so that the notify timeout test works reliably. Closes #3275 (cherry picked from commit 4a30733ae5b3c4523da0add509a17a7a1fa7878b) --- bin/tests/system/conf.sh.common | 21 +++++++++++++++++++-- bin/tests/system/notify/tests.sh | 18 +++++++----------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/bin/tests/system/conf.sh.common b/bin/tests/system/conf.sh.common index 4f6066e468..fd8647a8e4 100644 --- a/bin/tests/system/conf.sh.common +++ b/bin/tests/system/conf.sh.common @@ -496,6 +496,13 @@ _search_log() ( nextpart "$file" | grep -F -e "$msg" > /dev/null ) +# _search_log_re: same as _search_log but the message is an egrep regex +_search_log_re() ( + msg="$1" + file="$2" + nextpart "$file" | grep -E -e "$msg" > /dev/null +) + # _search_log_peek: look for message $1 in file $2 with nextpartpeek(). _search_log_peek() ( msg="$1" @@ -513,7 +520,17 @@ wait_for_log() ( msg="$2" file="$3" retry_quiet "$timeout" _search_log "$msg" "$file" && return 0 - echo_i "exceeded time limit waiting for '$msg' in $file" + echo_i "exceeded time limit waiting for literal '$msg' in $file" + return 1 +) + +# wait_for_log_re: same as wait_for_log, but the message is an egrep regex +wait_for_log_re() ( + timeout="$1" + msg="$2" + file="$3" + retry_quiet "$timeout" _search_log_re "$msg" "$file" && return 0 + echo_i "exceeded time limit waiting for regex '$msg' in $file" return 1 ) @@ -524,7 +541,7 @@ wait_for_log_peek() ( msg="$2" file="$3" retry_quiet "$timeout" _search_log_peek "$msg" "$file" && return 0 - echo_i "exceeded time limit waiting for '$msg' in $file" + echo_i "exceeded time limit waiting for literal '$msg' in $file" return 1 ) diff --git a/bin/tests/system/notify/tests.sh b/bin/tests/system/notify/tests.sh index 52d2f81a5b..04fd34b47f 100644 --- a/bin/tests/system/notify/tests.sh +++ b/bin/tests/system/notify/tests.sh @@ -105,15 +105,7 @@ rm -f ns2/example.db cp -f ns2/example2.db ns2/example.db echo_i "reloading with example2 using HUP and waiting up to 45 seconds" kill -HUP "$(cat ns2/named.pid)" - -try=0 -while [ $try -lt 45 ] -do - nextpart ns3/named.run > tmp - grep "transfer of 'example/IN' from 10.53.0.2#.*success" tmp > /dev/null && break - sleep 1 - try=$((try + 1)) -done +wait_for_log_re 45 "transfer of 'example/IN' from 10.53.0.2#.*success" ns3/named.run test_start "checking notify message was logged" grep 'notify from 10.53.0.2#[0-9][0-9]*: serial 2$' ns3/named.run > /dev/null || ret=1 @@ -143,7 +135,7 @@ rm -f ns2/example.db cp -f ns2/example4.db ns2/example.db start_server --noclean --restart --port "${PORT}" notify ns2 -wait_for_log 45 "transfer of 'example/IN' from 10.53.0.2#.*success" ns3/named.run +wait_for_log_re 45 "transfer of 'example/IN' from 10.53.0.2#.*success" ns3/named.run test_start "checking notify message was logged" grep 'notify from 10.53.0.2#[0-9][0-9]*: serial 4$' ns3/named.run > /dev/null || ret=1 @@ -208,10 +200,14 @@ grep "test string" "$fnb" > /dev/null || ret=1 grep "test string" "$fnc" > /dev/null || ret=1 grep "sending notify to 10.53.0.5#[0-9]* : TSIG (b)" ns5/named.run > /dev/null || ret=1 grep "sending notify to 10.53.0.5#[0-9]* : TSIG (c)" ns5/named.run > /dev/null || ret=1 - test_end +# notify messages were sent to unresponsive 10.53.10.53 during the tests +# above, which should time out at some point; we need to wait for them to +# appear in the logs in case the tests run faster than the notify timeouts + test_start "checking notify retries expire within 45 seconds ($n)" +nextpartreset ns3/named.run wait_for_log 45 'retries exceeded' ns3/named.run || ret=1 test_end