fix: dev: Fix isc_quota bug

Running jobs which were entered into the isc_quota queue is the
responsibility of the isc_quota_release() function, which, when
releasing a previously acquired quota, checks whether the queue
is empty, and if it's not, it runs a job from the queue without touching
the 'quota->used' counter. This mechanism is susceptible to a possible
hangup of a newly queued job in the case where, between the time a decision
has been made to queue it (because used >= max) and the time it was
actually queued, the last quota was released. Since there are no more
quotas to be released (unless more arrive in the future), the newly
entered job will be stuck in the queue.

Fix the issue by adding checks in both isc_quota_release() and
isc_quota_acquire_cb() to make sure that the described hangup does
not happen. Also see code comments.

Closes #4965

Merge branch '4965-isc_quota-bug-fix' into 'main'

See merge request isc-projects/bind9!10082
This commit is contained in:
Aram Sargsyan
2025-02-20 12:19:46 +00:00
2 changed files with 41 additions and 7 deletions

View File

@@ -60,6 +60,8 @@
atomic_exchange_explicit((o), (v), memory_order_acquire)
#define atomic_exchange_acq_rel(o, v) \
atomic_exchange_explicit((o), (v), memory_order_acq_rel)
#define atomic_fetch_add_acq_rel(o, v) \
atomic_fetch_add_explicit((o), (v), memory_order_acq_rel)
#define atomic_fetch_sub_acq_rel(o, v) \
atomic_fetch_sub_explicit((o), (v), memory_order_acq_rel)
#define atomic_compare_exchange_weak_acq_rel(o, e, d) \

View File

@@ -60,11 +60,12 @@ isc_quota_getsoft(isc_quota_t *quota) {
unsigned int
isc_quota_getused(isc_quota_t *quota) {
REQUIRE(VALID_QUOTA(quota));
return atomic_load_relaxed(&quota->used);
return atomic_load_acquire(&quota->used);
}
void
isc_quota_release(isc_quota_t *quota) {
struct cds_wfcq_node *node;
/*
* We are using the cds_wfcq_dequeue_blocking() variant here that
* has an internal mutex because we need synchronization on
@@ -74,11 +75,25 @@ isc_quota_release(isc_quota_t *quota) {
* with cds_wfcq_empty() before acquiring the internal lock, so if
* there's nothing queued, the call should be very lightweight.
*/
struct cds_wfcq_node *node =
cds_wfcq_dequeue_blocking(&quota->jobs.head, &quota->jobs.tail);
again:
node = cds_wfcq_dequeue_blocking(&quota->jobs.head, &quota->jobs.tail);
if (node == NULL) {
uint_fast32_t used = atomic_fetch_sub_relaxed(&quota->used, 1);
uint_fast32_t used = atomic_fetch_sub_acq_rel(&quota->used, 1);
INSIST(used > 0);
/*
* If this was the last quota released and in the meantime a
* new job has appeared in the queue, then give it a chance
* to run, otherwise it could get stuck there until a new quota
* is acquired and released again.
*/
if (used == 1 &&
!cds_wfcq_empty(&quota->jobs.head, &quota->jobs.tail))
{
atomic_fetch_add_acq_rel(&quota->used, 1);
goto again;
}
return;
}
@@ -92,10 +107,10 @@ isc_quota_acquire_cb(isc_quota_t *quota, isc_job_t *job, isc_job_cb cb,
REQUIRE(VALID_QUOTA(quota));
REQUIRE(job == NULL || cb != NULL);
uint_fast32_t used = atomic_fetch_add_relaxed(&quota->used, 1);
uint_fast32_t used = atomic_fetch_add_acq_rel(&quota->used, 1);
uint_fast32_t max = atomic_load_relaxed(&quota->max);
if (max != 0 && used >= max) {
(void)atomic_fetch_sub_relaxed(&quota->used, 1);
(void)atomic_fetch_sub_acq_rel(&quota->used, 1);
if (job != NULL) {
job->cb = cb;
job->cbarg = cbarg;
@@ -107,6 +122,23 @@ isc_quota_acquire_cb(isc_quota_t *quota, isc_job_t *job, isc_job_cb cb,
*/
cds_wfcq_enqueue(&quota->jobs.head, &quota->jobs.tail,
&job->wfcq_node);
/*
* While we were initializing and enqueuing a new node,
* quotas might have been released, and if no quota is
* used any more, then our newly enqueued job won't
* have a chance to get running until a new quota is
* acquired and released. To avoid a hangup, check
* quota->used again, if it's 0 then simulate a quota
* acquire/release for the current job to run as soon as
* possible, although we will still return ISC_R_QUOTA
* to the caller.
*/
if (atomic_compare_exchange_strong_acq_rel(
&quota->used, &(uint_fast32_t){ 0 }, 1))
{
isc_quota_release(quota);
}
}
return ISC_R_QUOTA;
}
@@ -124,7 +156,7 @@ isc_quota_destroy(isc_quota_t *quota) {
REQUIRE(VALID_QUOTA(quota));
quota->magic = 0;
INSIST(atomic_load(&quota->used) == 0);
INSIST(atomic_load_acquire(&quota->used) == 0);
INSIST(cds_wfcq_empty(&quota->jobs.head, &quota->jobs.tail));
cds_wfcq_destroy(&quota->jobs.head, &quota->jobs.tail);