[master] isc_atomic_storeq()/stats improvements

4248.	[func]		Add an isc_atomic_storeq() function, use it in
			stats counters to improve performance.
			[RT #39972] [RT #39979]
This commit is contained in:
Evan Hunt
2015-10-28 22:19:18 -07:00
parent 1ff2118d6c
commit 821ff5e8fa
11 changed files with 285 additions and 41 deletions

View File

@@ -288,11 +288,17 @@
@ISC_PLATFORM_HAVEXADDQ@
/*
* If the "atomic swap" operation is available on this architecture,
* ISC_PLATFORM_HAVEATOMICSTORE" will be defined.
* If the 32-bit "atomic swap" operation is available on this
* architecture, ISC_PLATFORM_HAVEATOMICSTORE" will be defined.
*/
@ISC_PLATFORM_HAVEATOMICSTORE@
/*
* If the 64-bit "atomic swap" operation is available on this
* architecture, ISC_PLATFORM_HAVEATOMICSTORE" will be defined.
*/
@ISC_PLATFORM_HAVEATOMICSTOREQ@
/*
* If the "compare-and-exchange" operation is available on this architecture,
* ISC_PLATFORM_HAVECMPXCHG will be defined.

View File

@@ -35,13 +35,45 @@
#define ISC_STATS_MAGIC ISC_MAGIC('S', 't', 'a', 't')
#define ISC_STATS_VALID(x) ISC_MAGIC_VALID(x, ISC_STATS_MAGIC)
#ifndef ISC_STATS_USEMULTIFIELDS
#if defined(ISC_RWLOCK_USEATOMIC) && defined(ISC_PLATFORM_HAVEXADD) && !defined(ISC_PLATFORM_HAVEXADDQ)
/*%
* Local macro confirming prescence of 64-bit
* increment and store operations, just to make
* the later macros simpler
*/
#if defined(ISC_PLATFORM_HAVEXADDQ) && defined(ISC_PLATFORM_HAVEATOMICSTOREQ)
#define ISC_STATS_HAVEATOMICQ 1
#else
#define ISC_STATS_HAVEATOMICQ 0
#endif
/*%
* Only lock the counters if 64-bit atomic operations are
* not available but cheap atomic lock operations are.
* On a modern 64-bit system this should never be the case.
*
* Normal locks are too expensive to be used whenever a counter
* is updated.
*/
#if !ISC_STATS_HAVEATOMICQ && defined(ISC_RWLOCK_HAVEATOMIC)
#define ISC_STATS_LOCKCOUNTERS 1
#else
#define ISC_STATS_LOCKCOUNTERS 0
#endif
/*%
* If 64-bit atomic operations are not available but
* 32-bit operations are then split the counter into two,
* using the atomic operations to try to ensure that any carry
* from the low word is correctly carried into the high word.
*
* Otherwise, just rely on standard 64-bit data types
* and operations
*/
#if !ISC_STATS_HAVEATOMICQ && defined(ISC_PLATFORM_HAVEXADD)
#define ISC_STATS_USEMULTIFIELDS 1
#else
#define ISC_STATS_USEMULTIFIELDS 0
#endif
#endif /* ISC_STATS_USEMULTIFIELDS */
#if ISC_STATS_USEMULTIFIELDS
typedef struct {
@@ -65,7 +97,7 @@ struct isc_stats {
* Locked by counterlock or unlocked if efficient rwlock is not
* available.
*/
#ifdef ISC_RWLOCK_USEATOMIC
#if ISC_STATS_LOCKCOUNTERS
isc_rwlock_t counterlock;
#endif
isc_stat_t *counters;
@@ -111,7 +143,7 @@ create_stats(isc_mem_t *mctx, int ncounters, isc_stats_t **statsp) {
goto clean_counters;
}
#ifdef ISC_RWLOCK_USEATOMIC
#if ISC_STATS_LOCKCOUNTERS
result = isc_rwlock_init(&stats->counterlock, 0, 0);
if (result != ISC_R_SUCCESS)
goto clean_copiedcounters;
@@ -131,7 +163,7 @@ create_stats(isc_mem_t *mctx, int ncounters, isc_stats_t **statsp) {
clean_counters:
isc_mem_put(mctx, stats->counters, sizeof(isc_stat_t) * ncounters);
#ifdef ISC_RWLOCK_USEATOMIC
#if ISC_STATS_LOCKCOUNTERS
clean_copiedcounters:
isc_mem_put(mctx, stats->copiedcounters,
sizeof(isc_stat_t) * ncounters);
@@ -177,7 +209,7 @@ isc_stats_detach(isc_stats_t **statsp) {
sizeof(isc_stat_t) * stats->ncounters);
UNLOCK(&stats->lock);
DESTROYLOCK(&stats->lock);
#ifdef ISC_RWLOCK_USEATOMIC
#if ISC_STATS_LOCKCOUNTERS
isc_rwlock_destroy(&stats->counterlock);
#endif
isc_mem_putanddetach(&stats->mctx, stats, sizeof(*stats));
@@ -198,7 +230,7 @@ static inline void
incrementcounter(isc_stats_t *stats, int counter) {
isc_int32_t prev;
#ifdef ISC_RWLOCK_USEATOMIC
#if ISC_STATS_LOCKCOUNTERS
/*
* We use a "read" lock to prevent other threads from reading the
* counter while we "writing" a counter field. The write access itself
@@ -219,7 +251,7 @@ incrementcounter(isc_stats_t *stats, int counter) {
*/
if (prev == (isc_int32_t)0xffffffff)
isc_atomic_xadd((isc_int32_t *)&stats->counters[counter].hi, 1);
#elif defined(ISC_PLATFORM_HAVEXADDQ)
#elif ISC_STATS_HAVEATOMICQ
UNUSED(prev);
isc_atomic_xaddq((isc_int64_t *)&stats->counters[counter], 1);
#else
@@ -227,7 +259,7 @@ incrementcounter(isc_stats_t *stats, int counter) {
stats->counters[counter]++;
#endif
#ifdef ISC_RWLOCK_USEATOMIC
#if ISC_STATS_LOCKCOUNTERS
isc_rwlock_unlock(&stats->counterlock, isc_rwlocktype_read);
#endif
}
@@ -236,7 +268,7 @@ static inline void
decrementcounter(isc_stats_t *stats, int counter) {
isc_int32_t prev;
#ifdef ISC_RWLOCK_USEATOMIC
#if ISC_STATS_LOCKCOUNTERS
isc_rwlock_lock(&stats->counterlock, isc_rwlocktype_read);
#endif
@@ -245,7 +277,7 @@ decrementcounter(isc_stats_t *stats, int counter) {
if (prev == 0)
isc_atomic_xadd((isc_int32_t *)&stats->counters[counter].hi,
-1);
#elif defined(ISC_PLATFORM_HAVEXADDQ)
#elif ISC_STATS_HAVEATOMICQ
UNUSED(prev);
isc_atomic_xaddq((isc_int64_t *)&stats->counters[counter], -1);
#else
@@ -253,7 +285,7 @@ decrementcounter(isc_stats_t *stats, int counter) {
stats->counters[counter]--;
#endif
#ifdef ISC_RWLOCK_USEATOMIC
#if ISC_STATS_LOCKCOUNTERS
isc_rwlock_unlock(&stats->counterlock, isc_rwlocktype_read);
#endif
}
@@ -262,7 +294,7 @@ static void
copy_counters(isc_stats_t *stats) {
int i;
#ifdef ISC_RWLOCK_USEATOMIC
#if ISC_STATS_LOCKCOUNTERS
/*
* We use a "write" lock before "reading" the statistics counters as
* an exclusive lock.
@@ -270,19 +302,21 @@ copy_counters(isc_stats_t *stats) {
isc_rwlock_lock(&stats->counterlock, isc_rwlocktype_write);
#endif
#if ISC_STATS_USEMULTIFIELDS
for (i = 0; i < stats->ncounters; i++) {
#if ISC_STATS_USEMULTIFIELDS
stats->copiedcounters[i] =
(isc_uint64_t)(stats->counters[i].hi) << 32 |
stats->counters[i].lo;
}
(isc_uint64_t)(stats->counters[i].hi) << 32 |
stats->counters[i].lo;
#elif ISC_STATS_HAVEATOMICQ
/* use xaddq(..., 0) as an atomic load */
stats->copiedcounters[i] =
(isc_uint64_t)isc_atomic_xaddq((isc_int64_t *)&stats->counters[i], 0);
#else
UNUSED(i);
memmove(stats->copiedcounters, stats->counters,
stats->ncounters * sizeof(isc_stat_t));
stats->copiedcounters[i] = stats->counters[i];
#endif
}
#ifdef ISC_RWLOCK_USEATOMIC
#if ISC_STATS_LOCKCOUNTERS
isc_rwlock_unlock(&stats->counterlock, isc_rwlocktype_write);
#endif
}
@@ -335,7 +369,7 @@ isc_stats_set(isc_stats_t *stats, isc_uint64_t val,
REQUIRE(ISC_STATS_VALID(stats));
REQUIRE(counter < stats->ncounters);
#ifdef ISC_RWLOCK_USEATOMIC
#if ISC_STATS_LOCKCOUNTERS
/*
* We use a "write" lock before "reading" the statistics counters as
* an exclusive lock.
@@ -346,11 +380,13 @@ isc_stats_set(isc_stats_t *stats, isc_uint64_t val,
#if ISC_STATS_USEMULTIFIELDS
stats->counters[counter].hi = (isc_uint32_t)((val >> 32) & 0xffffffff);
stats->counters[counter].lo = (isc_uint32_t)(val & 0xffffffff);
#elif ISC_STATS_HAVEATOMICQ
isc_atomic_storeq((isc_int64_t *)&stats->counters[counter], val);
#else
stats->counters[counter] = val;
#endif
#ifdef ISC_RWLOCK_USEATOMIC
#if ISC_STATS_LOCKCOUNTERS
isc_rwlock_unlock(&stats->counterlock, isc_rwlocktype_write);
#endif
}

View File

@@ -117,6 +117,12 @@ ATF_TC_BODY(snprintf, tc) {
n = isc_print_snprintf(buf, sizeof(buf), "%zo", size);
ATF_CHECK_EQ(n, 4);
ATF_CHECK_STREQ(buf, "1750");
zz = 0xf5f5f5f5f5f5f5f5;
memset(buf, 0xff, sizeof(buf));
n = isc_print_snprintf(buf, sizeof(buf), "0x%"ISC_PRINT_QUADFORMAT"x", zz);
ATF_CHECK_EQ(n, 18);
ATF_CHECK_STREQ(buf, "0xf5f5f5f5f5f5f5f5");
}
ATF_TC(fprintf);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2013 Internet Systems Consortium, Inc. ("ISC")
* Copyright (C) 2013, 2015 Internet Systems Consortium, Inc. ("ISC")
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -43,7 +43,7 @@ isc_atomic_xaddq(isc_int64_t *p, isc_int64_t val) {
#endif
/*
* This routine atomically stores the value 'val' in 'p'.
* This routine atomically stores the value 'val' in 'p' (32-bit version).
*/
#ifdef ISC_PLATFORM_HAVEATOMICSTORE
static __inline void
@@ -52,6 +52,16 @@ isc_atomic_store(isc_int32_t *p, isc_int32_t val) {
}
#endif
/*
* This routine atomically stores the value 'val' in 'p' (64-bit version).
*/
#ifdef ISC_PLATFORM_HAVEATOMICSTOREQ
static __inline void
isc_atomic_storeq(isc_int64_t *p, isc_int64_t val) {
(void) _InterlockedExchange64((__int64 *)p, (__int64)val);
}
#endif
/*
* This routine atomically replaces the value in 'p' with 'val', if the
* original value is equal to 'cmpval'. The original value is returned in any

View File

@@ -62,7 +62,7 @@ isc_atomic_xaddq(isc_int64_t *p, isc_int64_t val) {
#endif /* ISC_PLATFORM_HAVEXADDQ */
/*
* This routine atomically stores the value 'val' in 'p'.
* This routine atomically stores the value 'val' in 'p' (32-bit version).
*/
static __inline__ void
isc_atomic_store(isc_int32_t *p, isc_int32_t val) {
@@ -81,6 +81,28 @@ isc_atomic_store(isc_int32_t *p, isc_int32_t val) {
: "memory");
}
#ifdef ISC_PLATFORM_HAVEATOMICSTOREQ
/*
* This routine atomically stores the value 'val' in 'p' (64-bit version).
*/
static __inline__ void
isc_atomic_storeq(isc_int64_t *p, isc_int64_t val) {
__asm__ volatile(
#ifdef ISC_PLATFORM_USETHREADS
/*
* xchg should automatically lock memory, but we add it
* explicitly just in case (it at least doesn't harm)
*/
"lock;"
#endif
"xchgq %1, %0"
:
: "r"(val), "m"(*p)
: "memory");
}
#endif /* ISC_PLATFORM_HAVEATOMICSTOREQ */
/*
* This routine atomically replaces the value in 'p' with 'val', if the
* original value is equal to 'cmpval'. The original value is returned in any

View File

@@ -100,6 +100,9 @@ isc_atomic_cmpxchg(isc_int32_t *p, isc_int32_t cmpval, isc_int32_t val) {
UNUSED(val);
__asm (
/*
* p is %rdi, cmpval is %esi, val is %edx.
*/
"movl %edx, %ecx\n"
"movl %esi, %eax\n"
"movq %rdi, %rdx\n"
@@ -108,8 +111,12 @@ isc_atomic_cmpxchg(isc_int32_t *p, isc_int32_t cmpval, isc_int32_t val) {
"lock;"
#endif
/*
* If (%rdi) == %eax then (%rdi) := %edx.
* %eax is set to old (%ecx), which will be the return value.
* If [%rdi] == %eax then [%rdi] := %ecx (equal to %edx
* from above), and %eax is untouched (equal to %esi)
* from above.
*
* Else if [%rdi] != %eax then [%rdi] := [%rdi]
* (rewritten in write cycle) and %eax := [%rdi].
*/
"cmpxchgl %ecx, (%rdx)"
);