Compare commits

...

8 Commits

Author SHA1 Message Date
alessio
291843db68 Use the improved comparison for real this time. 2025-02-28 18:22:09 +01:00
alessio
18ae0b6a93 Improved lowercase comparison 2025-02-28 16:30:20 +01:00
alessio
f035de90ea Handle lowercase hashing 2025-02-28 13:29:45 +01:00
alessio
1b1202804c Switch to fxhash 2025-02-28 13:03:47 +01:00
alessio
9aa2ee0587 Fix case in hash function 2025-02-27 13:43:45 +01:00
alessio
08827d1dee Fuzzy case comparison in symtab 2025-02-27 11:40:26 +01:00
alessio
e914b1f132 Smaller initialization 2025-02-27 10:35:20 +01:00
alessio
ccfcc78a13 Switch symtab to use djb2 hash 2025-02-27 07:37:04 +01:00
2 changed files with 76 additions and 17 deletions

View File

@@ -123,7 +123,7 @@ isc_ascii_tolower4(uint32_t octets) {
static inline uint64_t
isc__ascii_load8(const uint8_t *ptr) {
uint64_t bytes = 0;
memmove(&bytes, ptr, sizeof(bytes));
memcpy(&bytes, ptr, sizeof(bytes));
return bytes;
}
@@ -133,16 +133,25 @@ isc__ascii_load8(const uint8_t *ptr) {
static inline bool
isc_ascii_lowerequal(const uint8_t *a, const uint8_t *b, unsigned int len) {
uint64_t a8 = 0, b8 = 0;
while (len >= 8) {
a8 = isc_ascii_tolower8(isc__ascii_load8(a));
b8 = isc_ascii_tolower8(isc__ascii_load8(b));
if (a8 != b8) {
return false;
if (len >= 8) {
const uint8_t *a_tail = a + len - 8;
const uint8_t *b_tail = b + len - 8;
while (len >= 8) {
a8 = isc_ascii_tolower8(isc__ascii_load8(a));
b8 = isc_ascii_tolower8(isc__ascii_load8(b));
if (a8 != b8) {
return false;
}
len -= 8;
a += 8;
b += 8;
}
len -= 8;
a += 8;
b += 8;
a8 = isc_ascii_tolower8(isc__ascii_load8(a_tail));
b8 = isc_ascii_tolower8(isc__ascii_load8(b_tail));
return a8 == b8;
}
while (len-- > 0) {
if (isc_ascii_tolower(*a++) != isc_ascii_tolower(*b++)) {
return false;

View File

@@ -33,7 +33,7 @@ typedef struct elt {
/* 7 bits means 128 entries at creation, which matches the common use of
* symtab */
#define ISC_SYMTAB_INIT_HASH_BITS 7
#define ISC_SYMTAB_INIT_HASH_BITS 4
#define SYMTAB_MAGIC ISC_MAGIC('S', 'y', 'm', 'T')
#define VALID_SYMTAB(st) ISC_MAGIC_VALID(st, SYMTAB_MAGIC)
@@ -122,7 +122,7 @@ elt__match(void *node, const void *key0, bool case_sensitive) {
if (case_sensitive) {
return memcmp(elt->key, key->key, key->size) == 0;
} else {
return isc_ascii_lowercmp(elt->key, key->key, key->size) == 0;
return isc_ascii_lowerequal(elt->key, key->key, key->size);
}
}
@@ -136,14 +136,64 @@ elt_match_nocase(void *node, const void *key) {
return elt__match(node, key, false);
}
#define HASH_INIT_DJB2 5381
/* The constant K from Rust's fxhash */
#define K 0x9e3779b97f4a7c15ull
static inline size_t
rotate_left(size_t x, unsigned int n) {
return (x << n) | (x >> (sizeof(size_t) * 8 - n));
}
static inline size_t
fx_add_to_hash(size_t hash, size_t i) {
return rotate_left(hash, 5) ^ i * K;
}
static size_t
fx_hash_bytes(size_t initial_hash, const uint8_t *bytes, size_t len, bool case_sensitive) {
size_t hash = initial_hash;
size_t case_mask = case_sensitive ? -1ull : (0b11011111 * 0x0101010101010101ull);
/* TODO sizeof(size_t) != 8? */
while (len >= sizeof(size_t)) {
size_t value;
memcpy(&value, bytes, sizeof(size_t));
hash = fx_add_to_hash(hash, value & case_mask);
bytes += sizeof(size_t);
len -= sizeof(size_t);
}
if (len >= 4) {
uint32_t value;
memcpy(&value, bytes, sizeof(uint32_t));
hash = fx_add_to_hash(hash, value & case_mask);
bytes += 4;
len -= 4;
}
if (len >= 2) {
uint16_t value;
memcpy(&value, bytes, sizeof(uint16_t));
hash = fx_add_to_hash(hash, value & case_mask);
bytes += 2;
len -= 2;
}
if (len >= 1) {
hash = fx_add_to_hash(hash, bytes[0] & case_mask);
}
return hash;
}
static uint32_t
elt_hash(elt_t *elt, bool case_sensitive) {
isc_hash32_t hash;
isc_hash32_init(&hash);
isc_hash32_hash(&hash, elt->key, elt->size, case_sensitive);
isc_hash32_hash(&hash, &elt->type, sizeof(elt->type), false);
return isc_hash32_finalize(&hash);
const uint8_t *ptr = elt->key;
size_t len = elt->size;
return fx_hash_bytes(0, ptr, len, case_sensitive);
}
isc_result_t