From 86fd9c8abfbacea2902b4fe42a8a4664b2a531cf Mon Sep 17 00:00:00 2001 From: jdybnis Date: Mon, 16 Mar 2009 07:01:19 +0000 Subject: [PATCH] improved perf_test to measure steady state behavior improved skiplist random level selection added buitin random number generator; some platforms don't provide a good one improved heuristics for hashtable resize --- include/common.h | 4 +- include/hashtable.h | 25 ++++---- include/list.h | 2 +- include/map.h | 16 ++--- include/runtime.h | 4 +- include/skiplist.h | 2 +- makefile | 16 ++--- map/hashtable.c | 140 +++++++++++++++++++++++------------------ map/list.c | 31 ++++----- map/map.c | 10 +-- map/skiplist.c | 138 ++++++++++++++++++++-------------------- map/unsafe_skiplist.c | 142 +++++++++++++++--------------------------- perf.sh | 4 +- runtime/mem.c | 2 +- runtime/rcu.c | 4 +- runtime/runtime.c | 50 ++++++++++----- test/perf_test.c | 137 +++++++++++++++++----------------------- 17 files changed, 354 insertions(+), 373 deletions(-) diff --git a/include/common.h b/include/common.h index fbd64bc..ce3eabf 100644 --- a/include/common.h +++ b/include/common.h @@ -1,4 +1,4 @@ -/* +/* * Written by Josh Dybnis and released to the public domain, as explained at * http://creativecommons.org/licenses/publicdomain */ @@ -19,7 +19,7 @@ #ifndef NBD_SINGLE_THREADED -#define MAX_NUM_THREADS 16 // make this whatever you want, but make it a power of 2 +#define MAX_NUM_THREADS 32 // make this whatever you want, but make it a power of 2 #define SYNC_SWAP(addr,x) __sync_lock_test_and_set(addr,x) #define SYNC_CAS(addr,old,x) __sync_val_compare_and_swap(addr,old,x) diff --git a/include/hashtable.h b/include/hashtable.h index d0d12ae..d9001d4 100644 --- a/include/hashtable.h +++ b/include/hashtable.h @@ -6,22 +6,21 @@ typedef struct ht hashtable_t; typedef struct ht_iter ht_iter_t; -hashtable_t * ht_alloc (const datatype_t *key_type); -map_val_t ht_cas (hashtable_t *ht, map_key_t key, map_val_t expected_val, map_val_t val); -map_val_t ht_get (hashtable_t *ht, map_key_t key); -map_val_t ht_remove (hashtable_t *ht, map_key_t key); -size_t ht_count (hashtable_t *ht); -void ht_print (hashtable_t *ht); -void ht_free (hashtable_t *ht); - -ht_iter_t * ht_iter_begin (hashtable_t *ht, map_key_t key); -map_val_t ht_iter_next (ht_iter_t *iter, map_key_t *key_ptr); -void ht_iter_free (ht_iter_t *iter); +hashtable_t * ht_alloc (const datatype_t *key_type); +map_val_t ht_cas (hashtable_t *ht, map_key_t key, map_val_t expected_val, map_val_t val); +map_val_t ht_get (hashtable_t *ht, map_key_t key); +map_val_t ht_remove (hashtable_t *ht, map_key_t key); +size_t ht_count (hashtable_t *ht); +void ht_print (hashtable_t *ht, int verbose); +void ht_free (hashtable_t *ht); +ht_iter_t * ht_iter_begin (hashtable_t *ht, map_key_t key); +map_val_t ht_iter_next (ht_iter_t *iter, map_key_t *key_ptr); +void ht_iter_free (ht_iter_t *iter); static const map_impl_t MAP_IMPL_HT = { (map_alloc_t)ht_alloc, (map_cas_t)ht_cas, (map_get_t)ht_get, (map_remove_t)ht_remove, - (map_count_t)ht_count, (map_print_t)ht_print, (map_free_t)ht_free, (map_iter_begin_t)ht_iter_begin, - (map_iter_next_t)ht_iter_next, (map_iter_free_t)ht_iter_free + (map_count_t)ht_count, (map_print_t)ht_print, (map_free_t)ht_free, + (map_iter_begin_t)ht_iter_begin, (map_iter_next_t)ht_iter_next, (map_iter_free_t)ht_iter_free }; #endif//HASHTABLE_H diff --git a/include/list.h b/include/list.h index a246bc2..c6db953 100644 --- a/include/list.h +++ b/include/list.h @@ -11,7 +11,7 @@ map_val_t ll_cas (list_t *ll, map_key_t key, map_val_t expected_val, map_va map_val_t ll_lookup (list_t *ll, map_key_t key); map_val_t ll_remove (list_t *ll, map_key_t key); size_t ll_count (list_t *ll); -void ll_print (list_t *ll); +void ll_print (list_t *ll, int verbose); void ll_free (list_t *ll); map_key_t ll_min_key (list_t *sl); diff --git a/include/map.h b/include/map.h index 0f847aa..60c3b6c 100644 --- a/include/map.h +++ b/include/map.h @@ -23,7 +23,7 @@ map_val_t map_cas (map_t *map, map_key_t key, map_val_t expected_val, map_va map_val_t map_replace (map_t *map, map_key_t key, map_val_t new_val); map_val_t map_remove (map_t *map, map_key_t key); map_val_t map_count (map_t *map); -void map_print (map_t *map); +void map_print (map_t *map, int verbose); void map_free (map_t *map); map_iter_t * map_iter_begin (map_t *map, map_key_t key); @@ -36,13 +36,13 @@ void map_iter_free (map_iter_t *iter); #define CAS_EXPECT_EXISTS (-1) #define CAS_EXPECT_WHATEVER (-2) -typedef void * (*map_alloc_t) (const datatype_t *); -typedef map_val_t (*map_cas_t) (void *, map_key_t , map_val_t, map_val_t); -typedef map_val_t (*map_get_t) (void *, map_key_t ); -typedef map_val_t (*map_remove_t) (void *, map_key_t ); -typedef size_t (*map_count_t) (void *); -typedef void (*map_print_t) (void *); -typedef void (*map_free_t) (void *); +typedef void * (*map_alloc_t) (const datatype_t *); +typedef map_val_t (*map_cas_t) (void *, map_key_t , map_val_t, map_val_t); +typedef map_val_t (*map_get_t) (void *, map_key_t ); +typedef map_val_t (*map_remove_t) (void *, map_key_t ); +typedef size_t (*map_count_t) (void *); +typedef void (*map_print_t) (void *, int); +typedef void (*map_free_t) (void *); typedef map_iter_t * (*map_iter_begin_t) (void *, map_key_t); typedef map_val_t (*map_iter_next_t) (map_iter_t *, map_key_t *); diff --git a/include/runtime.h b/include/runtime.h index ff1d245..e453fec 100644 --- a/include/runtime.h +++ b/include/runtime.h @@ -1,4 +1,4 @@ -/* +/* * Written by Josh Dybnis and released to the public domain, as explained at * http://creativecommons.org/licenses/publicdomain */ @@ -11,7 +11,7 @@ extern DECLARE_THREAD_LOCAL(tid_, int); int nbd_thread_create (pthread_t *restrict thread, int thread_id, void *(*start_routine)(void *), void *restrict arg); -int nbd_rand (void); +uint64_t nbd_rand (void); uint64_t nbd_rand_seed (int i); int nbd_next_rand (uint64_t *r); diff --git a/include/skiplist.h b/include/skiplist.h index 4377a94..26fb7cb 100644 --- a/include/skiplist.h +++ b/include/skiplist.h @@ -11,7 +11,7 @@ map_val_t sl_cas (skiplist_t *sl, map_key_t key, map_val_t expected_val, ma map_val_t sl_lookup (skiplist_t *sl, map_key_t key); map_val_t sl_remove (skiplist_t *sl, map_key_t key); size_t sl_count (skiplist_t *sl); -void sl_print (skiplist_t *sl); +void sl_print (skiplist_t *sl, int verbose); void sl_free (skiplist_t *sl); map_key_t sl_min_key (skiplist_t *sl); diff --git a/makefile b/makefile index 5ede683..0130026 100644 --- a/makefile +++ b/makefile @@ -4,13 +4,13 @@ ################################################################################################### # Makefile for building programs with whole-program interfile optimization ################################################################################################### -CFLAGS0 := -Wall -Werror -std=gnu99 -lpthread #-m32 -DNBD32 -CFLAGS1 := $(CFLAGS0) -g -O3 -DNDEBUG #-fwhole-program -combine -CFLAGS2 := $(CFLAGS1) #-DENABLE_TRACE -CFLAGS3 := $(CFLAGS2) #-DLIST_USE_HAZARD_POINTER -CFLAGS := $(CFLAGS3) #-DNBD_SINGLE_THREADED #-DUSE_SYSTEM_MALLOC #-DTEST_STRING_KEYS +CFLAGS0 := -Wall -Werror -std=gnu99 -lpthread #-m32 -DNBD32 +CFLAGS1 := $(CFLAGS0) -g -O3 #-DNDEBUG #-fwhole-program -combine +CFLAGS2 := $(CFLAGS1) #-DENABLE_TRACE +CFLAGS3 := $(CFLAGS2) #-DLIST_USE_HAZARD_POINTER +CFLAGS := $(CFLAGS3) #-DNBD_SINGLE_THREADED #-DUSE_SYSTEM_MALLOC #-DTEST_STRING_KEYS INCS := $(addprefix -I, include) -TESTS := output/perf_test output/map_test1 output/map_test2 output/rcu_test output/txn_test #output/haz_test +TESTS := output/perf_test #output/map_test1 output/map_test2 output/rcu_test output/txn_test #output/haz_test OBJS := $(TESTS) RUNTIME_SRCS := runtime/runtime.c runtime/rcu.c runtime/lwt.c runtime/mem.c datatype/nstring.c #runtime/hazard.c @@ -19,7 +19,7 @@ MAP_SRCS := map/map.c map/list.c map/skiplist.c map/hashtable.c haz_test_SRCS := $(RUNTIME_SRCS) test/haz_test.c rcu_test_SRCS := $(RUNTIME_SRCS) test/rcu_test.c txn_test_SRCS := $(RUNTIME_SRCS) $(MAP_SRCS) test/txn_test.c test/CuTest.c txn/txn.c -map_test1_SRCS := $(RUNTIME_SRCS) $(MAP_SRCS) test/map_test1.c +map_test1_SRCS := $(RUNTIME_SRCS) $(MAP_SRCS) test/map_test1.c map_test2_SRCS := $(RUNTIME_SRCS) $(MAP_SRCS) test/map_test2.c test/CuTest.c perf_test_SRCS := $(RUNTIME_SRCS) $(MAP_SRCS) test/perf_test.c @@ -41,7 +41,7 @@ $(addsuffix .log, $(TESTS)) : %.log : % # gcc. Compilation fails when -MM -MF is used and there is more than one source file. # Otherwise "-MM -MT $@.d -MF $@.d" should be part of the command line for the compile. # -# Also, when calculating dependencies -combine is removed from CFLAGS because of another bug +# Also, when calculating dependencies -combine is removed from CFLAGS because of another bug # in gcc. It chokes when -MM is used with -combine. ################################################################################################### $(OBJS): output/% : output/%.d makefile diff --git a/map/hashtable.c b/map/hashtable.c index 85ffe41..410d97b 100644 --- a/map/hashtable.c +++ b/map/hashtable.c @@ -1,14 +1,14 @@ -/* +/* * Written by Josh Dybnis and released to the public domain, as explained at * http://creativecommons.org/licenses/publicdomain - * - * C implementation of Cliff Click's lock-free hash table from + * + * C implementation of Cliff Click's lock-free hash table from * http://www.azulsystems.com/events/javaone_2008/2008_CodingNonBlock.pdf * http://sourceforge.net/projects/high-scale-lib * - * Note: This is code uses synchronous atomic operations because that is all that x86 provides. + * Note: This is code uses synchronous atomic operations because that is all that x86 provides. * Every atomic operation is also an implicit full memory barrier. The upshot is that it simplifies - * the code a bit, but it won't be as fast as it could be on platforms that provide weaker + * the code a bit, but it won't be as fast as it could be on platforms that provide weaker * operations like unfenced CAS which would still do the job. * * 11FebO9 - Bug fix in ht_iter_next() from Rui Ueyama @@ -39,12 +39,13 @@ typedef struct hti { #ifdef USE_SYSTEM_MALLOC void *unaligned_table_ptr; // system malloc doesn't guarentee cache-line alignment #endif - unsigned scale; - int max_probe; + size_t count; // TODO: make these counters distributed + size_t key_count; + size_t copy_scan; + size_t num_entries_copied; + int probe; int ref_count; - int count; // TODO: make these counters distributed - int num_entries_copied; - int copy_scan; + uint8_t scale; } hti_t; struct ht_iter { @@ -55,6 +56,9 @@ struct ht_iter { struct ht { hti_t *hti; const datatype_t *key_type; + uint32_t hti_copies; + double density; + int probe; }; static const map_val_t COPIED_VALUE = TAG_VALUE(DOES_NOT_EXIST, TAG1); @@ -63,24 +67,27 @@ static const map_val_t TOMBSTONE = STRIP_TAG(-1, TAG1); static const unsigned ENTRIES_PER_BUCKET = CACHE_LINE_SIZE/sizeof(entry_t); static const unsigned ENTRIES_PER_COPY_CHUNK = CACHE_LINE_SIZE/sizeof(entry_t)*2; static const unsigned MIN_SCALE = 4; // min 16 entries (4 buckets) -static const unsigned MAX_BUCKETS_TO_PROBE = 250; static int hti_copy_entry (hti_t *ht1, volatile entry_t *ent, uint32_t ent_key_hash, hti_t *ht2); // Choose the next bucket to probe using the high-order bits of . static inline int get_next_ndx(int old_ndx, uint32_t key_hash, int ht_scale) { +#if 1 int incr = (key_hash >> (32 - ht_scale)); - incr += !incr; // If the increment is 0, make it 1. + if (incr < ENTRIES_PER_BUCKET) { incr += ENTRIES_PER_BUCKET; } return (old_ndx + incr) & MASK(ht_scale); +#else + return (old_ndx + ENTRIES_PER_BUCKET) & MASK(ht_scale); +#endif } -// Lookup in . +// Lookup in . // -// Return the entry that is in, or if isn't in return the entry that it would be -// in if it were inserted into . If there is no room for in then return NULL, to +// Return the entry that is in, or if isn't in return the entry that it would be +// in if it were inserted into . If there is no room for in then return NULL, to // indicate that the caller should look in next>. // -// Record if the entry being returned is empty. Otherwise the caller will have to waste time +// Record if the entry being returned is empty. Otherwise the caller will have to waste time // re-comparing the keys to confirm that it did not lose a race to fill an empty entry. static volatile entry_t *hti_lookup (hti_t *hti, map_key_t key, uint32_t key_hash, int *is_empty) { TRACE("h2", "hti_lookup(key %p in hti %p)", key, hti); @@ -88,10 +95,10 @@ static volatile entry_t *hti_lookup (hti_t *hti, map_key_t key, uint32_t key_has // Probe one cache line at a time int ndx = key_hash & MASK(hti->scale); // the first entry to search - for (int i = 0; i < hti->max_probe; ++i) { + for (int i = 0; i < hti->probe; ++i) { // The start of the bucket is the first entry in the cache line. - volatile entry_t *bucket = hti->table + (ndx & ~(ENTRIES_PER_BUCKET-1)); + volatile entry_t *bucket = hti->table + (ndx & ~(ENTRIES_PER_BUCKET-1)); // Start searching at the indexed entry. Then loop around to the begining of the cache line. for (int j = 0; j < ENTRIES_PER_BUCKET; ++j) { @@ -99,13 +106,13 @@ static volatile entry_t *hti_lookup (hti_t *hti, map_key_t key, uint32_t key_has map_key_t ent_key = ent->key; if (ent_key == DOES_NOT_EXIST) { - TRACE("h1", "hti_lookup: entry %p for key %p is empty", ent, + TRACE("h1", "hti_lookup: entry %p for key %p is empty", ent, (hti->ht->key_type == NULL) ? (void *)key : GET_PTR(key)); *is_empty = 1; // indicate an empty so the caller avoids an expensive key compare return ent; } - // Compare with the key in the entry. + // Compare with the key in the entry. if (EXPECT_TRUE(hti->ht->key_type == NULL)) { // fast path for integer keys if (ent_key == key) { @@ -152,12 +159,13 @@ static hti_t *hti_alloc (hashtable_t *parent, int scale) { #endif memset((void *)hti->table, 0, sz); - // When searching for a key probe a maximum of 1/4 of the buckets up to 1000 buckets. - hti->max_probe = ((1ULL << (hti->scale - 2)) / ENTRIES_PER_BUCKET) + 4; - if (hti->max_probe > MAX_BUCKETS_TO_PROBE) { - hti->max_probe = MAX_BUCKETS_TO_PROBE; + hti->probe = (int)(hti->scale * 1.5) + 2; + int quarter = (1ULL << (hti->scale - 2)) / ENTRIES_PER_BUCKET; + if (hti->probe > quarter && quarter > 4) { + // When searching for a key probe a maximum of 1/4 + hti->probe = quarter; } - + ASSERT(hti->probe); hti->ht = parent; hti->ref_count = 1; // one for the parent @@ -177,8 +185,7 @@ static void hti_start_copy (hti_t *hti) { // heuristics to determine the size of the new table size_t count = ht_count(hti->ht); unsigned int new_scale = hti->scale; - new_scale += (count > (1ULL << (new_scale - 2))); // double size if more than 1/4 full - new_scale += (count > (1ULL << (new_scale - 2))); // double size again if more than 1/2 full + new_scale += (count > (1ULL << (hti->scale - 1))) || (hti->key_count > (1ULL << (hti->scale - 2)) + (1ULL << (hti->scale - 3))); // double size if more than 1/2 full // Allocate the new table and attempt to install it. hti_t *next = hti_alloc(hti->ht, new_scale); @@ -194,9 +201,12 @@ static void hti_start_copy (hti_t *hti) { return; } TRACE("h0", "hti_start_copy: new hti %p scale %llu", next, next->scale); + SYNC_ADD(&hti->ht->hti_copies, 1); + hti->ht->density = (double)hti->key_count / (1ULL << hti->scale) * 100; + hti->ht->probe = hti->probe; } -// Copy the key and value stored in (which must be an entry in ) to . +// Copy the key and value stored in (which must be an entry in ) to . // // Return 1 unless is already copied (then return 0), so the caller can account for the total // number of entries left to copy. @@ -236,15 +246,15 @@ static int hti_copy_entry (hti_t *ht1, volatile entry_t *ht1_ent, uint32_t key_h // The old table's dead entries don't need to be copied to the new table if (ht1_ent_val == TOMBSTONE) - return TRUE; + return TRUE; // Install the key in the new table. map_key_t ht1_ent_key = ht1_ent->key; map_key_t key = (ht1->ht->key_type == NULL) ? (map_key_t)ht1_ent_key : (map_key_t)GET_PTR(ht1_ent_key); // We use 0 to indicate that is uninitiallized. Occasionally the key's hash will really be 0 and we - // waste time recomputing it every time. It is rare enough that it won't hurt performance. - if (key_hash == 0) { + // waste time recomputing it every time. It is rare enough that it won't hurt performance. + if (key_hash == 0) { #ifdef NBD32 key_hash = (ht1->ht->key_type == NULL) ? murmur32_4b(ht1_ent_key) : ht1->ht->key_type->hash((void *)key); #else @@ -272,6 +282,7 @@ static int hti_copy_entry (hti_t *ht1, volatile entry_t *ht1_ent, uint32_t key_h ht1_ent_key, old_ht2_ent_key); return hti_copy_entry(ht1, ht1_ent, key_hash, ht2); // recursive tail-call } + SYNC_ADD(&ht2->key_count, 1); } // Copy the value to the entry in the new table. @@ -295,22 +306,22 @@ static int hti_copy_entry (hti_t *ht1, volatile entry_t *ht1_ent, uint32_t key_h return TRUE; } - TRACE("h0", "hti_copy_entry: lost race to install value %p in new entry; found value %p", + TRACE("h0", "hti_copy_entry: lost race to install value %p in new entry; found value %p", ht1_ent_val, old_ht2_ent_val); return FALSE; // another thread completed the copy } -// Compare with the existing value associated with . If the values match then -// replace the existing value with . If is DOES_NOT_EXIST, delete the value associated with +// Compare with the existing value associated with . If the values match then +// replace the existing value with . If is DOES_NOT_EXIST, delete the value associated with // the key by replacing it with a TOMBSTONE. // // Return the previous value associated with , or DOES_NOT_EXIST if is not in the table -// or associated with a TOMBSTONE. If a copy is in progress and has been copied to the next -// table then return COPIED_VALUE. +// or associated with a TOMBSTONE. If a copy is in progress and has been copied to the next +// table then return COPIED_VALUE. // // NOTE: the returned value matches iff the set succeeds // -// Certain values of have special meaning. If is CAS_EXPECT_EXISTS then any +// Certain values of have special meaning. If is CAS_EXPECT_EXISTS then any // real value matches (i.ent. not a TOMBSTONE or DOES_NOT_EXIST) as long as is in the table. If // is CAS_EXPECT_WHATEVER then skip the test entirely. // @@ -343,13 +354,13 @@ static map_val_t hti_cas (hti_t *hti, map_key_t key, uint32_t key_hash, map_val_ return DOES_NOT_EXIST; // Allocate . - map_key_t new_key = (hti->ht->key_type == NULL) - ? (map_key_t)key + map_key_t new_key = (hti->ht->key_type == NULL) + ? (map_key_t)key : (map_key_t)hti->ht->key_type->clone((void *)key); #ifndef NBD32 if (EXPECT_FALSE(hti->ht->key_type != NULL)) { - // Combine pointer with bits from its hash - new_key = ((uint64_t)(key_hash >> 16) << 48) | new_key; + // Combine pointer with bits from its hash + new_key = ((uint64_t)(key_hash >> 16) << 48) | new_key; } #endif @@ -359,7 +370,7 @@ static map_val_t hti_cas (hti_t *hti, map_key_t key, uint32_t key_hash, map_val_ // Retry if another thread stole the entry out from under us. if (old_ent_key != DOES_NOT_EXIST) { TRACE("h0", "hti_cas: lost race to install key %p in entry %p", new_key, ent); - TRACE("h0", "hti_cas: found %p instead of NULL", + TRACE("h0", "hti_cas: found %p instead of NULL", (hti->ht->key_type == NULL) ? (void *)old_ent_key : GET_PTR(old_ent_key), 0); if (hti->ht->key_type != NULL) { nbd_free(GET_PTR(new_key)); @@ -367,9 +378,10 @@ static map_val_t hti_cas (hti_t *hti, map_key_t key, uint32_t key_hash, map_val_ return hti_cas(hti, key, key_hash, expected, new); // tail-call } TRACE("h2", "hti_cas: installed key %p in entry %p", new_key, ent); + SYNC_ADD(&hti->key_count, 1); } - TRACE("h0", "hti_cas: entry for key %p is %p", + TRACE("h0", "hti_cas: entry for key %p is %p", (hti->ht->key_type == NULL) ? (void *)ent->key : GET_PTR(ent->key), ent); // If the entry is in the middle of a copy, the copy must be completed first. @@ -380,7 +392,7 @@ static map_val_t hti_cas (hti_t *hti, map_key_t key, uint32_t key_hash, map_val_ if (did_copy) { (void)SYNC_ADD(&hti->num_entries_copied, 1); } - TRACE("h0", "hti_cas: value in the middle of a copy, copy completed by %s", + TRACE("h0", "hti_cas: value in the middle of a copy, copy completed by %s", (did_copy ? "self" : "other"), 0); } TRACE("h0", "hti_cas: value copied to next table, retry on next table", 0, 0); @@ -428,7 +440,7 @@ static map_val_t hti_get (hti_t *hti, map_key_t key, uint32_t key_hash) { volatile entry_t *ent = hti_lookup(hti, key, key_hash, &is_empty); // When hti_lookup() returns NULL it means we hit the reprobe limit while - // searching the table. In that case, if a copy is in progress the key + // searching the table. In that case, if a copy is in progress the key // might exist in the copy. if (EXPECT_FALSE(ent == NULL)) { if (VOLATILE_DEREF(hti).next != NULL) @@ -467,23 +479,23 @@ map_val_t ht_get (hashtable_t *ht, map_key_t key) { // returns TRUE if copy is done static int hti_help_copy (hti_t *hti) { volatile entry_t *ent; - size_t limit; + size_t limit; size_t total_copied = hti->num_entries_copied; size_t num_copied = 0; - size_t x = hti->copy_scan; + size_t x = hti->copy_scan; TRACE("h1", "ht_cas: help copy. scan is %llu, size is %llu", x, 1<scale); if (total_copied != (1ULL << hti->scale)) { // Panic if we've been around the array twice and still haven't finished the copy. - int panic = (x >= (1ULL << (hti->scale + 1))); + int panic = (x >= (1ULL << (hti->scale + 1))); if (!panic) { limit = ENTRIES_PER_COPY_CHUNK; // Reserve some entries for this thread to copy. There is a race condition here because the // fetch and add isn't atomic, but that is ok. - hti->copy_scan = x + ENTRIES_PER_COPY_CHUNK; + hti->copy_scan = x + ENTRIES_PER_COPY_CHUNK; - // might be larger than the size of the table, if some thread stalls while + // might be larger than the size of the table, if some thread stalls while // copying. In that case we just wrap around to the begining and make another pass through // the table. ent = hti->table + (x & MASK(hti->scale)); @@ -599,7 +611,7 @@ size_t ht_count (hashtable_t *ht) { size_t count = 0; while (hti) { count += hti->count; - hti = hti->next; + hti = hti->next; } return count; } @@ -609,6 +621,8 @@ hashtable_t *ht_alloc (const datatype_t *key_type) { hashtable_t *ht = nbd_malloc(sizeof(hashtable_t)); ht->key_type = key_type; ht->hti = (hti_t *)hti_alloc(ht, MIN_SCALE); + ht->hti_copies = 0; + ht->density = 0.0; return ht; } @@ -624,18 +638,24 @@ void ht_free (hashtable_t *ht) { nbd_free(ht); } -void ht_print (hashtable_t *ht) { +void ht_print (hashtable_t *ht, int verbose) { + printf("probe:%-2d density:%.1f%% count:%-8lld ", ht->probe, ht->density, (uint64_t)ht_count(ht)); hti_t *hti = ht->hti; while (hti) { - printf("hti:%p scale:%u count:%d copied:%d\n", hti, hti->scale, hti->count, hti->num_entries_copied); - for (int i = 0; i < (1ULL << hti->scale); ++i) { - volatile entry_t *ent = hti->table + i; - printf("[0x%x] 0x%llx:0x%llx\n", i, (uint64_t)ent->key, (uint64_t)ent->val); - if (i > 30) { - printf("...\n"); - break; + if (verbose) { + for (int i = 0; i < (1ULL << hti->scale); ++i) { + volatile entry_t *ent = hti->table + i; + printf("[0x%x] 0x%llx:0x%llx\n", i, (uint64_t)ent->key, (uint64_t)ent->val); + if (i > 30) { + printf("...\n"); + break; + } } } + int scale = hti->scale; + printf("hti count:%lld scale:%d key density:%.1f%% value density:%.1f%% probe:%d\n", + (uint64_t)hti->count, scale, (double)hti->key_count / (1ULL << scale) * 100, + (double)hti->count / (1ULL << scale) * 100, hti->probe); hti = hti->next; } } @@ -691,7 +711,7 @@ map_val_t ht_iter_next (ht_iter_t *iter, map_key_t *key_ptr) { // Go to the next entry if key is already deleted. if (val == DOES_NOT_EXIST) return ht_iter_next(iter, key_ptr); // recursive tail-call - } + } if (key_ptr) { *key_ptr = key; diff --git a/map/list.c b/map/list.c index 992de52..04bfd2d 100644 --- a/map/list.c +++ b/map/list.c @@ -329,22 +329,25 @@ map_val_t ll_remove (list_t *ll, map_key_t key) { return val; } -void ll_print (list_t *ll) { - markable_t next = ll->head->next; - int i = 0; - while (next != DOES_NOT_EXIST) { - node_t *item = STRIP_MARK(next); - if (item == NULL) - break; - printf("%s%p:0x%llx ", HAS_MARK(item->next) ? "*" : "", item, (uint64_t)item->key); - fflush(stdout); - if (i++ > 30) { - printf("..."); - break; +void ll_print (list_t *ll, int verbose) { + if (verbose) { + markable_t next = ll->head->next; + int i = 0; + while (next != DOES_NOT_EXIST) { + node_t *item = STRIP_MARK(next); + if (item == NULL) + break; + printf("%s%p:0x%llx ", HAS_MARK(item->next) ? "*" : "", item, (uint64_t)item->key); + fflush(stdout); + if (i++ > 30) { + printf("..."); + break; + } + next = item->next; } - next = item->next; + printf("\n"); } - printf("\n"); + printf("count:%llu\n", (uint64_t)ll_count(ll)); } ll_iter_t *ll_iter_begin (list_t *ll, map_key_t key) { diff --git a/map/map.c b/map/map.c index 176b79a..6305c45 100644 --- a/map/map.c +++ b/map/map.c @@ -1,4 +1,4 @@ -/* +/* * Written by Josh Dybnis and released to the public domain, as explained at * http://creativecommons.org/licenses/publicdomain * @@ -19,8 +19,8 @@ struct map_iter { void *state; }; -map_t *map_alloc (const map_impl_t *map_impl, const datatype_t *key_type) { - map_t *map = nbd_malloc(sizeof(map_t)); +map_t *map_alloc (const map_impl_t *map_impl, const datatype_t *key_type) { + map_t *map = nbd_malloc(sizeof(map_t)); map->impl = map_impl; map->data = map->impl->alloc(key_type); return map; @@ -30,8 +30,8 @@ void map_free (map_t *map) { map->impl->free_(map->data); } -void map_print (map_t *map) { - map->impl->print(map->data); +void map_print (map_t *map, int verbose) { + map->impl->print(map->data, verbose); } map_val_t map_count (map_t *map) { diff --git a/map/skiplist.c b/map/skiplist.c index 73bad7b..0746c00 100644 --- a/map/skiplist.c +++ b/map/skiplist.c @@ -1,4 +1,4 @@ -/* +/* * Written by Josh Dybnis and released to the public domain, as explained at * http://creativecommons.org/licenses/publicdomain * @@ -9,10 +9,10 @@ * See also Kir Fraser's dissertation "Practical Lock Freedom". * www.cl.cam.ac.uk/techreports/UCAM-CL-TR-579.pdf * - * I've generalized the data structure to support update operations like set() and CAS() in addition to + * I've generalized the data structure to support update operations like set() and CAS() in addition to * the normal add() and remove() operations. * - * Warning: This code is written for the x86 memory-model. The algorithim depends on certain stores + * Warning: This code is written for the x86 memory-model. The algorithim depends on certain stores * and loads being ordered. This code won't work correctly on platforms with weaker memory models if * you don't add memory barriers in the right places. */ @@ -27,7 +27,7 @@ #include "rcu.h" // Setting MAX_LEVELS to 1 essentially makes this data structure the Harris-Michael lock-free list (see list.c). -#define MAX_LEVELS 32 +#define MAX_LEVELS 15 enum unlink { FORCE_UNLINK, @@ -49,7 +49,7 @@ struct sl_iter { struct sl { node_t *head; const datatype_t *key_type; - int high_water; // max level of any item in the list + int high_water; // max historic number of levels }; // Marking the field of a node logically removes it from the list @@ -65,11 +65,18 @@ static inline node_t * STRIP_MARK(markable_t x) { return ((node_t *)STRIP_TAG(x, #define STRIP_MARK(x) ((node_t *)STRIP_TAG((x), 0x1)) #endif -static int random_levels (void) { +static int random_levels (skiplist_t *sl) { unsigned r = nbd_rand(); - int n = __builtin_ctz(r) / 2 + 1; - if (n > MAX_LEVELS) { n = MAX_LEVELS; } - return n; + int z = __builtin_ctz(r); + int levels = (int)(z / 1.5); + if (levels == 0) + return 1; + if (levels > sl->high_water) { + levels = SYNC_ADD(&sl->high_water, 1); + TRACE("s2", "random_levels: increased high water mark to %lld", sl->high_water, 0); + } + if (levels > MAX_LEVELS) { levels = MAX_LEVELS; } + return levels; } static node_t *node_alloc (int num_levels, map_key_t key, map_val_t val) { @@ -87,7 +94,7 @@ static node_t *node_alloc (int num_levels, map_key_t key, map_val_t val) { skiplist_t *sl_alloc (const datatype_t *key_type) { skiplist_t *sl = (skiplist_t *)nbd_malloc(sizeof(skiplist_t)); sl->key_type = key_type; - sl->high_water = 0; + sl->high_water = 1; sl->head = node_alloc(MAX_LEVELS, 0, 0); memset(sl->head->next, 0, MAX_LEVELS * sizeof(skiplist_t *)); return sl; @@ -190,7 +197,7 @@ static node_t *find_preds (node_t **preds, node_t **succs, int n, skiplist_t *sl TRACE("s3", "find_preds: found pred %p next %p", pred, item); - if (level < n) { + if (level < n) { if (preds != NULL) { preds[level] = pred; } @@ -222,7 +229,7 @@ map_val_t sl_lookup (skiplist_t *sl, map_key_t key) { } } - TRACE("l1", "sl_lookup: no item in the skiplist matched the key", 0, 0); + TRACE("s1", "sl_lookup: no item in the skiplist matched the key", 0, 0); return DOES_NOT_EXIST; } @@ -247,15 +254,14 @@ static map_val_t update_item (node_t *item, map_val_t expectation, map_val_t new } if (EXPECT_FALSE(expectation == CAS_EXPECT_DOES_NOT_EXIST)) { - TRACE("s1", "update_item: found an item %p in the skiplist that matched the key. the expectation was " - "not met, the skiplist was not changed", item, old_val); + TRACE("s1", "update_item: the expectation was not met; the skiplist was not changed", 0, 0); return old_val; // failure } // Use a CAS and not a SWAP. If the CAS fails it means another thread removed the node or updated its // value. If another thread removed the node but it is not unlinked yet and we used a SWAP, we could // replace DOES_NOT_EXIST with our value. Then another thread that is updating the value could think it - // succeeded and return our value even though it should return DOES_NOT_EXIST. + // succeeded and return our value even though it should return DOES_NOT_EXIST. if (old_val == SYNC_CAS(&item->val, old_val, new_val)) { TRACE("s1", "update_item: the CAS succeeded. updated the value of the item", 0, 0); return old_val; // success @@ -274,11 +280,7 @@ map_val_t sl_cas (skiplist_t *sl, map_key_t key, map_val_t expectation, map_val_ node_t *preds[MAX_LEVELS]; node_t *nexts[MAX_LEVELS]; node_t *new_item = NULL; - int n = random_levels(); - if (n > sl->high_water) { - n = SYNC_ADD(&sl->high_water, 1); - TRACE("s2", "sl_cas: incremented high water mark to %p", n, 0); - } + int n = random_levels(sl); node_t *old_item = find_preds(preds, nexts, n, sl, key, ASSIST_UNLINK); // If there is already an item in the skiplist that matches the key just update its value. @@ -288,12 +290,12 @@ map_val_t sl_cas (skiplist_t *sl, map_key_t key, map_val_t expectation, map_val_ return ret_val; // If we lose a race with a thread removing the item we tried to update then we have to retry. - return sl_cas(sl, key, expectation, new_val); // tail call + return sl_cas(sl, key, expectation, new_val); // tail call } if (EXPECT_FALSE(expectation != CAS_EXPECT_DOES_NOT_EXIST && expectation != CAS_EXPECT_WHATEVER)) { - TRACE("l1", "sl_cas: the expectation was not met, the skiplist was not changed", 0, 0); - return DOES_NOT_EXIST; // failure, the caller expected an item for the to already exist + TRACE("s1", "sl_cas: the expectation was not met, the skiplist was not changed", 0, 0); + return DOES_NOT_EXIST; // failure, the caller expected an item for the to already exist } // Create a new node and insert it into the skiplist. @@ -318,7 +320,7 @@ map_val_t sl_cas (skiplist_t *sl, map_key_t key, map_val_t expectation, map_val_ if (sl->key_type != NULL) { nbd_free((void *)new_key); } - nbd_free(new_item); + nbd_free(new_item); return sl_cas(sl, key, expectation, new_val); // tail call } @@ -350,7 +352,7 @@ map_val_t sl_cas (skiplist_t *sl, map_key_t key, map_val_t expectation, map_val_ TRACE("s3", "sl_cas: attempting to update the new item's link from %p to %p", old_next, nexts[i]); other = SYNC_CAS(&new_item->next[i], old_next, (markable_t)nexts[i]); ASSERT(other == old_next || other == MARK_NODE(old_next)); - + // If another thread is removing this item we can stop linking it into to skiplist if (HAS_MARK(other)) { find_preds(NULL, NULL, 0, sl, key, FORCE_UNLINK); // see comment below @@ -392,16 +394,16 @@ map_val_t sl_remove (skiplist_t *sl, map_key_t key) { old_next = SYNC_CAS(&item->next[level], next, MARK_NODE((node_t *)next)); if (HAS_MARK(old_next)) { TRACE("s2", "sl_remove: %p is already marked for removal by another thread (next %p)", item, old_next); - if (level == 0) + if (level == 0) return DOES_NOT_EXIST; break; } } while (next != old_next); } - // Atomically swap out the item's value in case another thread is updating the item while we are - // removing it. This establishes which operation occurs first logically, the update or the remove. - map_val_t val = SYNC_SWAP(&item->val, DOES_NOT_EXIST); + // Atomically swap out the item's value in case another thread is updating the item while we are + // removing it. This establishes which operation occurs first logically, the update or the remove. + map_val_t val = SYNC_SWAP(&item->val, DOES_NOT_EXIST); TRACE("s2", "sl_remove: replaced item %p's value with DOES_NOT_EXIT", item, 0); // unlink the item @@ -416,52 +418,54 @@ map_val_t sl_remove (skiplist_t *sl, map_key_t key) { return val; } -void sl_print (skiplist_t *sl) { - - printf("high water: %d levels\n", sl->high_water); - for (int level = MAX_LEVELS - 1; level >= 0; --level) { +void sl_print (skiplist_t *sl, int verbose) { + + if (verbose) { + for (int level = MAX_LEVELS - 1; level >= 0; --level) { + node_t *item = sl->head; + if (item->next[level] == DOES_NOT_EXIST) + continue; + printf("(%d) ", level); + int i = 0; + while (item) { + markable_t next = item->next[level]; + printf("%s%p ", HAS_MARK(next) ? "*" : "", item); + item = STRIP_MARK(next); + if (i++ > 30) { + printf("..."); + break; + } + } + printf("\n"); + fflush(stdout); + } node_t *item = sl->head; - if (item->next[level] == DOES_NOT_EXIST) - continue; - printf("(%d) ", level); int i = 0; while (item) { - markable_t next = item->next[level]; - printf("%s%p ", HAS_MARK(next) ? "*" : "", item); - item = STRIP_MARK(next); + int is_marked = HAS_MARK(item->next[0]); + printf("%s%p:0x%llx ", is_marked ? "*" : "", item, (uint64_t)item->key); + if (item != sl->head) { + printf("[%d]", item->num_levels); + } else { + printf("[HEAD]"); + } + for (int level = 1; level < item->num_levels; ++level) { + node_t *next = STRIP_MARK(item->next[level]); + is_marked = HAS_MARK(item->next[0]); + printf(" %p%s", next, is_marked ? "*" : ""); + if (item == sl->head && item->next[level] == DOES_NOT_EXIST) + break; + } + printf("\n"); + fflush(stdout); + item = STRIP_MARK(item->next[0]); if (i++ > 30) { - printf("..."); + printf("...\n"); break; } } - printf("\n"); - fflush(stdout); - } - node_t *item = sl->head; - int i = 0; - while (item) { - int is_marked = HAS_MARK(item->next[0]); - printf("%s%p:0x%llx ", is_marked ? "*" : "", item, (uint64_t)item->key); - if (item != sl->head) { - printf("[%d]", item->num_levels); - } else { - printf("[HEAD]"); - } - for (int level = 1; level < item->num_levels; ++level) { - node_t *next = STRIP_MARK(item->next[level]); - is_marked = HAS_MARK(item->next[0]); - printf(" %p%s", next, is_marked ? "*" : ""); - if (item == sl->head && item->next[level] == DOES_NOT_EXIST) - break; - } - printf("\n"); - fflush(stdout); - item = STRIP_MARK(item->next[0]); - if (i++ > 30) { - printf("...\n"); - break; - } } + printf("levels:%-2d count:%-6lld \n", sl->high_water, (uint64_t)sl_count(sl)); } sl_iter_t *sl_iter_begin (skiplist_t *sl, map_key_t key) { diff --git a/map/unsafe_skiplist.c b/map/unsafe_skiplist.c index f54e10d..596dcde 100644 --- a/map/unsafe_skiplist.c +++ b/map/unsafe_skiplist.c @@ -13,12 +13,12 @@ #include "runtime.h" #include "mem.h" -#define MAX_LEVEL 31 +#define MAX_LEVELS 32 typedef struct node { map_key_t key; map_val_t val; - int top_level; + int num_levels; struct node *next[1]; } node_t; @@ -32,31 +32,38 @@ struct sl { int high_water; // max level of any item in the list }; -static int random_level (void) { +static int random_levels (skiplist_t *sl) { unsigned r = nbd_rand(); - int n = __builtin_ctz(r) / 2; - if (n > MAX_LEVEL) { n = MAX_LEVEL; } - return n; + int z = __builtin_ctz(r); + int levels = (int)(z / 1.5); + if (levels == 0) + return 1; + if (levels > sl->high_water) { + levels = SYNC_ADD(&sl->high_water, 1); + TRACE("s2", "random_levels: increased high water mark to %lld", sl->high_water, 0); + } + if (levels > MAX_LEVELS) { levels = MAX_LEVELS; } + return levels; } -static node_t *node_alloc (int level, map_key_t key, map_val_t val) { - assert(level >= 0 && level <= MAX_LEVEL); - size_t sz = sizeof(node_t) + level * sizeof(node_t *); +static node_t *node_alloc (int num_levels, map_key_t key, map_val_t val) { + assert(num_levels > 0 && num_levels <= MAX_LEVELS); + size_t sz = sizeof(node_t) + (num_levels - 1) * sizeof(node_t *); node_t *item = (node_t *)nbd_malloc(sz); memset(item, 0, sz); item->key = key; item->val = val; - item->top_level = level; - TRACE("s2", "node_alloc: new node %p (%llu levels)", item, level); + item->num_levels = num_levels; + TRACE("s2", "node_alloc: new node %p (%llu levels)", item, num_levels); return item; } skiplist_t *sl_alloc (const datatype_t *key_type) { skiplist_t *sl = (skiplist_t *)nbd_malloc(sizeof(skiplist_t)); sl->key_type = key_type; - sl->high_water = 0; - sl->head = node_alloc(MAX_LEVEL, 0, 0); - memset(sl->head->next, 0, (MAX_LEVEL+1) * sizeof(skiplist_t *)); + sl->high_water = 1; + sl->head = node_alloc(MAX_LEVELS, 0, 0); + memset(sl->head->next, 0, MAX_LEVELS * sizeof(skiplist_t *)); return sl; } @@ -82,20 +89,16 @@ size_t sl_count (skiplist_t *sl) { return count; } -static node_t *find_preds (node_t **preds, node_t **succs, int n, skiplist_t *sl, map_key_t key, int help_remove) { +static node_t *find_preds (node_t **preds, node_t **succs, int n, skiplist_t *sl, map_key_t key, int unlink) { node_t *pred = sl->head; node_t *item = NULL; TRACE("s2", "find_preds: searching for key %p in skiplist (head is %p)", key, pred); int d = 0; - int start_level = sl->high_water; - if (EXPECT_FALSE(start_level < n)) { - start_level = n; - } // Traverse the levels of from the top level to the bottom - for (int level = start_level; level >= 0; --level) { + for (int level = sl->high_water - 1; level >= 0; --level) { node_t *next = pred->next[level]; - if (next == DOES_NOT_EXIST && level > n) + if (next == DOES_NOT_EXIST && level >= n) continue; TRACE("s3", "find_preds: traversing level %p starting at %p", level, pred); item = next; @@ -108,8 +111,15 @@ static node_t *find_preds (node_t **preds, node_t **succs, int n, skiplist_t *sl d = sl->key_type->cmp((void *)item->key, (void *)key); } - if (d >= 0) + if (d >= 0) { + if (d == 0 && unlink) { + pred->next[level] = next; + TRACE("s3", "find_preds: unlinked item from pred %p", pred, 0); + item = next; + next = (item != NULL) ? item->next[level] : DOES_NOT_EXIST; + } break; + } pred = item; item = next; @@ -117,8 +127,7 @@ static node_t *find_preds (node_t **preds, node_t **succs, int n, skiplist_t *sl TRACE("s3", "find_preds: found pred %p next %p", pred, item); - // The cast to unsigned is for the case when n is -1. - if ((unsigned)level <= (unsigned)n) { + if (level < n) { if (preds != NULL) { preds[level] = pred; } @@ -128,14 +137,6 @@ static node_t *find_preds (node_t **preds, node_t **succs, int n, skiplist_t *sl } } - // fill in empty levels - if (n == -1 && item != NULL && preds != NULL) { - assert(item->top_level <= MAX_LEVEL); - for (int level = start_level + 1; level <= item->top_level; ++level) { - preds[level] = sl->head; - } - } - if (d == 0) { TRACE("s2", "find_preds: found matching item %p in skiplist, pred is %p", item, pred); return item; @@ -144,46 +145,6 @@ static node_t *find_preds (node_t **preds, node_t **succs, int n, skiplist_t *sl return NULL; } -static void sl_unlink (skiplist_t *sl, map_key_t key) { - node_t *pred = sl->head; - node_t *item = NULL; - TRACE("s2", "sl_unlink: unlinking marked item with key %p", key, 0); - int d = 0; - - // Traverse the levels of - for (int level = sl->high_water; level >= 0; --level) { - node_t *next = pred->next[level]; - if (next == DOES_NOT_EXIST) - continue; - TRACE("s3", "sl_unlink: traversing level %p starting at %p", level, pred); - item = next; - while (item != NULL) { - next = item->next[level]; - - if (EXPECT_TRUE(sl->key_type == NULL)) { - d = item->key - key; - } else { - d = sl->key_type->cmp((void *)item->key, (void *)key); - } - - if (d == 0) { - pred->next[level] = next; - TRACE("s3", "sl_unlink: unlinked item from pred %p", pred, 0); - item = next; - next = (item != NULL) ? item->next[level] : DOES_NOT_EXIST; - break; - } - if (d > 0) - break; - - pred = item; - item = next; - } - - TRACE("s3", "sl_unlink: at pred %p next %p", pred, item); - } -} - // Fast find that does not return the node's predecessors. map_val_t sl_lookup (skiplist_t *sl, map_key_t key) { TRACE("s1", "sl_lookup: searching for key %p in skiplist %p", key, sl); @@ -195,7 +156,7 @@ map_val_t sl_lookup (skiplist_t *sl, map_key_t key) { return val; } - TRACE("l1", "sl_lookup: no item in the skiplist matched the key", 0, 0); + TRACE("s1", "sl_lookup: no item in the skiplist matched the key", 0, 0); return DOES_NOT_EXIST; } @@ -211,19 +172,18 @@ map_val_t sl_cas (skiplist_t *sl, map_key_t key, map_val_t expectation, map_val_ TRACE("s1", "sl_cas: expectation %p new value %p", expectation, new_val); ASSERT((int64_t)new_val > 0); - node_t *preds[MAX_LEVEL+1]; - node_t *nexts[MAX_LEVEL+1]; + node_t *preds[MAX_LEVELS]; + node_t *nexts[MAX_LEVELS]; node_t *new_item = NULL; - int n = random_level(); - node_t *old_item = find_preds(preds, nexts, n, sl, key, TRUE); + int n = random_levels(sl); + node_t *old_item = find_preds(preds, nexts, n, sl, key, FALSE); // If there is already an item in the skiplist that matches the key just update its value. if (old_item != NULL) { map_val_t old_val = old_item->val; if (expectation == CAS_EXPECT_DOES_NOT_EXIST || (expectation != CAS_EXPECT_WHATEVER && expectation != CAS_EXPECT_EXISTS && expectation != old_val)) { - TRACE("s1", "update_item: found an item %p in the skiplist that matched the key. the expectation was " - "not met, the skiplist was not changed", item, old_val); + TRACE("s1", "sl_cas: the expectation was not met; the skiplist was not changed", 0, 0); return old_val; } old_item->val = new_val; @@ -231,7 +191,7 @@ map_val_t sl_cas (skiplist_t *sl, map_key_t key, map_val_t expectation, map_val_ } if (EXPECT_FALSE(expectation != CAS_EXPECT_DOES_NOT_EXIST && expectation != CAS_EXPECT_WHATEVER)) { - TRACE("l1", "sl_cas: the expectation was not met, the skiplist was not changed", 0, 0); + TRACE("s1", "sl_cas: the expectation was not met, the skiplist was not changed", 0, 0); return DOES_NOT_EXIST; // failure, the caller expected an item for the to already exist } @@ -239,19 +199,15 @@ map_val_t sl_cas (skiplist_t *sl, map_key_t key, map_val_t expectation, map_val_ // Create a new node and insert it into the skiplist. map_key_t new_key = sl->key_type == NULL ? key : (map_key_t)sl->key_type->clone((void *)key); - if (n > sl->high_water) { - n = ++sl->high_water; - TRACE("s2", "sl_cas: incremented high water mark to %p", sl->high_water, 0); - } new_item = node_alloc(n, new_key, new_val); // Set 's next pointers to their proper values - for (int level = 0; level <= new_item->top_level; ++level) { + for (int level = 0; level < new_item->num_levels; ++level) { new_item->next[level] = nexts[level]; } // Link into - for (int level = 0; level <= new_item->top_level; ++level) { + for (int level = 0; level < new_item->num_levels; ++level) { preds[level]->next[level] = new_item; } @@ -260,8 +216,8 @@ map_val_t sl_cas (skiplist_t *sl, map_key_t key, map_val_t expectation, map_val_ map_val_t sl_remove (skiplist_t *sl, map_key_t key) { TRACE("s1", "sl_remove: removing item with key %p from skiplist %p", key, sl); - node_t *preds[MAX_LEVEL+1]; - node_t *item = find_preds(preds, NULL, -1, sl, key, TRUE); + node_t *preds[MAX_LEVELS]; + node_t *item = find_preds(preds, NULL, sl->high_water, sl, key, FALSE); if (item == NULL) { TRACE("s3", "sl_remove: remove failed, an item with a matching key does not exist in the skiplist", 0, 0); return DOES_NOT_EXIST; @@ -269,7 +225,7 @@ map_val_t sl_remove (skiplist_t *sl, map_key_t key) { map_val_t val = item->val; // unlink the item - sl_unlink(sl, key); + find_preds(NULL, NULL, 0, sl, key, TRUE); // free the node if (sl->key_type != NULL) { @@ -283,7 +239,7 @@ map_val_t sl_remove (skiplist_t *sl, map_key_t key) { void sl_print (skiplist_t *sl) { printf("high water: %d levels\n", sl->high_water); - for (int level = MAX_LEVEL; level >= 0; --level) { + for (int level = MAX_LEVELS - 1; level >= 0; --level) { node_t *item = sl->head; if (item->next[level] == DOES_NOT_EXIST) continue; @@ -306,11 +262,11 @@ void sl_print (skiplist_t *sl) { while (item) { printf("%p:0x%llx ", item, (uint64_t)item->key); if (item != sl->head) { - printf("[%d]", item->top_level); + printf("[%d]", item->num_levels); } else { printf("[HEAD]"); } - for (int level = 1; level <= item->top_level; ++level) { + for (int level = 1; level < item->num_levels; ++level) { node_t *next = item->next[level]; printf(" %p", next); if (item == sl->head && item->next[level] == DOES_NOT_EXIST) @@ -329,7 +285,7 @@ void sl_print (skiplist_t *sl) { sl_iter_t *sl_iter_begin (skiplist_t *sl, map_key_t key) { sl_iter_t *iter = (sl_iter_t *)nbd_malloc(sizeof(sl_iter_t)); if (key != DOES_NOT_EXIST) { - find_preds(NULL, &iter->next, 0, sl, key, FALSE); + find_preds(NULL, &iter->next, 1, sl, key, FALSE); } else { iter->next = sl->head->next[0]; } diff --git a/perf.sh b/perf.sh index ae1be88..759e3a6 100755 --- a/perf.sh +++ b/perf.sh @@ -1,7 +1,7 @@ #!/bin/sh -for ks in 28 24 20 16 12 8 4 0 +for ks in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 #26 27 28 29 30 do - for th in 1 2 4 8 16 + for th in 8 do output/perf_test $th $ks done diff --git a/runtime/mem.c b/runtime/mem.c index 420171e..90a22e1 100644 --- a/runtime/mem.c +++ b/runtime/mem.c @@ -97,7 +97,7 @@ static void *get_new_region (int block_scale) { if ((size_t)region & (region_size - 1)) { TRACE("m0", "get_new_region: region not aligned", 0, 0); munmap(region, region_size); - region = mmap(NULL, region_size * 2, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); + region = mmap(NULL, region_size * 2, PROT_READ|PROT_WRITE, MAP_NORESERVE|MAP_ANON|MAP_PRIVATE, -1, 0); if (region == (void *)-1) { perror("get_new_region: mmap"); exit(-1); diff --git a/runtime/rcu.c b/runtime/rcu.c index 8120713..12c37a3 100644 --- a/runtime/rcu.c +++ b/runtime/rcu.c @@ -1,4 +1,4 @@ -/* +/* * Written by Josh Dybnis and released to the public domain, as explained at * http://creativecommons.org/licenses/publicdomain * @@ -31,7 +31,7 @@ static fifo_t *pending_[MAX_NUM_THREADS] = {}; static int num_threads_ = 0; static fifo_t *fifo_alloc(int scale) { - fifo_t *q = (fifo_t *)nbd_malloc(sizeof(fifo_t) + (1ULL << scale) * sizeof(void *)); + fifo_t *q = (fifo_t *)nbd_malloc(sizeof(fifo_t) + (1ULL << scale) * sizeof(void *)); memset(q, 0, sizeof(fifo_t)); q->scale = scale; q->head = 0; diff --git a/runtime/runtime.c b/runtime/runtime.c index 415a161..fe4a1bd 100644 --- a/runtime/runtime.c +++ b/runtime/runtime.c @@ -1,4 +1,4 @@ -/* +/* * Written by Josh Dybnis and released to the public domain, as explained at * http://creativecommons.org/licenses/publicdomain */ @@ -12,7 +12,10 @@ #include "tls.h" DECLARE_THREAD_LOCAL(tid_, int); -DECLARE_THREAD_LOCAL(rand_seed_, unsigned); +DECLARE_THREAD_LOCAL(rx_, uint32_t); +DECLARE_THREAD_LOCAL(ry_, uint32_t); +DECLARE_THREAD_LOCAL(rz_, uint32_t); +DECLARE_THREAD_LOCAL(rc_, uint32_t); typedef struct thread_info { int thread_id; @@ -21,26 +24,28 @@ typedef struct thread_info { } thread_info_t; __attribute__ ((constructor)) void nbd_init (void) { - //sranddev(); - INIT_THREAD_LOCAL(rand_seed_); + INIT_THREAD_LOCAL(r); INIT_THREAD_LOCAL(tid_); SET_THREAD_LOCAL(tid_, 0); mem_init(); lwt_thread_init(0); rcu_thread_init(0); + srand((uint32_t)rdtsc()); } static void *worker (void *arg) { thread_info_t *ti = (thread_info_t *)arg; SET_THREAD_LOCAL(tid_, ti->thread_id); LOCALIZE_THREAD_LOCAL(tid_, int); -#ifndef NDEBUG - SET_THREAD_LOCAL(rand_seed_, tid_+1); -#else - SET_THREAD_LOCAL(rand_seed_, nbd_rand_seed(tid_+1)); -#endif + + SET_THREAD_LOCAL(rx_, rand()); + SET_THREAD_LOCAL(ry_, rand()); + SET_THREAD_LOCAL(rz_, rand()); + SET_THREAD_LOCAL(rc_, rand()); + lwt_thread_init(ti->thread_id); rcu_thread_init(ti->thread_id); + void *ret = ti->start_routine(ti->arg); nbd_free(ti); return ret; @@ -54,18 +59,35 @@ int nbd_thread_create (pthread_t *restrict thread, int thread_id, void *(*start_ return pthread_create(thread, NULL, worker, ti); } -int nbd_rand (void) { - LOCALIZE_THREAD_LOCAL(rand_seed_, unsigned); - unsigned r = rand_r(&rand_seed_); - SET_THREAD_LOCAL(rand_seed_, r); +// George Marsaglia's KISS generator +uint64_t nbd_rand (void) { + LOCALIZE_THREAD_LOCAL(rx_, unsigned); + LOCALIZE_THREAD_LOCAL(ry_, unsigned); + LOCALIZE_THREAD_LOCAL(rz_, unsigned); + LOCALIZE_THREAD_LOCAL(rc_, unsigned); + + uint32_t rx = 69069 * rx_ + 12345; + uint32_t ry = ry_; + uint32_t rz = rz_; + ry ^= (ry << 13); + ry ^= (ry >> 17); + ry ^= (ry << 5); + uint64_t t = rz * 698769069LL + rc_; + uint64_t r = rx + ry + (rz = t); + + SET_THREAD_LOCAL(rx_, rx); + SET_THREAD_LOCAL(ry_, ry); + SET_THREAD_LOCAL(rz_, rz); + SET_THREAD_LOCAL(rc_, t >> 32); + return r; } +// Fairly fast random numbers uint64_t nbd_rand_seed (int i) { return rdtsc() + -715159705 + i * 129; } -// Fairly fast random numbers int nbd_next_rand (uint64_t *r) { *r = (*r * 0x5DEECE66DLL + 0xBLL) & MASK(48); return (*r >> 17) & 0x7FFFFFFF; diff --git a/test/perf_test.c b/test/perf_test.c index cc55ce0..1ea0bcb 100644 --- a/test/perf_test.c +++ b/test/perf_test.c @@ -16,34 +16,41 @@ //#define TEST_STRING_KEYS -static volatile int wait_; -static volatile int stop_; static int num_threads_; -static int duration_; +static volatile int start_, stop_, load_; static map_t *map_; -static int get_range_; -static int put_range_; +static int get_range_, put_range_; static size_t num_keys_; -static map_key_t *keys_ = NULL; -static int ops_[MAX_NUM_THREADS] = {}; +static double load_time_; +static int duration_; -#define FOO (1ULL << 20) +#define OP_SELECT_RANGE (1ULL << 20) void *worker (void *arg) { - int tid = (int)(size_t)arg; - uint64_t s = nbd_rand_seed(tid); - int get_ops = 0, put_ops = 0, del_ops = 0; + volatile uint64_t ops = 0; // Wait for all the worker threads to be ready. - (void)SYNC_ADD(&wait_, -1); - do {} while (wait_); + (void)SYNC_ADD(&load_, -1); + do {} while (load_); + + // Pre-load map + int n = num_keys_ / 2 / num_threads_; + for (int i = 0; i < n; ++i) { + map_key_t key = (nbd_rand() & (num_keys_ - 1)) + 1; + map_set(map_, key, key); + } + + // Wait for all the worker threads to be done loading. + (void)SYNC_ADD(&start_, -1); + do {} while (start_); while (!stop_) { - map_key_t key = keys_[ nbd_next_rand(&s) & (num_keys_ - 1) ]; - uint32_t x = nbd_next_rand(&s) & (FOO - 1); + ++ops; + map_key_t key = (nbd_rand() & (num_keys_ - 1)) + 1; + map_key_t x = nbd_rand() & (OP_SELECT_RANGE - 1); if (x < get_range_) { #ifndef NDEBUG - map_val_t val = + map_val_t val = #endif map_get(map_, key); #ifdef TEST_STRING_KEYS @@ -51,39 +58,20 @@ void *worker (void *arg) { #else ASSERT(val == DOES_NOT_EXIST || key == val); #endif - get_ops++; } else if (x < put_range_) { map_add(map_, key, key); - put_ops++; } else { map_remove(map_, key); - del_ops++; } rcu_update(); } - ops_[tid] = get_ops + put_ops + del_ops; - - return NULL; + return (void *)ops; } -int run_test (void) { - int ops; - wait_ = num_threads_ + 1; - - // Quicky sanity check - int n = 100; - if (num_keys_ < n) { n = num_keys_; } - for (int i = 0; i < n; ++i) { - map_set(map_, keys_[i], keys_[i]); - for(int j = 0; j < i; ++j) { -#ifdef TEST_STRING_KEYS - ASSERT(ns_cmp((nstring_t *)map_get(map_, keys_[i]), (nstring_t *)keys_[i]) == 0); -#else - ASSERT(map_get(map_, keys_[i]) == keys_[i]); -#endif - } - } +uint64_t run_test (void) { + load_ = num_threads_ + 1; + start_ = num_threads_ + 1; stop_ = 0; @@ -93,18 +81,26 @@ int run_test (void) { if (rc != 0) { perror("pthread_create"); exit(rc); } } - do { /* nothing */ } while (wait_ != 1); + do { /* nothing */ } while (load_ != 1); + load_ = 0; + + struct timeval tv1, tv2; + gettimeofday(&tv1, NULL); - wait_ = 0; + do { /* nothing */ } while (start_ != 1); + + gettimeofday(&tv2, NULL); + load_time_ = (double)(1000000*(tv2.tv_sec - tv1.tv_sec) + tv2.tv_usec - tv1.tv_usec) / 1000000; + + start_ = 0; sleep(duration_); stop_ = 1; + uint64_t ops = 0; for (int i = 0; i < num_threads_; ++i) { - pthread_join(thread[i], NULL); - } - ops = 0; - for (int i = 0; i < num_threads_; ++i) { - ops += ops_[i]; + void *count; + pthread_join(thread[i], &count); + ops += (size_t)count; } return ops; } @@ -118,6 +114,7 @@ int main (int argc, char **argv) { } num_threads_ = 2; + if (num_threads_ > MAX_NUM_THREADS) { num_threads_ = MAX_NUM_THREADS; } if (argc > 1) { errno = 0; @@ -130,10 +127,10 @@ int main (int argc, char **argv) { fprintf(stderr, "%s: Number of threads must be at least 1\n", program_name); return -1; } - if (num_threads_ > MAX_NUM_THREADS) { - fprintf(stderr, "%s: Number of threads cannot be more than %d\n", program_name, MAX_NUM_THREADS); - return -1; - } + } + if (num_threads_ > MAX_NUM_THREADS) { + fprintf(stderr, "%s: Number of threads cannot be more than %d\n", program_name, MAX_NUM_THREADS); + return -1; } int table_scale = 12; @@ -144,19 +141,18 @@ int main (int argc, char **argv) { return -1; } table_scale = strtol(argv[2], NULL, 10); - if (table_scale < 0 || table_scale > 31) { - fprintf(stderr, "%s: The scale of the collection must be between 0 and 31\n", program_name); + if (table_scale < 0 || table_scale > 36) { + fprintf(stderr, "%s: The scale of the collection must be between 0 and 36\n", program_name); return -1; } } - int read_ratio = 90; int put_ratio = 50; - get_range_ = (int)((double)FOO / 100 * read_ratio); - put_range_ = get_range_ + (int)(((double)FOO - get_range_) / 100 * put_ratio); + get_range_ = (int)((double)OP_SELECT_RANGE / 100 * read_ratio); + put_range_ = get_range_ + (int)(((double)OP_SELECT_RANGE - get_range_) / 100 * put_ratio); - static const map_impl_t *map_types[] = { &MAP_IMPL_SL }; + static const map_impl_t *map_types[] = { &MAP_IMPL_HT }; for (int i = 0; i < sizeof(map_types)/sizeof(*map_types); ++i) { #ifdef TEST_STRING_KEYS map_ = map_alloc(map_types[i], &DATATYPE_NSTRING); @@ -164,33 +160,14 @@ int main (int argc, char **argv) { map_ = map_alloc(map_types[i], NULL); #endif - // Do some warmup num_keys_ = 1ULL << table_scale; - keys_ = nbd_malloc(sizeof(map_key_t) * num_keys_); - ASSERT(keys_ != NULL); - for (uint64_t j = 0; j < num_keys_; ++j) { -#ifdef TEST_STRING_KEYS - char tmp[64]; - snprintf(tmp, sizeof(tmp), "%dabc%d", j, j*17+123); - int n = strlen(tmp); - keys_[j] = ns_alloc(n); - memcpy(keys_[j], tmp, n); -#else - keys_[j] = j*17+123; -#endif - } - duration_ = 10; - int num_trials = 1; - int ops = 0; - for (int i = 0; i < num_trials; ++i) { - ops += run_test(); - } - double ops_per_sec = ops / num_trials / duration_; + duration_ = 1 + table_scale/4; + double mops_per_sec = (double)run_test() / 1000000.0 / duration_; - //map_print(map_); - printf("Threads:%-2d Size:2^%-2d Mops/Sec:%-4.3g per-thread:%-4.3g\n\n", - num_threads_, table_scale, ops_per_sec/1000000, ops_per_sec/num_threads_/1000000); + printf("Threads:%-2d Size:2^%-2d load time:%-4.2f Mops/s:%-4.2f per-thread:%-4.2f ", + num_threads_, table_scale, load_time_, mops_per_sec, mops_per_sec/num_threads_); + map_print(map_, FALSE); fflush(stdout); map_free(map_); -- 2.40.0