#define CACHE_LINE_SIZE 64 // 64 byte cache line on x86 and x86-64
#define CACHE_LINE_SCALE 6 // log base 2 of the cache line size
-#define EXPECT_TRUE(x) __builtin_expect(!!(x), 1)
-#define EXPECT_FALSE(x) __builtin_expect(!!(x), 0)
+#define EXPECT_TRUE(x) __builtin_expect(!!(x), 1)
+#define EXPECT_FALSE(x) __builtin_expect(!!(x), 0)
-#define SYNC_SWAP __sync_lock_test_and_set
-#define SYNC_CAS __sync_val_compare_and_swap
-#define SYNC_ADD __sync_add_and_fetch
-#define SYNC_FETCH_AND_OR __sync_fetch_and_or
+#define SYNC_SWAP __sync_lock_test_and_set
+#define SYNC_CAS __sync_val_compare_and_swap
+#define SYNC_ADD __sync_add_and_fetch
+#define SYNC_FETCH_AND_OR __sync_fetch_and_or
+
+#define COUNT_TRAILING_ZEROS __builtin_ctz
#define MASK(n) ((1ULL << (n)) - 1)
typedef unsigned long long uint64_t;
typedef unsigned int uint32_t;
+typedef unsigned short uint16_t;
typedef unsigned char uint8_t;
typedef size_t markable_t;
*/
#ifndef MEM_H
#define MEM_H
-void *nbd_malloc (size_t n);
-void nbd_free (void *x);
+void *nbd_malloc (size_t n) __attribute__((malloc, alloc_size(1)));
+void nbd_free (void *x) __attribute__((nonnull));
#endif//MEM_H
###################################################################################################
# Makefile for building programs with whole-program interfile optimization
###################################################################################################
-CFLAGS0 := -Wall -Werror -std=gnu99 -lpthread -m64 #-m32 -DNBD32
+CFLAGS0 := -Wall -Werror -std=gnu99 -lpthread #-m32 -DNBD32
CFLAGS1 := $(CFLAGS0) -g #-O3 #-DNDEBUG #-fwhole-program -combine
CFLAGS2 := $(CFLAGS1) #-DENABLE_TRACE
CFLAGS3 := $(CFLAGS2) #-DLIST_USE_HAZARD_POINTER
* Every atomic operation is also an implicit full memory barrier. The upshot is that it simplifies
* the code a bit, but it won't be as fast as it could be on platforms that provide weaker
 * operations like an unfenced CAS which would still do the job.
+ *
+ * 11Feb09 - Bug fix in ht_iter_next() from Rui Ueyama
*/
#include <stdio.h>
} while (key == DOES_NOT_EXIST || val == DOES_NOT_EXIST || val == TOMBSTONE);
- if (key_ptr) {
- *key_ptr = key;
- }
if (val == COPIED_VALUE) {
const datatype_t *key_type = iter->hti->ht->key_type;
#ifdef NBD32
uint32_t hash = (key_type == NULL) ? murmur32_8b((uint64_t)key) : key_type->hash((void *)key);
#endif
val = hti_get(iter->hti->next, (map_key_t)ent->key, hash);
+
+ // Go to the next entry if key is already deleted.
+ if (val == DOES_NOT_EXIST)
+ return ht_iter_next(iter, key_ptr); // recursive tail-call
}
+ if (key_ptr) {
+ *key_ptr = key;
+ }
return val;
}
#include "rlocal.h"
#include "lwt.h"
-#define RECYCLE_PAGES
-
#define MAX_SCALE 31 // allocate blocks up to 4GB (arbitrary, could be bigger)
#ifndef NBD32
#define MIN_SCALE 3 // smallest allocated block is 8 bytes
optimization
------------
- investigate 16 byte CAS; ht can store GUIDs inline instead of pointers to actual keys
-- write after write can just update the old update record instead of pushing a new one
+- txn write after write can just update the old update record instead of pushing a new one
- use a shared scan for write-set validation in txn, similar to ht copy logic
- experiment with the performance impact of not passing the hash between functions in ht
- experiment with embedding the nstring keys in the list/skiplist nodes
- allow values of 0 to be inserted into maps (change DOES_NOT_EXIST to something other than 0)
- read-committed type transactions
- recycle free regions across size-classes and between threads
+
+