#include "common.h"
#include "murmur.h"
#include "mem.h"
-#include "mlocal.h"
#include "hashtable.h"
#define GET_PTR(x) ((void *)((x) & MASK(48))) // low-order 48 bits is a pointer to a nstring_t
-typedef struct ht_entry {
+typedef struct entry {
uint64_t key;
uint64_t val;
} entry_t;
volatile entry_t *table;
hashtable_t *ht; // parent ht;
struct hti *next;
- unsigned int scale;
+ unsigned scale;
int max_probe;
+ int references;
int count; // TODO: make these counters distributed
int num_entries_copied;
- int scan;
+ int copy_scan;
} hti_t;
+// Hashtable iterator. ht_iter_start() pins the newest table instance by
+// incrementing its <references> count so the hti can't be retired/freed
+// mid-iteration; ht_iter_free() drops that reference.
+struct ht_iter {
+    hti_t * hti;  // table being iterated (reference held on it)
+    int64_t idx;  // index of the last entry visited; -1 before the first ht_iter_next()
+    uint64_t key; // key captured by the most recent successful ht_iter_next()
+    uint64_t val; // value captured by the most recent successful ht_iter_next()
+};
+
// Top-level hashtable handle: points at the current table instance (hti).
// During a resize, successor tables are chained through hti->next.
struct ht {
    hti_t *hti;                 // current (oldest not-yet-retired) table instance
    const datatype_t *key_type; // NULL => keys are inline 64-bit values hashed with murmur32_8b;
                                // otherwise keys are pointers with key_type->hash/cmp semantics
};
-static const map_impl_t ht_map_impl = {
- (map_alloc_t)ht_alloc, (map_cas_t)ht_cas, (map_get_t)ht_get, (map_remove_t)ht_remove,
- (map_count_t)ht_count, (map_print_t)ht_print, (map_free_t)ht_free
-};
-
-const map_impl_t *MAP_TYPE_HASHTABLE = &ht_map_impl;
-
static const uint64_t COPIED_VALUE = -1;
-static const uint64_t TOMBSTONE = STRIP_TAG(-1);
+static const uint64_t TOMBSTONE = STRIP_TAG(-1, TAG1);
static const unsigned ENTRIES_PER_BUCKET = CACHE_LINE_SIZE/sizeof(entry_t);
static const unsigned ENTRIES_PER_COPY_CHUNK = CACHE_LINE_SIZE/sizeof(entry_t)*2;
// Allocate and initialize a hti_t with 2^<scale> entries.
static hti_t *hti_alloc (hashtable_t *parent, int scale) {
- // Include enough slop to align the actual table on a cache line boundry
- size_t n = sizeof(hti_t)
- + sizeof(entry_t) * (1 << scale)
- + (CACHE_LINE_SIZE - 1);
- hti_t *hti = (hti_t *)calloc(n, 1);
+ hti_t *hti = (hti_t *)nbd_malloc(sizeof(hti_t));
+ memset(hti, 0, sizeof(hti_t));
- // Align the table of hash entries on a cache line boundry.
- hti->table = (entry_t *)(((uint64_t)hti + sizeof(hti_t) + (CACHE_LINE_SIZE-1))
- & ~(CACHE_LINE_SIZE-1));
+ size_t sz = sizeof(entry_t) * (1 << scale);
+ entry_t *table = nbd_malloc(sz);
+ memset(table, 0, sz);
+ hti->table = table;
hti->scale = scale;
}
// Tag the value in the old entry to indicate a copy is in progress.
- ht1_ent_val = SYNC_FETCH_AND_OR(&ht1_ent->val, TAG_VALUE(0));
+ ht1_ent_val = SYNC_FETCH_AND_OR(&ht1_ent->val, TAG_VALUE(0, TAG1));
TRACE("h2", "hti_copy_entry: tagged the value %p in old entry %p", ht1_ent_val, ht1_ent);
if (ht1_ent_val == COPIED_VALUE) {
TRACE("h1", "hti_copy_entry: entry %p already copied to table %p", ht1_ent, ht2);
void *key = (ht1->ht->key_type == NULL) ? (void *)ht1_ent_key : GET_PTR(ht1_ent_key);
// The old table's dead entries don't need to be copied to the new table, but their keys need to be freed.
- assert(COPIED_VALUE == TAG_VALUE(TOMBSTONE));
+ assert(COPIED_VALUE == TAG_VALUE(TOMBSTONE, TAG1));
if (ht1_ent_val == TOMBSTONE) {
TRACE("h1", "hti_copy_entry: entry %p old value was deleted, now freeing key %p", ht1_ent, key);
if (EXPECT_FALSE(ht1->ht->key_type != NULL)) {
}
// Copy the value to the entry in the new table.
- ht1_ent_val = STRIP_TAG(ht1_ent_val);
+ ht1_ent_val = STRIP_TAG(ht1_ent_val, TAG1);
uint64_t old_ht2_ent_val = SYNC_CAS(&ht2_ent->val, DOES_NOT_EXIST, ht1_ent_val);
// If there is a nested copy in progress, we might have installed the key into a dead entry.
TRACE("h1", "hti_cas: hti %p key %p", hti, key);
TRACE("h1", "hti_cas: value %p expect %p", new, expected);
assert(hti);
- assert(!IS_TAGGED(new));
+ assert(!IS_TAGGED(new, TAG1));
assert(key);
int is_empty;
// If the entry is in the middle of a copy, the copy must be completed first.
uint64_t ent_val = ent->val;
- if (EXPECT_FALSE(IS_TAGGED(ent_val))) {
+ if (EXPECT_FALSE(IS_TAGGED(ent_val, TAG1))) {
if (ent_val != COPIED_VALUE) {
int did_copy = hti_copy_entry(hti, ent, key_hash, ((volatile hti_t *)hti)->next);
if (did_copy) {
// If the entry is being copied, finish the copy and retry on the next table.
uint64_t ent_val = ent->val;
- if (EXPECT_FALSE(IS_TAGGED(ent_val))) {
+ if (EXPECT_FALSE(IS_TAGGED(ent_val, TAG1))) {
if (EXPECT_FALSE(ent_val != COPIED_VALUE)) {
int did_copy = hti_copy_entry(hti, ent, key_hash, ((volatile hti_t *)hti)->next);
if (did_copy) {
return hti_get(ht->hti, key, hash);
}
-//
-uint64_t ht_cas (hashtable_t *ht, void *key, uint64_t expected_val, uint64_t new_val) {
-
- TRACE("h2", "ht_cas: key %p ht %p", key, ht);
- TRACE("h2", "ht_cas: expected val %p new val %p", expected_val, new_val);
- assert(key != DOES_NOT_EXIST);
- assert(!IS_TAGGED(new_val) && new_val != DOES_NOT_EXIST && new_val != TOMBSTONE);
-
- hti_t *hti = ht->hti;
+// Returns TRUE once every entry in <hti> has been copied to the next table.
+// NOTE(review): the guard below enters the help-copy body when total_copied
+// == (1 << scale), i.e. when the copy is already complete; expected '!=' so
+// that threads help while the copy is unfinished -- confirm against upstream.
+int hti_help_copy (hti_t *hti) {
+ volatile entry_t *ent;
+ uint64_t limit;
+ uint64_t total_copied = hti->num_entries_copied;
+ int num_copied = 0;
+ int x = hti->copy_scan;
- // Help with an ongoing copy.
- if (EXPECT_FALSE(hti->next != NULL)) {
- volatile entry_t *ent;
- uint64_t limit;
- int num_copied = 0;
- int x = hti->scan;
-
- TRACE("h1", "ht_cas: help copy. scan is %llu, size is %llu", x, 1<<hti->scale);
+ TRACE("h1", "ht_cas: help copy. scan is %llu, size is %llu", x, 1<<hti->scale);
+ if (total_copied == (1 << hti->scale)) {
// Panic if we've been around the array twice and still haven't finished the copy.
int panic = (x >= (1 << (hti->scale + 1)));
if (!panic) {
// Reserve some entries for this thread to copy. There is a race condition here because the
// fetch and add isn't atomic, but that is ok.
- hti->scan = x + ENTRIES_PER_COPY_CHUNK;
+ hti->copy_scan = x + ENTRIES_PER_COPY_CHUNK;
- // <hti->scan> might be larger than the size of the table, if some thread stalls while
+ // <copy_scan> might be larger than the size of the table, if some thread stalls while
            // copying. In that case we just wrap around to the beginning and make another pass through
// the table.
ent = hti->table + (x & MASK(hti->scale));
assert(ent <= hti->table + (1 << hti->scale));
}
if (num_copied != 0) {
- SYNC_ADD(&hti->num_entries_copied, num_copied);
+ total_copied = SYNC_ADD(&hti->num_entries_copied, num_copied);
}
+ }
+
+ return (total_copied == (1 << hti->scale));
+}
+
+//
+uint64_t ht_cas (hashtable_t *ht, void *key, uint64_t expected_val, uint64_t new_val) {
+
+ TRACE("h2", "ht_cas: key %p ht %p", key, ht);
+ TRACE("h2", "ht_cas: expected val %p new val %p", expected_val, new_val);
+ assert(key != DOES_NOT_EXIST);
+ assert(!IS_TAGGED(new_val, TAG1) && new_val != DOES_NOT_EXIST && new_val != TOMBSTONE);
+
+ hti_t *hti = ht->hti;
+
+ // Help with an ongoing copy.
+ if (EXPECT_FALSE(hti->next != NULL)) {
+ int done = hti_help_copy(hti);
// Dispose of fully copied tables.
- if (hti->num_entries_copied == (1 << hti->scale) || panic) {
- assert(hti->next);
- if (SYNC_CAS(&ht->hti, hti, hti->next) == hti) {
- nbd_defer_free(hti);
+ if (done && hti->references == 0) {
+
+ int r = SYNC_CAS(&hti->references, 0, -1);
+ if (r == 0) {
+ assert(hti->next);
+ if (SYNC_CAS(&ht->hti, hti, hti->next) == hti) {
+ nbd_defer_free((void *)hti->table);
+ nbd_defer_free(hti);
+ }
}
}
}
hti_t *hti = ht->hti;
do {
for (uint32_t i = 0; i < (1 << hti->scale); ++i) {
- assert(hti->table[i].val == COPIED_VALUE || !IS_TAGGED(hti->table[i].val));
+ assert(hti->table[i].val == COPIED_VALUE || !IS_TAGGED(hti->table[i].val, TAG1));
if (ht->key_type != NULL && hti->table[i].key != DOES_NOT_EXIST) {
nbd_free(GET_PTR(hti->table[i].key));
}
}
hti_t *next = hti->next;
+ nbd_free((void *)hti->table);
nbd_free(hti);
hti = next;
} while (hti);
hti = hti->next;
}
}
+
+// Begin an iteration over <ht>. First advances to the newest table instance,
+// helping to finish any in-progress copies along the way, then takes a
+// reference on that table (CAS-increment of <references>) so it won't be
+// retired/freed while the iterator is live. Release with ht_iter_free().
+// NOTE(review): the <key> parameter is unused in this body -- confirm intent.
+ht_iter_t *ht_iter_start (hashtable_t *ht, void *key) {
+    hti_t *hti = ht->hti;
+    int rcount;
+    do {
+        while (((volatile hti_t *)hti)->next != NULL) {
+            do { } while (hti_help_copy(hti) != TRUE);
+            hti = hti->next;
+        }
+
+        // Try to take a reference. A <references> value of -1 marks a table
+        // being retired (see the CAS to -1 in ht_cas); in that case restart
+        // from the (new) head table.
+        int old = hti->references;
+        do {
+            rcount = old;
+            if (rcount != -1) {
+                old = SYNC_CAS(&hti->references, rcount, rcount + 1);
+            }
+        } while (rcount != old);
+    } while (rcount == -1);
+
+    ht_iter_t *iter = nbd_malloc(sizeof(ht_iter_t));
+    iter->hti = hti;
+    iter->idx = -1; // ht_iter_next() pre-increments, so start one before slot 0
+
+    return iter;
+}
+
+// Advance <iter> to the next live entry in its table. Returns <iter> with
+// <key>/<val> filled in, or NULL when the table is exhausted (in which case
+// the iterator is freed via ht_iter_free and must not be used again).
+// Slots that were never used, hold no value, or were deleted (TOMBSTONE)
+// are skipped.
+ht_iter_t *ht_iter_next (ht_iter_t *iter) {
+    volatile entry_t *ent;
+    uint64_t key;
+    uint64_t val;
+    uint64_t table_size = (1 << iter->hti->scale);
+    do {
+        if (++iter->idx == table_size) {
+            ht_iter_free(iter);
+            return NULL;
+        }
+        // BUGFIX: was table[++iter->idx], which incremented <idx> a second
+        // time per pass -- skipping every other entry and reading one slot
+        // past the end of the table when the first increment landed on the
+        // last valid index.
+        ent = &iter->hti->table[iter->idx];
+        key = ent->key;
+        val = ent->val;
+
+    } while (key == DOES_NOT_EXIST || val == DOES_NOT_EXIST || val == TOMBSTONE);
+
+    iter->key = key;
+    if (val == COPIED_VALUE) {
+        // Entry was moved by a resize; chase it into the next table.
+        uint32_t hash = (iter->hti->ht->key_type == NULL)
+                      ? murmur32_8b(key)
+                      : iter->hti->ht->key_type->hash((void *)key);
+        iter->val = hti_get(iter->hti->next, (void *)ent->key, hash);
+    } else {
+        iter->val = val;
+    }
+
+    return iter;
+}
+
+// Returns the value captured by the most recent successful ht_iter_next().
+uint64_t ht_iter_val (ht_iter_t *iter) {
+    return iter->val;
+}
+
+// Returns the key captured by the most recent successful ht_iter_next().
+uint64_t ht_iter_key (ht_iter_t *iter) {
+    return iter->key;
+}
+
+// Release the reference on the iterator's table (taken in ht_iter_start) and
+// free the iterator itself. Called either by the user or internally by
+// ht_iter_next() on exhaustion; in both cases the iterator must not be used
+// afterward.
+void ht_iter_free (ht_iter_t *iter) {
+    SYNC_ADD(&iter->hti->references, -1);
+    nbd_free(iter); // BUGFIX: iterator was allocated in ht_iter_start and leaked
+}
+