/*
 * Written by Josh Dybnis and released to the public domain, as explained at
 * http://creativecommons.org/licenses/publicdomain
 *
 * Safe memory reclamation using a simple quiescence technique from RCU.
 *
 * WARNING: not robust enough for real-world use.
 */
#include <assert.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "runtime_local.h"
16 #define RCU_POST_THRESHOLD 10
17 #define RCU_QUEUE_SCALE 20
26 static uint64_t rcu_[MAX_NUM_THREADS][MAX_NUM_THREADS] = {};
27 static uint64_t rcu_last_posted_[MAX_NUM_THREADS][MAX_NUM_THREADS] = {};
28 static fifo_t *pending_[MAX_NUM_THREADS] = {};
29 static int num_threads_ = 0;
31 static fifo_t *fifo_alloc(int scale) {
32 fifo_t *q = (fifo_t *)nbd_malloc(sizeof(fifo_t) + (1 << scale) * sizeof(void *));
33 memset(q, 0, sizeof(fifo_t));
40 static uint32_t fifo_index (fifo_t *q, uint32_t i) {
41 return i & MASK(q->scale);
44 static void fifo_enqueue (fifo_t *q, void *x) {
45 assert(fifo_index(q, q->head + 1) != fifo_index(q, q->tail));
46 uint32_t i = fifo_index(q, q->head++);
50 static void *fifo_dequeue (fifo_t *q) {
51 uint32_t i = fifo_index(q, q->tail++);
55 void rcu_thread_init (int id) {
56 assert(id < MAX_NUM_THREADS);
57 if (pending_[id] == NULL) {
58 pending_[id] = fifo_alloc(RCU_QUEUE_SCALE);
59 SYNC_ADD(&num_threads_, 1);
63 static void rcu_post (uint64_t x) {
64 LOCALIZE_THREAD_LOCAL(tid_, int);
65 if (x - rcu_last_posted_[tid_][tid_] < RCU_POST_THRESHOLD)
68 int next_thread_id = (tid_ + 1) % num_threads_;
70 TRACE("r0", "rcu_post: %llu", x, 0);
71 rcu_[next_thread_id][tid_] = rcu_last_posted_[tid_][tid_] = x;
74 void rcu_update (void) {
75 LOCALIZE_THREAD_LOCAL(tid_, int);
76 assert(tid_ < num_threads_);
77 int next_thread_id = (tid_ + 1) % num_threads_;
79 for (i = 0; i < num_threads_; ++i) {
83 // No need to post an update if the value hasn't changed
84 if (rcu_[tid_][i] == rcu_last_posted_[tid_][i])
87 uint64_t x = rcu_[tid_][i];
88 rcu_[next_thread_id][i] = rcu_last_posted_[tid_][i] = x;
92 while (pending_[tid_]->tail != rcu_[tid_][tid_]) {
93 nbd_free(fifo_dequeue(pending_[tid_]));
97 void nbd_defer_free (void *x) {
98 LOCALIZE_THREAD_LOCAL(tid_, int);
99 fifo_enqueue(pending_[tid_], x);
100 TRACE("r0", "nbd_defer_free: put %p on queue at position %llu", x, pending_[tid_]->head);
101 rcu_post(pending_[tid_]->head);
// ---------------------------------------------------------------------------
// Stress test: concurrent LIFO stack exercising deferred reclamation.
// ---------------------------------------------------------------------------

#define NUM_ITERATIONS 10000000

// Singly linked stack node.
typedef struct node {
    struct node *next;
} node_t;

// Treiber-style stack; head swapped with CAS (ABA-prone by design — the
// point of the test is that RCU deferral prevents use-after-free).
typedef struct lifo {
    node_t *head;
} lifo_t;

static volatile int wait_;   // countdown barrier: workers spin until it hits 0
static lifo_t *stk_;         // shared stack under test
122 static lifo_t *lifo_alloc (void) {
123 lifo_t *stk = (lifo_t *)nbd_malloc(sizeof(lifo_t));
124 memset(stk, 0, sizeof(lifo_t));
128 static void lifo_aba_push (lifo_t *stk, node_t *x) {
131 head = ((volatile lifo_t *)stk)->head;
132 ((volatile node_t *)x)->next = head;
133 } while (__sync_val_compare_and_swap(&stk->head, head, x) != head);
136 node_t *lifo_aba_pop (lifo_t *stk) {
139 head = ((volatile lifo_t *)stk)->head;
142 } while (__sync_val_compare_and_swap(&stk->head, head, head->next) != head);
147 node_t *node_alloc (void) {
148 node_t *node = (node_t *)nbd_malloc(sizeof(node_t));
149 memset(node, 0, sizeof(node_t));
153 void *worker (void *arg) {
154 int id = (int)(size_t)arg;
155 unsigned int rand_seed = (unsigned int)id + 1;
157 // Wait for all the worker threads to be ready.
158 __sync_fetch_and_add(&wait_, -1);
162 for (i = 0; i < NUM_ITERATIONS; ++ i) {
163 int n = rand_r(&rand_seed);
165 lifo_aba_push(stk_, node_alloc());
167 node_t *x = lifo_aba_pop(stk_);
178 int main (int argc, char **argv) {
180 //lwt_set_trace_level("m0r0");
186 num_threads = strtol(argv[1], NULL, 10);
188 fprintf(stderr, "%s: Invalid argument for number of threads\n", argv[0]);
191 if (num_threads <= 0) {
192 fprintf(stderr, "%s: Number of threads must be at least 1\n", argv[0]);
200 pthread_t thread[num_threads];
201 for (int i = 0; i < num_threads; ++i) {
202 int rc = nbd_thread_create(thread + i, i, worker, (void *)(size_t)i);
203 if (rc != 0) { perror("pthread_create"); return rc; }
205 for (int i = 0; i < num_threads; ++i) {
206 pthread_join(thread[i], NULL);