]> pd.if.org Git - liblfds/blob - liblfds/liblfds7.1.0/test_and_benchmark/libbenchmark/src/libbenchmark_benchmarks_freelist_push1_then_pop1/libbenchmark_benchmarks_freelist_liblfds710_lockfree_push1_then_pop1.c
Initial import (all versions, including the new 7.1.0)
[liblfds] / liblfds / liblfds7.1.0 / test_and_benchmark / libbenchmark / src / libbenchmark_benchmarks_freelist_push1_then_pop1 / libbenchmark_benchmarks_freelist_liblfds710_lockfree_push1_then_pop1.c
1 /***** includes *****/
2 #include "libbenchmark_benchmarks_freelist_internal.h"
3
4 /***** structs *****/
5 struct libbenchmark_benchmark_freelist_liblfds710_lockfree_push1_pop1_per_thread_benchmark_state
6 {
7   lfds710_pal_uint_t
8     operation_count;
9
10   struct lfds710_prng_st_state
11     psts;
12 };
13
14 struct libbenchmark_benchmark_freelist_liblfds710_lockfree_push1_pop1_overall_benchmark_state
15 {
16   struct lfds710_freelist_state
17     *fs;
18 };
19
20
21
22
23
24 /****************************************************************************/
25 void libbenchmark_benchmark_freelist_liblfds710_lockfree_push1_pop1_init( struct libbenchmark_topology_state *ts,
26                                                                        struct lfds710_list_aso_state *logical_processor_set,
27                                                                        struct libshared_memory_state *ms,
28                                                                        enum libbenchmark_topology_numa_mode numa_mode,
29                                                                        struct libbenchmark_threadset_state *tsets )
30 {
31   enum flag
32     finished_flag = LOWERED;
33
34   lfds710_pal_uint_t
35     ea_size_in_freelist_elements,
36     *fe_array_count,
37     index = 0,
38     loop,
39     number_freelist_elements,
40     number_logical_processors,
41     number_logical_processors_in_numa_node,
42     number_numa_nodes,
43     number_freelist_element_pointers_per_atomic_isolation = LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES / sizeof(struct lfds710_freelist_element *),
44     largest_number_logical_processors_in_numa_node = 0,
45     random_value,
46     smallest_power_of_two_larger_than_or_equal_to_number_logical_processors = 2,
47     temp_number_logical_processors;
48
49   struct lfds710_freelist_element * volatile
50     (*ea)[LFDS710_FREELIST_ELIMINATION_ARRAY_ELEMENT_SIZE_IN_FREELIST_ELEMENTS];
51
52   struct lfds710_list_asu_element
53     *lasue = NULL,
54     *lasue_lp = NULL;
55
56   struct lfds710_prng_st_state
57     psts;
58
59   struct libbenchmark_benchmark_freelist_liblfds710_lockfree_push1_pop1_overall_benchmark_state
60     *obs;
61
62   struct libbenchmark_benchmark_freelist_liblfds710_lockfree_push1_pop1_per_thread_benchmark_state
63     *ptbs;
64
65   struct lfds710_freelist_element
66     *fe,
67     **fe_array_pointers;
68
69   struct lfds710_freelist_state
70     *fs = NULL;
71
72   struct libbenchmark_threadset_per_numa_state
73     *pns,
74     *largest_pns = NULL;
75
76   struct libbenchmark_threadset_per_thread_state
77     *pts;
78
79   struct libbenchmark_topology_node_state
80     *numa_node_for_lp;
81
82   LFDS710_PAL_ASSERT( ts != NULL );
83   LFDS710_PAL_ASSERT( logical_processor_set != NULL );
84   LFDS710_PAL_ASSERT( ms != NULL );
85   // TRD : numa_mode can be any value in its range
86   LFDS710_PAL_ASSERT( tsets != NULL );
87
88   lfds710_prng_st_init( &psts, LFDS710_PRNG_SEED );
89
90   obs = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_benchmark_freelist_liblfds710_lockfree_push1_pop1_overall_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
91
92   libbenchmark_threadset_init( tsets, ts, logical_processor_set, ms, libbenchmark_benchmark_freelist_liblfds710_lockfree_push1_pop1_thread, NULL );
93
94   switch( numa_mode )
95   {
96     case LIBBENCHMARK_TOPOLOGY_NUMA_MODE_SMP:
97       lfds710_list_aso_query( logical_processor_set, LFDS710_LIST_ASO_QUERY_GET_POTENTIALLY_INACCURATE_COUNT, NULL, (void *) &number_logical_processors );
98       temp_number_logical_processors = number_logical_processors >> 2;
99       while( temp_number_logical_processors != 0 )
100       {
101         temp_number_logical_processors >>= 1;
102         smallest_power_of_two_larger_than_or_equal_to_number_logical_processors <<= 1;
103       }
104       fs = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct lfds710_freelist_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
105       ea = libshared_memory_alloc_from_unknown_node( ms, sizeof(struct lfds710_freelist_element *) * LFDS710_FREELIST_ELIMINATION_ARRAY_ELEMENT_SIZE_IN_FREELIST_ELEMENTS * smallest_power_of_two_larger_than_or_equal_to_number_logical_processors, LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
106       lfds710_freelist_init_valid_on_current_logical_core( fs, ea, smallest_power_of_two_larger_than_or_equal_to_number_logical_processors, NULL );
107
108       // TRD : fill the elimination array and have one element per thread in the freelist proper
109       number_freelist_elements = (smallest_power_of_two_larger_than_or_equal_to_number_logical_processors * number_freelist_element_pointers_per_atomic_isolation) + number_logical_processors;
110       fe = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct lfds710_freelist_element) * number_freelist_elements, LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
111       for( loop = 0 ; loop < number_freelist_elements ; loop++ )
112         lfds710_freelist_push( fs, &fe[loop], &psts );
113       while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
114       {
115         pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
116         ptbs = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_benchmark_freelist_liblfds710_lockfree_push1_pop1_per_thread_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
117         LFDS710_PRNG_ST_GENERATE( psts, random_value );
118         LFDS710_PRNG_ST_MIXING_FUNCTION( random_value );
119         lfds710_prng_st_init( &ptbs->psts, random_value );
120         pts->users_per_thread_state = ptbs;
121       }
122     break;
123
124     case LIBBENCHMARK_TOPOLOGY_NUMA_MODE_NUMA:
125       /* TRD : init the freelist from the NUMA node with most processors from the current set
126                or, if equal threads, with lowest NUMA
127                iterate over the NUMA node list
128                for each NUMA node, allocate one freelist element per thread on that node
129                and push those elements onto the freelist
130
131                the loop over the threads, and give each one the freelist state as it's user state
132       */
133
134       while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
135       {
136         pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
137
138         lasue_lp = NULL;
139         number_logical_processors_in_numa_node = 0;
140
141         while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
142         {
143           pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
144
145           libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
146
147           if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
148             number_logical_processors_in_numa_node++;
149         }
150
151         if( number_logical_processors_in_numa_node > largest_number_logical_processors_in_numa_node )
152           largest_pns = pns;
153       }
154
155       lfds710_list_aso_query( logical_processor_set, LFDS710_LIST_ASO_QUERY_GET_POTENTIALLY_INACCURATE_COUNT, NULL, (void *) &number_logical_processors );
156       temp_number_logical_processors = number_logical_processors >> 2;
157       while( temp_number_logical_processors != 0 )
158       {
159         temp_number_logical_processors >>= 1;
160         smallest_power_of_two_larger_than_or_equal_to_number_logical_processors <<= 1;
161       }
162
163       fs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct lfds710_freelist_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
164       ea = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct lfds710_freelist_element *) * LFDS710_FREELIST_ELIMINATION_ARRAY_ELEMENT_SIZE_IN_FREELIST_ELEMENTS * smallest_power_of_two_larger_than_or_equal_to_number_logical_processors, LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
165       lfds710_freelist_init_valid_on_current_logical_core( fs, ea, smallest_power_of_two_larger_than_or_equal_to_number_logical_processors, NULL );
166
167       /* TRD : now figure out how many elements are needed from each NUMA node
168                allocate them all
169                them push them interleaved, round-robin, to the freelist
170       */
171
172       libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMBER_OF_NODE_TYPE, (void *) (lfds710_pal_uint_t) LIBBENCHMARK_TOPOLOGY_NODE_TYPE_NUMA, (void *) &number_numa_nodes );
173
174       fe_array_pointers = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct lfds710_freelist_element *) * number_numa_nodes, LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
175       fe_array_count = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(lfds710_pal_uint_t) * number_numa_nodes, LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
176       for( loop = 0 ; loop < number_numa_nodes ; loop++ )
177         fe_array_count[loop] = 0;
178
179       // TRD : now query the freelist for the EL size
180       lfds710_freelist_query( fs, LFDS710_FREELIST_QUERY_GET_ELIMINATION_ARRAY_EXTRA_ELEMENTS_IN_FREELIST_ELEMENTS, NULL, (void *) &ea_size_in_freelist_elements );
181
182       // TRD : we need to divide that number of elements over the NUMA nodes in proportion to their number of LPs...
183
184       lasue = NULL;
185
186       while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
187       {
188         pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
189
190         /* TRD : for each NUMA node, figure out how many LPs in the current set are in that NUMA node
191                  and allocate then the correct number of elements from this NUMA node (1 per LP, plus for the node the correct proportion of the EA layer)
192         */
193
194         lasue_lp = NULL;
195         number_logical_processors_in_numa_node = 0;
196
197         while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
198         {
199           pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
200
201           libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
202
203           if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
204             number_logical_processors_in_numa_node++;
205         }
206
207         // TRD : blind +1 to deal with rounding, it will skew results but only slightly
208         fe_array_count[index] = number_logical_processors_in_numa_node + (ea_size_in_freelist_elements * number_logical_processors_in_numa_node) / number_logical_processors + 1;
209         fe_array_pointers[index] = libshared_memory_alloc_from_specific_node( ms, pns->numa_node_id, sizeof(struct lfds710_freelist_element) * fe_array_count[index], LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
210         index++;
211       }
212
213       while( finished_flag == LOWERED )
214       {
215         for( loop = 0 ; loop < index ; loop++ )
216           if( fe_array_count[loop] > 0 )
217             lfds710_freelist_push( fs, &fe_array_pointers[loop][ fe_array_count[loop]-- ], &psts );
218
219         finished_flag = RAISED;
220
221         for( loop = 0 ; loop < index ; loop++ )
222           if( fe_array_count[loop] > 0 )
223             finished_flag = LOWERED;
224       }
225
226       lasue = NULL;
227
228       while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
229       {
230         pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
231         ptbs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_benchmark_freelist_liblfds710_lockfree_push1_pop1_per_thread_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
232         LFDS710_PRNG_ST_GENERATE( psts, random_value );
233         LFDS710_PRNG_ST_MIXING_FUNCTION( random_value );
234         lfds710_prng_st_init( &ptbs->psts, random_value );
235         pts->users_per_thread_state = ptbs;
236       }
237     break;
238
239     case LIBBENCHMARK_TOPOLOGY_NUMA_MODE_NUMA_BUT_NOT_USED:
240       /* TRD : freelist state in the NUMA node with most threads from the current set
241                or, if equal threads, with lowest NUMA
242                all elements alloced from that node as well
243
244                SO much easier to figure out allocs than with NUMA OMG
245                all of this code needs rewriting
246                and the NUMA-but-not-used stuff is interesting but I don't think it carries its own weight
247       */
248
249       while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
250       {
251         pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
252
253         lasue_lp = NULL;
254         number_logical_processors_in_numa_node = 0;
255
256         while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
257         {
258           pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
259
260           libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
261
262           if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
263             number_logical_processors_in_numa_node++;
264         }
265
266         if( number_logical_processors_in_numa_node > largest_number_logical_processors_in_numa_node )
267           largest_pns = pns;
268       }
269
270       lfds710_list_aso_query( logical_processor_set, LFDS710_LIST_ASO_QUERY_GET_POTENTIALLY_INACCURATE_COUNT, NULL, (void *) &number_logical_processors );
271       temp_number_logical_processors = number_logical_processors >> 2;
272       while( temp_number_logical_processors != 0 )
273       {
274         temp_number_logical_processors >>= 1;
275         smallest_power_of_two_larger_than_or_equal_to_number_logical_processors <<= 1;
276       }
277
278       fs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct lfds710_freelist_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
279       ea = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct lfds710_freelist_element *) * LFDS710_FREELIST_ELIMINATION_ARRAY_ELEMENT_SIZE_IN_FREELIST_ELEMENTS * smallest_power_of_two_larger_than_or_equal_to_number_logical_processors, LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
280       lfds710_freelist_init_valid_on_current_logical_core( fs, ea, smallest_power_of_two_larger_than_or_equal_to_number_logical_processors, NULL );
281
282       // TRD : fill the elimination array and have one element per thread in the freelist proper
283       number_freelist_elements = (smallest_power_of_two_larger_than_or_equal_to_number_logical_processors * number_freelist_element_pointers_per_atomic_isolation) + number_logical_processors;
284       fe = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct lfds710_freelist_element) * number_freelist_elements, LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
285       for( loop = 0 ; loop < number_freelist_elements ; loop++ )
286         lfds710_freelist_push( fs, &fe[loop], &psts );
287
288       lasue = NULL;
289
290       while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
291       {
292         pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
293         ptbs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_benchmark_freelist_liblfds710_lockfree_push1_pop1_per_thread_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
294         LFDS710_PRNG_ST_GENERATE( psts, random_value );
295         LFDS710_PRNG_ST_MIXING_FUNCTION( random_value );
296         lfds710_prng_st_init( &ptbs->psts, random_value );
297         pts->users_per_thread_state = ptbs;
298       }
299     break;
300   }
301
302   obs->fs = fs;
303   tsets->users_threadset_state = obs;
304
305   return;
306 }
307
308
309
310
311
312 /****************************************************************************/
313 libshared_pal_thread_return_t LIBSHARED_PAL_THREAD_CALLING_CONVENTION libbenchmark_benchmark_freelist_liblfds710_lockfree_push1_pop1_thread( void *libbenchmark_threadset_per_thread_state )
314 {
315   int long long unsigned
316     current_time = 0,
317     end_time,
318     time_units_per_second;
319
320   lfds710_pal_uint_t
321     operation_count = 0,
322     time_loop = 0;
323
324   struct lfds710_freelist_state
325     *fs;
326
327   struct lfds710_freelist_element
328     *fe;
329
330   struct libbenchmark_benchmark_freelist_liblfds710_lockfree_push1_pop1_overall_benchmark_state
331     *obs;
332
333   struct libbenchmark_benchmark_freelist_liblfds710_lockfree_push1_pop1_per_thread_benchmark_state
334     *ptbs;
335
336   struct libbenchmark_threadset_per_thread_state
337     *pts;
338
339   LFDS710_MISC_BARRIER_LOAD;
340
341   LFDS710_PAL_ASSERT( libbenchmark_threadset_per_thread_state != NULL );
342
343   pts = (struct libbenchmark_threadset_per_thread_state *) libbenchmark_threadset_per_thread_state;
344
345   ptbs = LIBBENCHMARK_THREADSET_PER_THREAD_STATE_GET_USERS_PER_THREAD_STATE( *pts );
346   obs = LIBBENCHMARK_THREADSET_PER_THREAD_STATE_GET_USERS_OVERALL_STATE( *pts );
347   fs = obs->fs;
348
349   LIBBENCHMARK_PAL_TIME_UNITS_PER_SECOND( &time_units_per_second );
350
351   libbenchmark_threadset_thread_ready_and_wait( pts );
352
353   LIBBENCHMARK_PAL_GET_HIGHRES_TIME( &current_time );
354
355   end_time = current_time + time_units_per_second * libbenchmark_globals_benchmark_duration_in_seconds;
356
357   while( current_time < end_time )
358   {
359     lfds710_freelist_pop( fs, &fe, &ptbs->psts );
360     lfds710_freelist_push( fs, fe, &ptbs->psts );
361     operation_count++;
362
363     if( time_loop++ == TIME_LOOP_COUNT )
364     {
365       time_loop = 0;
366       LIBBENCHMARK_PAL_GET_HIGHRES_TIME( &current_time );
367     }
368   }
369
370   ptbs->operation_count = operation_count;
371
372   LFDS710_MISC_BARRIER_STORE;
373
374   lfds710_misc_force_store();
375
376   return LIBSHARED_PAL_THREAD_RETURN_CAST(RETURN_SUCCESS);
377 }
378
379
380
381
382
383 /****************************************************************************/
384 void libbenchmark_benchmark_freelist_liblfds710_lockfree_push1_pop1_cleanup( struct lfds710_list_aso_state *logical_processor_set,
385                                                                           enum libbenchmark_topology_numa_mode numa_mode,
386                                                                           struct libbenchmark_results_state *rs,
387                                                                           struct libbenchmark_threadset_state *tsets )
388 {
389   struct lfds710_list_asu_element
390     *lasue = NULL;
391
392   struct libbenchmark_benchmark_freelist_liblfds710_lockfree_push1_pop1_overall_benchmark_state
393     *obs;
394
395   struct libbenchmark_benchmark_freelist_liblfds710_lockfree_push1_pop1_per_thread_benchmark_state
396     *ptbs;
397
398   struct libbenchmark_threadset_per_thread_state
399     *pts;
400
401   LFDS710_PAL_ASSERT( logical_processor_set != NULL );
402   // TRD : numa_mode can be any value in its range
403   LFDS710_PAL_ASSERT( rs != NULL );
404   LFDS710_PAL_ASSERT( tsets != NULL );
405
406   while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
407   {
408     pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
409
410     ptbs = LIBBENCHMARK_THREADSET_PER_THREAD_STATE_GET_USERS_PER_THREAD_STATE( *pts );
411
412     libbenchmark_results_put_result( rs,
413                                      LIBBENCHMARK_DATASTRUCTURE_ID_FREELIST,
414                                      LIBBENCHMARK_BENCHMARK_ID_PUSH1_THEN_POP1,
415                                      LIBBENCHMARK_LOCK_ID_LIBLFDS710_LOCKFREE,
416                                      numa_mode,
417                                      logical_processor_set,
418                                      LIBBENCHMARK_TOPOLOGY_NODE_GET_LOGICAL_PROCESSOR_NUMBER( *pts->tns_lp ),
419                                      LIBBENCHMARK_TOPOLOGY_NODE_GET_WINDOWS_GROUP_NUMBER( *pts->tns_lp ),
420                                      ptbs->operation_count );
421   }
422
423   obs = tsets->users_threadset_state;
424
425   lfds710_freelist_cleanup( obs->fs, NULL );
426
427   return;
428 }
429
430