2 #include "libbenchmark_benchmarks_queue_umm_internal.h"
// Per-thread benchmark state: one instance is allocated for every entry in the
// thread set by the init function; the worker thread stores its final
// operation_count into it and the cleanup function reads that value back.
// NOTE(review): the member list of this struct is not visible in this listing.
5 struct libbenchmark_benchmark_queue_umm_liblfds700_lockfree_enqueue1_dequeue1_per_thread_benchmark_state
// Overall (whole thread set) benchmark state: the cleanup function reads its
// qs member and passes it to lfds700_queue_cleanup().
// NOTE(review): only the type of the member below is visible here; presumably
// it is the declaration of that qs queue-state pointer - confirm.
11 struct libbenchmark_benchmark_queue_umm_liblfds700_lockfree_enqueue1_dequeue1_overall_benchmark_state
13 struct lfds700_queue_state
21 /****************************************************************************/
/* Benchmark set-up for the liblfds700 lock-free queue (enqueue1/dequeue1).

   Visible behaviour: asserts the pointer arguments, initialises a PRNG state,
   allocates the overall benchmark state, initialises the thread set with the
   benchmark thread function, and then - in one of three NUMA-mode cases -
   allocates the queue state, its elements and one per-thread benchmark state
   for every entry in the thread set. Finally the overall state is stored in
   tsets->users_threadset_state.

   ts                    : topology state; passed to the thread set init and
                           used to look up the NUMA node of a logical processor
   logical_processor_set : the logical processors to benchmark on; passed to the
                           thread set init and, in the SMP case, counted
   ms                    : shared memory state all allocations are taken from
   numa_mode             : selects one of the case labels below
                           (NOTE(review): the switch statement itself is not
                           visible in this listing - confirm)
   tsets                 : thread set state to initialise and populate

   NOTE(review): this listing has lost lines (braces, declarators, some
   statements); the comments describe only what the visible lines show.
*/
22 void libbenchmark_benchmark_queue_umm_liblfds700_lockfree_enqueue1_dequeue1_init( struct libbenchmark_topology_state *ts,
23 struct lfds710_list_aso_state *logical_processor_set,
24 struct libshared_memory_state *ms,
25 enum libbenchmark_topology_numa_mode numa_mode,
26 struct libbenchmark_threadset_state *tsets )
28 struct lfds700_misc_prng_state
33 number_logical_processors,
34 number_logical_processors_in_numa_node,
// running maximum of the per-NUMA-node logical processor counts; starts at zero
35 largest_number_logical_processors_in_numa_node = 0;
37 struct lfds710_list_asu_element
41 struct libbenchmark_benchmark_queue_umm_liblfds700_lockfree_enqueue1_dequeue1_overall_benchmark_state
44 struct libbenchmark_benchmark_queue_umm_liblfds700_lockfree_enqueue1_dequeue1_per_thread_benchmark_state
47 struct lfds700_queue_element
50 struct lfds700_queue_state
53 struct libbenchmark_threadset_per_numa_state
57 struct libbenchmark_threadset_per_thread_state
60 struct libbenchmark_topology_node_state
63 LFDS710_PAL_ASSERT( ts != NULL );
64 LFDS710_PAL_ASSERT( logical_processor_set != NULL );
65 LFDS710_PAL_ASSERT( ms != NULL );
66 // TRD : numa_mode can be any value in its range
67 LFDS710_PAL_ASSERT( tsets != NULL );
// the PRNG state is passed to every lfds700 queue call made below
69 lfds700_misc_prng_init( &ps );
// overall benchmark state, taken from whichever node has the most free space
71 obs = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_benchmark_queue_umm_liblfds700_lockfree_enqueue1_dequeue1_overall_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
// registers the benchmark thread function as the thread set's worker
73 libbenchmark_threadset_init( tsets, ts, logical_processor_set, ms, libbenchmark_benchmark_queue_umm_liblfds700_lockfree_enqueue1_dequeue1_thread, NULL );
// SMP case: queue state and elements both come from the most-free-space node
77 case LIBBENCHMARK_TOPOLOGY_NUMA_MODE_SMP:
78 qs = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct lfds700_queue_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
// count of logical processors in the set (the query is named "potentially inaccurate")
79 lfds710_list_aso_query( logical_processor_set, LFDS710_LIST_ASO_QUERY_GET_POTENTIALLY_INACCURATE_COUNT, NULL, (void *) &number_logical_processors );
// one element more than the logical processor count: element 0 is handed to the queue init call
80 qe = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct lfds700_queue_element) * (number_logical_processors+1), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
81 lfds700_queue_init_valid_on_current_logical_core( qs, &qe[0], &ps, NULL );
// elements 1..number_logical_processors are enqueued
82 for( loop = 1 ; loop < (number_logical_processors+1) ; loop++ )
83 lfds700_queue_enqueue( qs, &qe[loop], &ps );
84 // TRD : now the per-thread states
85 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
87 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
88 ptbs = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_benchmark_queue_umm_liblfds700_lockfree_enqueue1_dequeue1_per_thread_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
89 pts->users_per_thread_state = ptbs;
// NUMA case: the queue state goes on a node chosen by the counting pass below
93 case LIBBENCHMARK_TOPOLOGY_NUMA_MODE_NUMA:
94 // TRD : get the NUMA node for the queue_umm state
95 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
97 pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
// count the thread-set logical processors whose NUMA node id matches this node
100 number_logical_processors_in_numa_node = 0;
102 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
104 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
106 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
108 if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
109 number_logical_processors_in_numa_node++;
// NOTE(review): the body of this comparison is not visible; presumably it records pns as largest_pns and updates the maximum - confirm
112 if( number_logical_processors_in_numa_node > largest_number_logical_processors_in_numa_node )
// queue state and the single element given to queue init both come from largest_pns's node
116 qs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct lfds700_queue_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
117 qe = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct lfds700_queue_element), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
118 lfds700_queue_init_valid_on_current_logical_core( qs, qe, &ps, NULL );
120 /* TRD : for each NUMA node, alloc one element per thread in that NUMA node (from the current thread set)
121 the dummy element comes from the same node as the queue_umm state and has already been done
// NOTE(review): the terminator of the block comment opened above is not visible in this listing
126 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
128 pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
130 /* TRD : for each NUMA node, figure out how many LPs in the current set are in that NUMA node
131 and allocate then the correct number of elements from this NUMA node (1 per LP)
135 number_logical_processors_in_numa_node = 0;
137 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
139 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
141 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
143 if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
144 number_logical_processors_in_numa_node++;
// elements are allocated from this node (pns), one per counted logical processor, and each is enqueued
147 qe = libshared_memory_alloc_from_specific_node( ms, pns->numa_node_id, sizeof(struct lfds700_queue_element) * number_logical_processors_in_numa_node, LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
148 for( loop = 0 ; loop < number_logical_processors_in_numa_node ; loop++ )
149 lfds700_queue_enqueue( qs, &qe[loop], &ps );
152 // TRD : now the per-thread states
156 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
158 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
// every per-thread state is allocated from largest_pns's node, not from the thread's own node
159 ptbs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_benchmark_queue_umm_liblfds700_lockfree_enqueue1_dequeue1_per_thread_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
160 pts->users_per_thread_state = ptbs;
// NUMA-but-not-used case: same counting as the NUMA case, but all elements come from largest_pns's node
164 case LIBBENCHMARK_TOPOLOGY_NUMA_MODE_NUMA_BUT_NOT_USED:
165 // TRD : get the NUMA node for the queue_umm state
166 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
168 pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
171 number_logical_processors_in_numa_node = 0;
173 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
175 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
177 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
179 if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
180 number_logical_processors_in_numa_node++;
// NOTE(review): as in the NUMA case, the body of this comparison is not visible - confirm it sets largest_pns
183 if( number_logical_processors_in_numa_node > largest_number_logical_processors_in_numa_node )
187 qs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct lfds700_queue_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
188 qe = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct lfds700_queue_element), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
189 lfds700_queue_init_valid_on_current_logical_core( qs, qe, &ps, NULL );
191 /* TRD : for each NUMA node, alloc one element per thread in that NUMA node (from the current thread set)
192 the dummy element comes from the same node as the queue_umm state and has already been done
197 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
199 pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
201 /* TRD : for each NUMA node, figure out how many LPs in the current set are in that NUMA node
202 and allocate then the correct number of elements from this NUMA node (1 per LP)
206 number_logical_processors_in_numa_node = 0;
208 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
210 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
212 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
214 if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
215 number_logical_processors_in_numa_node++;
218 // TRD : everything allocates from the queue_umm state NUMA node
// the per-node count still sets how many elements are allocated; only the source node differs from the NUMA case
219 qe = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct lfds700_queue_element) * number_logical_processors_in_numa_node, LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
220 for( loop = 0 ; loop < number_logical_processors_in_numa_node ; loop++ )
221 lfds700_queue_enqueue( qs, &qe[loop], &ps );
224 // TRD : now the per-thread states
228 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
230 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
231 ptbs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_benchmark_queue_umm_liblfds700_lockfree_enqueue1_dequeue1_per_thread_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
232 pts->users_per_thread_state = ptbs;
// publish the overall state through the thread set so the cleanup function can retrieve it
// NOTE(review): the store of qs into the overall state is not visible in this listing - confirm it exists
237 tsets->users_threadset_state = obs;
246 /****************************************************************************/
/* Benchmark worker thread for the liblfds700 lock-free queue (enqueue1/dequeue1).

   Visible behaviour: issues a load barrier, asserts and casts its argument to
   the per-thread thread set state, fetches the per-thread and overall benchmark
   states from it, initialises a private PRNG state, waits until the thread set
   releases the workers, then repeatedly dequeues one element and enqueues that
   same element until the benchmark duration has elapsed. The final operation
   count is stored into the per-thread benchmark state, followed by a store
   barrier and a forced store.

   libbenchmark_threadset_per_thread_state : pointer to this thread's
       struct libbenchmark_threadset_per_thread_state; must not be NULL

   Returns RETURN_SUCCESS via LIBSHARED_PAL_THREAD_RETURN_CAST.

   NOTE(review): this listing has lost lines (braces, declarators, some
   statements such as the assignment of qs and the operation_count increment);
   the comments describe only what the visible lines show.
*/
247 libshared_pal_thread_return_t LIBSHARED_PAL_THREAD_CALLING_CONVENTION libbenchmark_benchmark_queue_umm_liblfds700_lockfree_enqueue1_dequeue1_thread( void *libbenchmark_threadset_per_thread_state )
249 int long long unsigned
252 time_units_per_second;
254 struct lfds700_misc_prng_state
261 struct lfds700_queue_element
264 struct lfds700_queue_state
267 struct libbenchmark_benchmark_queue_umm_liblfds700_lockfree_enqueue1_dequeue1_overall_benchmark_state
270 struct libbenchmark_benchmark_queue_umm_liblfds700_lockfree_enqueue1_dequeue1_per_thread_benchmark_state
273 struct libbenchmark_threadset_per_thread_state
// load barrier before any of the state prepared by the init function is read
276 LFDS710_MISC_BARRIER_LOAD;
278 LFDS710_PAL_ASSERT( libbenchmark_threadset_per_thread_state != NULL );
280 pts = (struct libbenchmark_threadset_per_thread_state *) libbenchmark_threadset_per_thread_state;
282 ptbs = LIBBENCHMARK_THREADSET_PER_THREAD_STATE_GET_USERS_PER_THREAD_STATE( *pts );
283 obs = LIBBENCHMARK_THREADSET_PER_THREAD_STATE_GET_USERS_OVERALL_STATE( *pts );
// each worker uses its own PRNG state for the queue calls
286 lfds700_misc_prng_init( &ps );
288 LIBBENCHMARK_PAL_TIME_UNITS_PER_SECOND( &time_units_per_second );
// block here until the thread set starts the benchmark
290 libbenchmark_threadset_thread_ready_and_wait( pts );
// the start time is read only after the wait, so the wait is not part of the timed run
292 LIBBENCHMARK_PAL_GET_HIGHRES_TIME( &current_time );
294 end_time = current_time + time_units_per_second * libbenchmark_globals_benchmark_duration_in_seconds;
296 while( current_time < end_time )
// dequeue one element and immediately enqueue that same element again
298 lfds700_queue_dequeue( qs, &qe, &ps );
299 lfds700_queue_enqueue( qs, qe, &ps );
// the clock is re-read only when time_loop reaches TIME_LOOP_COUNT, not on every iteration
302 if( time_loop++ == TIME_LOOP_COUNT )
305 LIBBENCHMARK_PAL_GET_HIGHRES_TIME( &current_time );
// hand the result to the cleanup function through the per-thread benchmark state
309 ptbs->operation_count = operation_count;
311 LFDS710_MISC_BARRIER_STORE;
313 lfds710_misc_force_store();
315 return LIBSHARED_PAL_THREAD_RETURN_CAST(RETURN_SUCCESS);
322 /****************************************************************************/
/* Benchmark tear-down for the liblfds700 lock-free queue (enqueue1/dequeue1).

   Visible behaviour: asserts the pointer arguments, walks the thread set's list
   of per-thread states and records each thread's operation_count in the results
   state, then fetches the overall benchmark state from the thread set and
   cleans up the queue it refers to.

   logical_processor_set : the logical processor set the benchmark ran on;
                           passed through to the results call
   numa_mode             : not referenced by any visible line
                           (NOTE(review): an argument line appears to be missing
                           from the results call below - presumably numa_mode;
                           confirm)
   rs                    : results state that receives one result per thread
   tsets                 : thread set state populated by the init function

   NOTE(review): this listing has lost lines (braces, declarators); the comments
   describe only what the visible lines show.
*/
323 void libbenchmark_benchmark_queue_umm_liblfds700_lockfree_enqueue1_dequeue1_cleanup( struct lfds710_list_aso_state *logical_processor_set,
324 enum libbenchmark_topology_numa_mode numa_mode,
325 struct libbenchmark_results_state *rs,
326 struct libbenchmark_threadset_state *tsets )
328 struct lfds710_list_asu_element
331 struct libbenchmark_benchmark_queue_umm_liblfds700_lockfree_enqueue1_dequeue1_overall_benchmark_state
334 struct libbenchmark_benchmark_queue_umm_liblfds700_lockfree_enqueue1_dequeue1_per_thread_benchmark_state
337 struct libbenchmark_threadset_per_thread_state
340 LFDS710_PAL_ASSERT( logical_processor_set != NULL );
341 // TRD : numa_mode can be any value in its range
342 LFDS710_PAL_ASSERT( rs != NULL );
343 LFDS710_PAL_ASSERT( tsets != NULL );
// one result is recorded for every per-thread state in the thread set
345 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
347 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
349 ptbs = LIBBENCHMARK_THREADSET_PER_THREAD_STATE_GET_USERS_PER_THREAD_STATE( *pts );
// the result is keyed by data structure, benchmark and lock ids plus the thread's logical processor and Windows group numbers
351 libbenchmark_results_put_result( rs,
352 LIBBENCHMARK_DATASTRUCTURE_ID_QUEUE_UMM,
353 LIBBENCHMARK_BENCHMARK_ID_ENQUEUE_UMM1_THEN_DEQUEUE_UMM1,
354 LIBBENCHMARK_LOCK_ID_LIBLFDS700_LOCKFREE,
// NOTE(review): the embedded numbering skips a line here, so one argument is probably missing from this listing
356 logical_processor_set,
357 LIBBENCHMARK_TOPOLOGY_NODE_GET_LOGICAL_PROCESSOR_NUMBER( *pts->tns_lp ),
358 LIBBENCHMARK_TOPOLOGY_NODE_GET_WINDOWS_GROUP_NUMBER( *pts->tns_lp ),
359 ptbs->operation_count );
// the overall state was stored in the thread set by the init function
362 obs = tsets->users_threadset_state;
// no element cleanup callback is supplied (second argument is NULL)
364 lfds700_queue_cleanup( obs->qs, NULL );