2 #include "libbenchmark_benchmarks_queue_umm_internal.h"
// Per-thread benchmark state. The worker thread stores its final count in
// `operation_count`, and the cleanup function reads that member back when it
// reports results.
// NOTE(review): the member list of this struct is not visible in this view.
5 struct libbenchmark_benchmark_queue_umm_liblfds710_lockfree_enqueue1_dequeue1_per_thread_benchmark_state
// Per-run benchmark state. The init function stores a pointer to it in
// tsets->users_threadset_state, and cleanup reaches the queue through its
// `qs` member.
// NOTE(review): only the member's type line is visible here; the declarator
// (presumably `*qs`) is not - confirm against the full file.
11 struct libbenchmark_benchmark_queue_umm_liblfds710_lockfree_enqueue1_dequeue1_overall_benchmark_state
13 struct lfds710_queue_umm_state
21 /****************************************************************************/
// Prepares one run of the enqueue1/dequeue1 benchmark for the liblfds710
// lock-free queue_umm: allocates the overall state, the queue state, the queue
// elements and one per-thread state for every thread in the thread set.
//
//   ts                    - topology state; queried for the NUMA node of each logical processor
//   logical_processor_set - logical processors taking part in this run; passed to the
//                           thread set and, in SMP mode, counted to size the element array
//   ms                    - shared memory state every allocation is taken from
//   numa_mode             - the visible case labels are SMP, NUMA and NUMA_BUT_NOT_USED
//   tsets                 - thread set initialised here; receives the overall state and,
//                           per thread, the per-thread benchmark state
//
// NOTE(review): short lines (braces, the switch, break statements, some
// declarators and short statements) are not visible in this view.
22 void libbenchmark_benchmark_queue_umm_liblfds710_lockfree_enqueue1_dequeue1_init( struct libbenchmark_topology_state *ts,
23 struct lfds710_list_aso_state *logical_processor_set,
24 struct libshared_memory_state *ms,
25 enum libbenchmark_topology_numa_mode numa_mode,
26 struct libbenchmark_threadset_state *tsets )
30 number_logical_processors,
31 number_logical_processors_in_numa_node,
32 largest_number_logical_processors_in_numa_node = 0;
34 struct lfds710_list_asu_element
38 struct libbenchmark_benchmark_queue_umm_liblfds710_lockfree_enqueue1_dequeue1_overall_benchmark_state
41 struct libbenchmark_benchmark_queue_umm_liblfds710_lockfree_enqueue1_dequeue1_per_thread_benchmark_state
44 struct lfds710_queue_umm_element
47 struct lfds710_queue_umm_state
50 struct libbenchmark_threadset_per_numa_state
54 struct libbenchmark_threadset_per_thread_state
57 struct libbenchmark_topology_node_state
60 LFDS710_PAL_ASSERT( ts != NULL );
61 LFDS710_PAL_ASSERT( logical_processor_set != NULL );
62 LFDS710_PAL_ASSERT( ms != NULL );
63 // TRD : numa_mode can be any value in its range
64 LFDS710_PAL_ASSERT( tsets != NULL );
// The overall state is allocated from the node with the most free space,
// aligned to the atomic isolation size.
66 obs = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_benchmark_queue_umm_liblfds710_lockfree_enqueue1_dequeue1_overall_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
// The thread set is initialised with this benchmark's thread function.
68 libbenchmark_threadset_init( tsets, ts, logical_processor_set, ms, libbenchmark_benchmark_queue_umm_liblfds710_lockfree_enqueue1_dequeue1_thread, NULL );
// SMP: a single array of (logical processor count + 1) elements. Element 0
// initialises the queue; the remaining elements are enqueued, one per
// logical processor.
72 case LIBBENCHMARK_TOPOLOGY_NUMA_MODE_SMP:
73 qs = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct lfds710_queue_umm_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
74 lfds710_list_aso_query( logical_processor_set, LFDS710_LIST_ASO_QUERY_GET_POTENTIALLY_INACCURATE_COUNT, NULL, (void *) &number_logical_processors );
75 qe = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct lfds710_queue_umm_element) * (number_logical_processors+1), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
76 lfds710_queue_umm_init_valid_on_current_logical_core( qs, &qe[0], NULL );
77 for( loop = 1 ; loop < (number_logical_processors+1) ; loop++ )
78 lfds710_queue_umm_enqueue( qs, &qe[loop] );
79 // TRD : now the per-thread states
80 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
82 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
83 ptbs = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_benchmark_queue_umm_liblfds710_lockfree_enqueue1_dequeue1_per_thread_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
84 pts->users_per_thread_state = ptbs;
// NUMA: the first pass counts, for every NUMA node, how many threads of the
// thread set run on a logical processor in that node.
88 case LIBBENCHMARK_TOPOLOGY_NUMA_MODE_NUMA:
89 // TRD : get the NUMA node for the queue_umm state
90 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
92 pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
95 number_logical_processors_in_numa_node = 0;
97 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
99 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
101 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
103 if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
104 number_logical_processors_in_numa_node++;
// NOTE(review): the statement guarded by this `if` is not visible. Later code
// reads largest_pns->numa_node_id, so it presumably records pns as largest_pns;
// confirm that largest_number_logical_processors_in_numa_node is updated as well,
// otherwise the comparison is always against its initial value of 0.
107 if( number_logical_processors_in_numa_node > largest_number_logical_processors_in_numa_node )
// The queue state and the single element used to initialise the queue both
// come from the node referenced by largest_pns.
111 qs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct lfds710_queue_umm_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
112 qe = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct lfds710_queue_umm_element), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
113 lfds710_queue_umm_init_valid_on_current_logical_core( qs, qe, NULL );
115 /* TRD : for each NUMA node, alloc one element per thread in that NUMA node (from the current thread set)
116 the dummy element comes from the same node as the queue_umm state and has already been done
121 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
123 pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
125 /* TRD : for each NUMA node, figure out how many LPs in the current set are in that NUMA node
126 and allocate then the correct number of elements from this NUMA node (1 per LP)
130 number_logical_processors_in_numa_node = 0;
132 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
134 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
136 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
138 if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
139 number_logical_processors_in_numa_node++;
// In NUMA mode the elements are allocated from the node being examined
// (pns->numa_node_id) and then enqueued.
142 qe = libshared_memory_alloc_from_specific_node( ms, pns->numa_node_id, sizeof(struct lfds710_queue_umm_element) * number_logical_processors_in_numa_node, LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
143 for( loop = 0 ; loop < number_logical_processors_in_numa_node ; loop++ )
144 lfds710_queue_umm_enqueue( qs, &qe[loop] );
147 // TRD : now the per-thread states
// Every per-thread state is allocated from the node referenced by largest_pns.
151 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
153 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
154 ptbs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_benchmark_queue_umm_liblfds710_lockfree_enqueue1_dequeue1_per_thread_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
155 pts->users_per_thread_state = ptbs;
// NUMA_BUT_NOT_USED: same counting passes as the NUMA case, but the queue
// elements are allocated from the largest_pns node rather than from each
// thread's own node.
159 case LIBBENCHMARK_TOPOLOGY_NUMA_MODE_NUMA_BUT_NOT_USED:
160 // TRD : get the NUMA node for the queue_umm state
161 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
163 pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
166 number_logical_processors_in_numa_node = 0;
168 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
170 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
172 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
174 if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
175 number_logical_processors_in_numa_node++;
// NOTE(review): as in the NUMA case, the guarded statement is not visible here.
178 if( number_logical_processors_in_numa_node > largest_number_logical_processors_in_numa_node )
182 qs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct lfds710_queue_umm_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
183 qe = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct lfds710_queue_umm_element), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
184 lfds710_queue_umm_init_valid_on_current_logical_core( qs, qe, NULL );
186 /* TRD : for each NUMA node, alloc one element per thread in that NUMA node (from the current thread set)
187 the dummy element comes from the same node as the queue_umm state and has already been done
192 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
194 pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
196 /* TRD : for each NUMA node, figure out how many LPs in the current set are in that NUMA node
197 and allocate then the correct number of elements from this NUMA node (1 per LP)
201 number_logical_processors_in_numa_node = 0;
203 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
205 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
207 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
209 if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
210 number_logical_processors_in_numa_node++;
213 // TRD : everything allocates from the queue_umm state NUMA node
214 qe = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct lfds710_queue_umm_element) * number_logical_processors_in_numa_node, LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
215 for( loop = 0 ; loop < number_logical_processors_in_numa_node ; loop++ )
216 lfds710_queue_umm_enqueue( qs, &qe[loop] );
219 // TRD : now the per-thread states
223 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
225 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
226 ptbs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_benchmark_queue_umm_liblfds710_lockfree_enqueue1_dequeue1_per_thread_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
227 pts->users_per_thread_state = ptbs;
// Publish the overall state on the thread set so the threads and cleanup can find it.
// NOTE(review): the store of qs into the overall state is not visible in this view.
232 tsets->users_threadset_state = obs;
241 /****************************************************************************/
// Worker thread of the enqueue1/dequeue1 benchmark.
//
// Fetches its per-thread and overall benchmark state from the thread-set
// per-thread state, calls libbenchmark_threadset_thread_ready_and_wait, and
// then repeatedly dequeues one element from the queue and enqueues it again
// until current_time reaches end_time (start time plus
// time_units_per_second * libbenchmark_globals_benchmark_duration_in_seconds).
// The final operation count is written to the per-thread benchmark state.
//
//   libbenchmark_threadset_per_thread_state - must be non-NULL (asserted); cast to
//       struct libbenchmark_threadset_per_thread_state * before use
//
// Returns RETURN_SUCCESS through LIBSHARED_PAL_THREAD_RETURN_CAST.
//
// NOTE(review): short lines (braces, some declarators, and short statements
// such as the counter updates) are not visible in this view; only the visible
// lines are reproduced here.
242 libshared_pal_thread_return_t LIBSHARED_PAL_THREAD_CALLING_CONVENTION libbenchmark_benchmark_queue_umm_liblfds710_lockfree_enqueue1_dequeue1_thread( void *libbenchmark_threadset_per_thread_state )
244 int long long unsigned
247 time_units_per_second;
253 struct lfds710_queue_umm_element
256 struct lfds710_queue_umm_state
259 struct libbenchmark_benchmark_queue_umm_liblfds710_lockfree_enqueue1_dequeue1_overall_benchmark_state
262 struct libbenchmark_benchmark_queue_umm_liblfds710_lockfree_enqueue1_dequeue1_per_thread_benchmark_state
265 struct libbenchmark_threadset_per_thread_state
// Load barrier issued on entry, before any of the state set up by the
// initialising thread is read.
268 LFDS710_MISC_BARRIER_LOAD;
270 LFDS710_PAL_ASSERT( libbenchmark_threadset_per_thread_state != NULL );
272 pts = (struct libbenchmark_threadset_per_thread_state *) libbenchmark_threadset_per_thread_state;
274 ptbs = LIBBENCHMARK_THREADSET_PER_THREAD_STATE_GET_USERS_PER_THREAD_STATE( *pts );
275 obs = LIBBENCHMARK_THREADSET_PER_THREAD_STATE_GET_USERS_OVERALL_STATE( *pts );
278 LIBBENCHMARK_PAL_TIME_UNITS_PER_SECOND( &time_units_per_second );
280 libbenchmark_threadset_thread_ready_and_wait( pts );
// The timing macro takes the address of current_time. The argument had been
// mangled by an HTML-entity decode ("&curren" turned into a currency sign).
282 LIBBENCHMARK_PAL_GET_HIGHRES_TIME( &current_time );
284 end_time = current_time + time_units_per_second * libbenchmark_globals_benchmark_duration_in_seconds;
286 while( current_time < end_time )
// One benchmark operation: take an element off the queue and put it straight back.
288 lfds710_queue_umm_dequeue( qs, &qe );
289 lfds710_queue_umm_enqueue( qs, qe );
// The clock is re-read only when time_loop reaches TIME_LOOP_COUNT, not on
// every iteration.
292 if( time_loop++ == TIME_LOOP_COUNT )
295 LIBBENCHMARK_PAL_GET_HIGHRES_TIME( &current_time );
299 ptbs->operation_count = operation_count;
// Store barrier followed by a forced store, issued after the result has been
// written and before the thread returns.
301 LFDS710_MISC_BARRIER_STORE;
303 lfds710_misc_force_store();
305 return LIBSHARED_PAL_THREAD_RETURN_CAST(RETURN_SUCCESS);
312 /****************************************************************************/
// Collects the results of one benchmark run and releases the queue.
//
// For every per-thread state in the thread set, one result is put into rs,
// tagged with the queue_umm datastructure id, the enqueue1/dequeue1 benchmark
// id, the liblfds710 lock-free lock id, the logical processor set, the
// thread's logical processor number and Windows group number, and the
// thread's operation count. Afterwards the queue referenced by the overall
// state is cleaned up.
//
//   logical_processor_set - must be non-NULL; passed through to each result
//   numa_mode             - NOTE(review): not referenced in the visible lines; one argument
//                           line of the results call is missing from this view
//   rs                    - must be non-NULL; results state receiving one entry per thread
//   tsets                 - must be non-NULL; thread set holding the per-thread and overall states
313 void libbenchmark_benchmark_queue_umm_liblfds710_lockfree_enqueue1_dequeue1_cleanup( struct lfds710_list_aso_state *logical_processor_set,
314 enum libbenchmark_topology_numa_mode numa_mode,
315 struct libbenchmark_results_state *rs,
316 struct libbenchmark_threadset_state *tsets )
318 struct lfds710_list_asu_element
321 struct libbenchmark_benchmark_queue_umm_liblfds710_lockfree_enqueue1_dequeue1_overall_benchmark_state
324 struct libbenchmark_benchmark_queue_umm_liblfds710_lockfree_enqueue1_dequeue1_per_thread_benchmark_state
327 struct libbenchmark_threadset_per_thread_state
330 LFDS710_PAL_ASSERT( logical_processor_set != NULL );
331 // TRD : numa_mode can be any value in its range
332 LFDS710_PAL_ASSERT( rs != NULL );
333 LFDS710_PAL_ASSERT( tsets != NULL );
// Report one result per thread, using the operation count the thread stored.
335 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
337 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
339 ptbs = LIBBENCHMARK_THREADSET_PER_THREAD_STATE_GET_USERS_PER_THREAD_STATE( *pts );
341 libbenchmark_results_put_result( rs,
342 LIBBENCHMARK_DATASTRUCTURE_ID_QUEUE_UMM,
343 LIBBENCHMARK_BENCHMARK_ID_ENQUEUE_UMM1_THEN_DEQUEUE_UMM1,
344 LIBBENCHMARK_LOCK_ID_LIBLFDS710_LOCKFREE,
346 logical_processor_set,
347 LIBBENCHMARK_TOPOLOGY_NODE_GET_LOGICAL_PROCESSOR_NUMBER( *pts->tns_lp ),
348 LIBBENCHMARK_TOPOLOGY_NODE_GET_WINDOWS_GROUP_NUMBER( *pts->tns_lp ),
349 ptbs->operation_count );
// The overall state was stored on the thread set by the init function. The
// queue is cleaned up with no element cleanup callback (NULL).
352 obs = tsets->users_threadset_state;
354 lfds710_queue_umm_cleanup( obs->qs, NULL );