2 #include "libbenchmark_benchmarks_queue_umm_internal.h"
/* Per-thread benchmark state for the pthread-mutex queue_umm enqueue1/dequeue1 benchmark.
   Only the struct tag line is visible in this excerpt; the functions below read and
   write an operation_count member through pointers to this type. */
5 struct libbenchmark_benchmark_queue_umm_pthread_mutex_enqueue1_dequeue1_per_thread_benchmark_state
15 /****************************************************************************/
/* Set-up for the pthread-mutex queue_umm enqueue1/dequeue1 benchmark.

   Initialises the thread set with
   libbenchmark_benchmark_queue_umm_pthread_mutex_enqueue1_dequeue1_thread as its thread
   function, then allocates one queue state, the queue elements and one per-thread
   benchmark state per entry in tsets->list_of_per_thread_states.  Which allocator call
   and which NUMA node are used differs between the three case labels below.

   ts                    - topology state; queried for the NUMA node of each logical processor
   logical_processor_set - logical processors taking part; its count sizes the element
                           array in the SMP and NUMA_BUT_NOT_USED cases
   ms                    - shared memory state all allocations are taken from
   numa_mode             - presumably selects one of the case labels below (the switch
                           line itself is not visible in this excerpt)
   tsets                 - thread set state; receives the queue state in
                           users_threadset_state and each per-thread state in
                           users_per_thread_state

   NOTE(review): this excerpt omits some lines (braces, several declarations, iterator
   resets); the comments below describe only what is visible. */
16 void libbenchmark_benchmark_queue_umm_pthread_mutex_enqueue1_dequeue1_init( struct libbenchmark_topology_state *ts,
17 struct lfds710_list_aso_state *logical_processor_set,
18 struct libshared_memory_state *ms,
19 enum libbenchmark_topology_numa_mode numa_mode,
20 struct libbenchmark_threadset_state *tsets )
24 number_logical_processors,
25 number_logical_processors_in_numa_node,
26 largest_number_logical_processors_in_numa_node = 0;
28 struct lfds710_list_asu_element
32 struct libbenchmark_benchmark_queue_umm_pthread_mutex_enqueue1_dequeue1_per_thread_benchmark_state
35 struct libbenchmark_datastructure_queue_umm_pthread_mutex_element
38 struct libbenchmark_datastructure_queue_umm_pthread_mutex_state
41 struct libbenchmark_threadset_per_numa_state
45 struct libbenchmark_threadset_per_thread_state
48 struct libbenchmark_topology_node_state
51 LFDS710_PAL_ASSERT( ts != NULL );
52 LFDS710_PAL_ASSERT( logical_processor_set != NULL );
53 LFDS710_PAL_ASSERT( ms != NULL );
54 // TRD : numa_mode can be any value in its range
55 LFDS710_PAL_ASSERT( tsets != NULL );
// hand the thread set the benchmark thread function; the final argument is NULL
57 libbenchmark_threadset_init( tsets, ts, logical_processor_set, ms, libbenchmark_benchmark_queue_umm_pthread_mutex_enqueue1_dequeue1_thread, NULL );
// SMP: every allocation uses libshared_memory_alloc_from_most_free_space_node (no specific node is requested)
61 case LIBBENCHMARK_TOPOLOGY_NUMA_MODE_SMP:
62 qs = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_datastructure_queue_umm_pthread_mutex_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
63 lfds710_list_aso_query( logical_processor_set, LFDS710_LIST_ASO_QUERY_GET_POTENTIALLY_INACCURATE_COUNT, NULL, (void *) &number_logical_processors );
// one element per logical processor plus one extra: qe[0] goes to the queue init call, qe[1..n] are enqueued
64 qe = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_datastructure_queue_umm_pthread_mutex_element) * (number_logical_processors+1), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
65 libbenchmark_datastructure_queue_umm_pthread_mutex_init( qs, &qe[0], NULL );
66 for( loop = 1 ; loop < (number_logical_processors+1) ; loop++ )
67 libbenchmark_datastructure_queue_umm_pthread_mutex_enqueue_umm( qs, &qe[loop] );
68 // TRD : now the per-thread states
69 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
71 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
72 ptbs = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_benchmark_queue_umm_pthread_mutex_enqueue1_dequeue1_per_thread_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
73 pts->users_per_thread_state = ptbs;
// NUMA: queue state and first element come from largest_pns's node; the remaining elements come from each thread's own NUMA node
77 case LIBBENCHMARK_TOPOLOGY_NUMA_MODE_NUMA:
78 // TRD : get the NUMA node for the queue_umm state
79 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
81 pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
// count the thread-set logical processors whose NUMA id matches this per-NUMA state
84 number_logical_processors_in_numa_node = 0;
86 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
88 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
90 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
92 if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
93 number_logical_processors_in_numa_node++;
// NOTE(review): the body of this comparison is not visible here; largest_pns is used below, so presumably it is assigned in it - confirm, and confirm whether largest_number_logical_processors_in_numa_node is updated too
96 if( number_logical_processors_in_numa_node > largest_number_logical_processors_in_numa_node )
// queue state and a single element (the one passed to the init call) both come from largest_pns's NUMA node
100 qs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_datastructure_queue_umm_pthread_mutex_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
101 qe = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_datastructure_queue_umm_pthread_mutex_element), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
102 libbenchmark_datastructure_queue_umm_pthread_mutex_init( qs, qe, NULL );
104 /* TRD : for each NUMA node, alloc one element per thread in that NUMA node (from the current thread set)
105 the dummy element comes from the same node as the queue_umm state and has already been done
110 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
112 pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
114 /* TRD : for each NUMA node, figure out how many LPs in the current set are in that NUMA node
115 and allocate then the correct number of elements from this NUMA node (1 per LP)
119 number_logical_processors_in_numa_node = 0;
121 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
123 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
125 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
127 if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
128 number_logical_processors_in_numa_node++;
// the elements for this NUMA node are allocated from this node (pns->numa_node_id) and enqueued
131 qe = libshared_memory_alloc_from_specific_node( ms, pns->numa_node_id, sizeof(struct libbenchmark_datastructure_queue_umm_pthread_mutex_element) * number_logical_processors_in_numa_node, LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
132 for( loop = 0 ; loop < number_logical_processors_in_numa_node ; loop++ )
133 libbenchmark_datastructure_queue_umm_pthread_mutex_enqueue_umm( qs, &qe[loop] );
136 // TRD : now the per-thread states
// every per-thread state is allocated from largest_pns's NUMA node
140 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
142 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
143 ptbs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_benchmark_queue_umm_pthread_mutex_enqueue1_dequeue1_per_thread_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
144 pts->users_per_thread_state = ptbs;
// NUMA_BUT_NOT_USED: same counting as the NUMA case, but every allocation is taken from largest_pns's NUMA node
148 case LIBBENCHMARK_TOPOLOGY_NUMA_MODE_NUMA_BUT_NOT_USED:
149 lfds710_list_aso_query( logical_processor_set, LFDS710_LIST_ASO_QUERY_GET_POTENTIALLY_INACCURATE_COUNT, NULL, (void *) &number_logical_processors );
151 // TRD : get the NUMA node for the queue_umm state
152 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
154 pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
157 number_logical_processors_in_numa_node = 0;
159 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
161 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
163 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
165 if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
166 number_logical_processors_in_numa_node++;
// NOTE(review): as in the NUMA case, the body of this comparison is not visible in this excerpt
169 if( number_logical_processors_in_numa_node > largest_number_logical_processors_in_numa_node )
173 qs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_datastructure_queue_umm_pthread_mutex_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
// NOTE(review): number_logical_processors+1 elements are allocated here, but only the first is passed to the init call and qe is reassigned further down - the remainder looks unused; confirm
174 qe = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_datastructure_queue_umm_pthread_mutex_element) * (number_logical_processors+1), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
175 libbenchmark_datastructure_queue_umm_pthread_mutex_init( qs, qe, NULL );
177 /* TRD : for each NUMA node, alloc one element per thread in that NUMA node (from the current thread set)
178 the dummy element comes from the same node as the queue_umm state and has already been done
183 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
185 pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
187 /* TRD : for each NUMA node, figure out how many LPs in the current set are in that NUMA node
188 and allocate then the correct number of elements from this NUMA node (1 per LP)
192 number_logical_processors_in_numa_node = 0;
194 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
196 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
198 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
200 if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
201 number_logical_processors_in_numa_node++;
204 // TRD : everything allocates from the queue_umm state NUMA node
205 qe = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_datastructure_queue_umm_pthread_mutex_element) * number_logical_processors_in_numa_node, LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
206 for( loop = 0 ; loop < number_logical_processors_in_numa_node ; loop++ )
207 libbenchmark_datastructure_queue_umm_pthread_mutex_enqueue_umm( qs, &qe[loop] );
210 // TRD : now the per-thread states
214 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
216 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
217 ptbs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_benchmark_queue_umm_pthread_mutex_enqueue1_dequeue1_per_thread_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
218 pts->users_per_thread_state = ptbs;
// store the queue state in the thread set; the cleanup function reads it back from users_threadset_state
222 tsets->users_threadset_state = qs;
231 /****************************************************************************/
/* Benchmark thread body for the pthread-mutex queue_umm enqueue1/dequeue1 benchmark.

   libbenchmark_threadset_per_thread_state - must be non-NULL; cast to
     struct libbenchmark_threadset_per_thread_state *, from which the per-thread
     benchmark state (ptbs) and the shared queue state (qs) are obtained.

   After libbenchmark_threadset_thread_ready_and_wait() returns, the thread repeatedly
   dequeues one element and enqueues it again until end_time is reached, then stores its
   operation count in ptbs->operation_count.

   Returns LIBSHARED_PAL_THREAD_RETURN_CAST(RETURN_SUCCESS).

   Fix: both LIBBENCHMARK_PAL_GET_HIGHRES_TIME calls had their argument corrupted to
   a currency-sign character followed by "t_time" (the text "&curren" decoded as an HTML
   entity); the argument is restored to &current_time, the variable used by the
   end_time computation and the loop condition.

   NOTE(review): this excerpt omits some lines (braces, several declarations, the
   operation_count increment and the time_loop reset are not visible). */
232 libshared_pal_thread_return_t LIBSHARED_PAL_THREAD_CALLING_CONVENTION libbenchmark_benchmark_queue_umm_pthread_mutex_enqueue1_dequeue1_thread( void *libbenchmark_threadset_per_thread_state )
234 int long long unsigned
237 time_units_per_second;
243 struct libbenchmark_datastructure_queue_umm_pthread_mutex_element
246 struct libbenchmark_datastructure_queue_umm_pthread_mutex_state
249 struct libbenchmark_benchmark_queue_umm_pthread_mutex_enqueue1_dequeue1_per_thread_benchmark_state
252 struct libbenchmark_threadset_per_thread_state
// load barrier issued before any state is read; presumably so this thread sees what the initialising thread wrote - confirm
255 LFDS710_MISC_BARRIER_LOAD;
257 LFDS710_PAL_ASSERT( libbenchmark_threadset_per_thread_state != NULL );
259 pts = (struct libbenchmark_threadset_per_thread_state *) libbenchmark_threadset_per_thread_state;
261 ptbs = LIBBENCHMARK_THREADSET_PER_THREAD_STATE_GET_USERS_PER_THREAD_STATE( *pts );
262 qs = LIBBENCHMARK_THREADSET_PER_THREAD_STATE_GET_USERS_OVERALL_STATE( *pts );
264 LIBBENCHMARK_PAL_TIME_UNITS_PER_SECOND( &time_units_per_second );
266 libbenchmark_threadset_thread_ready_and_wait( pts );
// take the start time, then run for libbenchmark_globals_benchmark_duration_in_seconds seconds' worth of time units
268 LIBBENCHMARK_PAL_GET_HIGHRES_TIME( &current_time );
270 end_time = current_time + time_units_per_second * libbenchmark_globals_benchmark_duration_in_seconds;
272 while( current_time < end_time )
// one benchmark operation: take an element off the queue and put the same element back
274 libbenchmark_datastructure_queue_umm_pthread_mutex_dequeue_umm( qs, &qe );
275 libbenchmark_datastructure_queue_umm_pthread_mutex_enqueue_umm( qs, qe );
// the clock is re-read only when time_loop reaches TIME_LOOP_COUNT, not on every iteration
278 if( time_loop++ == TIME_LOOP_COUNT )
281 LIBBENCHMARK_PAL_GET_HIGHRES_TIME( &current_time );
// record this thread's count in its per-thread state; the cleanup function reports it
285 ptbs->operation_count = operation_count;
287 LFDS710_MISC_BARRIER_STORE;
289 lfds710_misc_force_store();
291 return LIBSHARED_PAL_THREAD_RETURN_CAST(RETURN_SUCCESS);
298 /****************************************************************************/
/* Result collection and tear-down for the pthread-mutex queue_umm enqueue1/dequeue1 benchmark.

   For every entry in tsets->list_of_per_thread_states, passes that thread's
   ptbs->operation_count to libbenchmark_results_put_result, together with the
   queue_umm datastructure id, the enqueue1/dequeue1 benchmark id, the pthread-mutex
   lock id, the logical processor set and the thread's logical processor number and
   Windows group number.  Then calls the queue cleanup function on the queue state
   stored in tsets->users_threadset_state.

   logical_processor_set - must be non-NULL; forwarded to libbenchmark_results_put_result
   numa_mode             - any value in its range; no use of it is visible in this excerpt
   rs                    - must be non-NULL; results state the counts are written to
   tsets                 - must be non-NULL; thread set holding the per-thread states and the queue state

   NOTE(review): this excerpt omits some lines (braces, declarations, and one argument
   line of the libbenchmark_results_put_result call). */
299 void libbenchmark_benchmark_queue_umm_pthread_mutex_enqueue1_dequeue1_cleanup( struct lfds710_list_aso_state *logical_processor_set,
300 enum libbenchmark_topology_numa_mode numa_mode,
301 struct libbenchmark_results_state *rs,
302 struct libbenchmark_threadset_state *tsets )
304 struct libbenchmark_datastructure_queue_umm_pthread_mutex_state
307 struct lfds710_list_asu_element
310 struct libbenchmark_benchmark_queue_umm_pthread_mutex_enqueue1_dequeue1_per_thread_benchmark_state
313 struct libbenchmark_threadset_per_thread_state
316 LFDS710_PAL_ASSERT( logical_processor_set != NULL );
317 // TRD : numa_mode can be any value in its range
318 LFDS710_PAL_ASSERT( rs != NULL );
319 LFDS710_PAL_ASSERT( tsets != NULL );
// report one result per thread in the thread set
321 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
323 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
325 ptbs = LIBBENCHMARK_THREADSET_PER_THREAD_STATE_GET_USERS_PER_THREAD_STATE( *pts );
327 libbenchmark_results_put_result( rs,
328 LIBBENCHMARK_DATASTRUCTURE_ID_QUEUE_UMM,
329 LIBBENCHMARK_BENCHMARK_ID_ENQUEUE_UMM1_THEN_DEQUEUE_UMM1,
330 LIBBENCHMARK_LOCK_ID_PTHREAD_MUTEX,
332 logical_processor_set,
333 LIBBENCHMARK_TOPOLOGY_NODE_GET_LOGICAL_PROCESSOR_NUMBER( *pts->tns_lp ),
334 LIBBENCHMARK_TOPOLOGY_NODE_GET_WINDOWS_GROUP_NUMBER( *pts->tns_lp ),
335 ptbs->operation_count );
// fetch the queue state the init function stored in the thread set and clean it up (second argument is NULL)
338 qs = tsets->users_threadset_state;
340 libbenchmark_datastructure_queue_umm_pthread_mutex_cleanup( qs, NULL );