2 #include "libbenchmark_benchmarks_queue_umm_internal.h"
5 struct libbenchmark_benchmark_queue_umm_pthread_rwlock_enqueue1_dequeue1_per_thread_benchmark_state
15 /****************************************************************************/
/* TRD : init for the pthread_rwlock queue_umm enqueue1/dequeue1 benchmark

         creates the threadset, then allocates and populates the queue state and
         one queue element per logical processor (plus one dummy), placing the
         allocations according to numa_mode; finally hangs a per-thread benchmark
         state (operation counter) off every thread and publishes the queue state
         through tsets->users_threadset_state

         ts                    : topology state, used to map an LP to its NUMA node
         logical_processor_set : the set of LPs participating in this run
         ms                    : libshared memory allocator - all state comes from here
         numa_mode             : SMP / NUMA / NUMA_BUT_NOT_USED placement policy
         tsets                 : threadset to initialise; receives the queue state

         NOTE(review) : this listing is elided - interior lines (braces, the
         switch( numa_mode ) header, some declarator lines) are not shown here
*/
16 void libbenchmark_benchmark_queue_umm_pthread_rwlock_enqueue1_dequeue1_init( struct libbenchmark_topology_state *ts,
17 struct lfds710_list_aso_state *logical_processor_set,
18 struct libshared_memory_state *ms,
19 enum libbenchmark_topology_numa_mode numa_mode,
20 struct libbenchmark_threadset_state *tsets )
// TRD : counters for sizing per-NUMA-node element allocations
24 number_logical_processors,
25 number_logical_processors_in_numa_node,
26 largest_number_logical_processors_in_numa_node = 0;
28 struct lfds710_list_asu_element
32 struct libbenchmark_benchmark_queue_umm_pthread_rwlock_enqueue1_dequeue1_per_thread_benchmark_state
35 struct libbenchmark_datastructure_queue_umm_pthread_rwlock_element
38 struct libbenchmark_datastructure_queue_umm_pthread_rwlock_state
41 struct libbenchmark_threadset_per_numa_state
45 struct libbenchmark_threadset_per_thread_state
48 struct libbenchmark_topology_node_state
51 LFDS710_PAL_ASSERT( ts != NULL );
52 LFDS710_PAL_ASSERT( logical_processor_set != NULL );
53 LFDS710_PAL_ASSERT( ms != NULL );
54 // TRD : numa_mode can be any value in its range
55 LFDS710_PAL_ASSERT( tsets != NULL );
// TRD : every benchmark thread runs ..._enqueue1_dequeue1_thread (defined below)
57 libbenchmark_threadset_init( tsets, ts, logical_processor_set, ms, libbenchmark_benchmark_queue_umm_pthread_rwlock_enqueue1_dequeue1_thread, NULL );
/* TRD : SMP - NUMA placement is irrelevant; take everything from the node
         with the most free space; element [0] is the queue's dummy element,
         elements [1..n] are enqueued, giving one element in flight per LP
*/
61 case LIBBENCHMARK_TOPOLOGY_NUMA_MODE_SMP:
62 qs = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_datastructure_queue_umm_pthread_rwlock_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
63 lfds710_list_aso_query( logical_processor_set, LFDS710_LIST_ASO_QUERY_GET_POTENTIALLY_INACCURATE_COUNT, NULL, (void *) &number_logical_processors );
64 qe = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_datastructure_queue_umm_pthread_rwlock_element) * (number_logical_processors+1), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
65 libbenchmark_datastructure_queue_umm_pthread_rwlock_init( qs, &qe[0], NULL );
66 for( loop = 1 ; loop < (number_logical_processors+1) ; loop++ )
67 libbenchmark_datastructure_queue_umm_pthread_rwlock_enqueue_umm( qs, &qe[loop] );
68 // TRD : now the per-thread states
69 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
71 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
72 ptbs = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_benchmark_queue_umm_pthread_rwlock_enqueue1_dequeue1_per_thread_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
73 pts->users_per_thread_state = ptbs;
/* TRD : NUMA - queue state and its dummy element go on the NUMA node which
         has the most participating LPs (tracked via largest_pns - its
         assignment line is elided from this listing); the in-flight elements
         are then allocated per node, local to the threads that use them
*/
77 case LIBBENCHMARK_TOPOLOGY_NUMA_MODE_NUMA:
78 // TRD : get the NUMA node for the queue_umm state
79 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
81 pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
84 number_logical_processors_in_numa_node = 0;
// TRD : count the LPs of the current thread set which live on this NUMA node
86 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
88 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
90 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
92 if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
93 number_logical_processors_in_numa_node++;
// TRD : remember the most-populated node (largest_pns assignment elided here)
96 if( number_logical_processors_in_numa_node > largest_number_logical_processors_in_numa_node )
100 qs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_datastructure_queue_umm_pthread_rwlock_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
// TRD : a single element here - the queue's dummy element only
101 qe = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_datastructure_queue_umm_pthread_rwlock_element), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
102 libbenchmark_datastructure_queue_umm_pthread_rwlock_init( qs, qe, NULL );
104 /* TRD : for each NUMA node, alloc one element per thread in that NUMA node (from the current thread set)
105 the dummy element comes from the same node as the queue_umm state and has already been done
110 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
112 pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
114 /* TRD : for each NUMA node, figure out how many LPs in the current set are in that NUMA node
115 and allocate then the correct number of elements from this NUMA node (1 per LP)
119 number_logical_processors_in_numa_node = 0;
121 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
123 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
125 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
127 if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
128 number_logical_processors_in_numa_node++;
// TRD : NUMA mode proper - elements come from *this* node, local to its threads
131 qe = libshared_memory_alloc_from_specific_node( ms, pns->numa_node_id, sizeof(struct libbenchmark_datastructure_queue_umm_pthread_rwlock_element) * number_logical_processors_in_numa_node, LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
132 for( loop = 0 ; loop < number_logical_processors_in_numa_node ; loop++ )
133 libbenchmark_datastructure_queue_umm_pthread_rwlock_enqueue_umm( qs, &qe[loop] );
136 // TRD : now the per-thread states
140 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
142 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
143 ptbs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_benchmark_queue_umm_pthread_rwlock_enqueue1_dequeue1_per_thread_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
144 pts->users_per_thread_state = ptbs;
/* TRD : NUMA_BUT_NOT_USED - same node-discovery dance as NUMA mode, but ALL
         allocations (state, every element, per-thread states) deliberately come
         from the single most-populated node, to measure the cost of ignoring NUMA
*/
148 case LIBBENCHMARK_TOPOLOGY_NUMA_MODE_NUMA_BUT_NOT_USED:
149 lfds710_list_aso_query( logical_processor_set, LFDS710_LIST_ASO_QUERY_GET_POTENTIALLY_INACCURATE_COUNT, NULL, (void *) &number_logical_processors );
151 // TRD : get the NUMA node for the queue_umm state
152 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
154 pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
157 number_logical_processors_in_numa_node = 0;
159 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
161 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
163 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
165 if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
166 number_logical_processors_in_numa_node++;
169 if( number_logical_processors_in_numa_node > largest_number_logical_processors_in_numa_node )
// TRD : note (number_logical_processors+1) elements - all LPs plus the dummy - in one block
173 qs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_datastructure_queue_umm_pthread_rwlock_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
174 qe = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_datastructure_queue_umm_pthread_rwlock_element) * (number_logical_processors+1), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
175 libbenchmark_datastructure_queue_umm_pthread_rwlock_init( qs, qe, NULL );
177 /* TRD : for each NUMA node, alloc one element per thread in that NUMA node (from the current thread set)
178 the dummy element comes from the same node as the queue_umm state and has already been done
183 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
185 pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
187 /* TRD : for each NUMA node, figure out how many LPs in the current set are in that NUMA node
188 and allocate then the correct number of elements from this NUMA node (1 per LP)
192 number_logical_processors_in_numa_node = 0;
194 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
196 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
198 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
200 if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
201 number_logical_processors_in_numa_node++;
204 // TRD : everything allocates from the queue_umm state NUMA node
205 qe = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_datastructure_queue_umm_pthread_rwlock_element) * number_logical_processors_in_numa_node, LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
206 for( loop = 0 ; loop < number_logical_processors_in_numa_node ; loop++ )
207 libbenchmark_datastructure_queue_umm_pthread_rwlock_enqueue_umm( qs, &qe[loop] );
210 // TRD : now the per-thread states
214 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
216 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
217 ptbs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_benchmark_queue_umm_pthread_rwlock_enqueue1_dequeue1_per_thread_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
218 pts->users_per_thread_state = ptbs;
// TRD : publish the queue state where the benchmark threads will find it
222 tsets->users_threadset_state = qs;
231 /****************************************************************************/
232 libshared_pal_thread_return_t LIBSHARED_PAL_THREAD_CALLING_CONVENTION libbenchmark_benchmark_queue_umm_pthread_rwlock_enqueue1_dequeue1_thread( void *libbenchmark_threadset_per_thread_state )
234 int long long unsigned
237 time_units_per_second;
243 struct libbenchmark_datastructure_queue_umm_pthread_rwlock_element
246 struct libbenchmark_datastructure_queue_umm_pthread_rwlock_state
249 struct libbenchmark_benchmark_queue_umm_pthread_rwlock_enqueue1_dequeue1_per_thread_benchmark_state
252 struct libbenchmark_threadset_per_thread_state
255 LFDS710_MISC_BARRIER_LOAD;
257 LFDS710_PAL_ASSERT( libbenchmark_threadset_per_thread_state != NULL );
259 pts = (struct libbenchmark_threadset_per_thread_state *) libbenchmark_threadset_per_thread_state;
261 ptbs = LIBBENCHMARK_THREADSET_PER_THREAD_STATE_GET_USERS_PER_THREAD_STATE( *pts );
262 qs = LIBBENCHMARK_THREADSET_PER_THREAD_STATE_GET_USERS_OVERALL_STATE( *pts );
264 LIBBENCHMARK_PAL_TIME_UNITS_PER_SECOND( &time_units_per_second );
266 libbenchmark_threadset_thread_ready_and_wait( pts );
268 LIBBENCHMARK_PAL_GET_HIGHRES_TIME( ¤t_time );
270 end_time = current_time + time_units_per_second * libbenchmark_globals_benchmark_duration_in_seconds;
272 while( current_time < end_time )
274 libbenchmark_datastructure_queue_umm_pthread_rwlock_dequeue_umm( qs, &qe );
275 libbenchmark_datastructure_queue_umm_pthread_rwlock_enqueue_umm( qs, qe );
278 if( time_loop++ == TIME_LOOP_COUNT )
281 LIBBENCHMARK_PAL_GET_HIGHRES_TIME( ¤t_time );
285 ptbs->operation_count = operation_count;
287 LFDS710_MISC_BARRIER_STORE;
289 lfds710_misc_force_store();
291 return LIBSHARED_PAL_THREAD_RETURN_CAST(RETURN_SUCCESS);
298 /****************************************************************************/
/* TRD : cleanup for the enqueue1/dequeue1 pthread_rwlock queue benchmark

         walks every per-thread state, copying its operation count into the
         results store, then destroys the shared queue state; memory itself is
         owned by the libshared_memory allocator, so nothing is freed here

         logical_processor_set : the LP set this run used (keyed into the results)
         numa_mode             : placement policy the run used
         rs                    : results store receiving one count per thread
         tsets                 : threadset whose users_threadset_state holds the queue

         NOTE(review) : this listing is elided - braces and some declarator lines
         (and the orig-line-331 argument of the put_result call, presumably
         numa_mode) are not shown here
*/
299 void libbenchmark_benchmark_queue_umm_pthread_rwlock_enqueue1_dequeue1_cleanup( struct lfds710_list_aso_state *logical_processor_set,
300 enum libbenchmark_topology_numa_mode numa_mode,
301 struct libbenchmark_results_state *rs,
302 struct libbenchmark_threadset_state *tsets )
304 struct libbenchmark_datastructure_queue_umm_pthread_rwlock_state
307 struct lfds710_list_asu_element
310 struct libbenchmark_benchmark_queue_umm_pthread_rwlock_enqueue1_dequeue1_per_thread_benchmark_state
313 struct libbenchmark_threadset_per_thread_state
316 LFDS710_PAL_ASSERT( logical_processor_set != NULL );
317 // TRD : numa_mode can be any value in its range
318 LFDS710_PAL_ASSERT( rs != NULL );
319 LFDS710_PAL_ASSERT( tsets != NULL );
// TRD : one result record per benchmark thread, keyed by datastructure/benchmark/lock/LP
321 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
323 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
325 ptbs = LIBBENCHMARK_THREADSET_PER_THREAD_STATE_GET_USERS_PER_THREAD_STATE( *pts );
327 libbenchmark_results_put_result( rs,
328 LIBBENCHMARK_DATASTRUCTURE_ID_QUEUE_UMM,
329 LIBBENCHMARK_BENCHMARK_ID_ENQUEUE_UMM1_THEN_DEQUEUE_UMM1,
330 LIBBENCHMARK_LOCK_ID_PTHREAD_RWLOCK,
332 logical_processor_set,
333 LIBBENCHMARK_TOPOLOGY_NODE_GET_LOGICAL_PROCESSOR_NUMBER( *pts->tns_lp ),
334 LIBBENCHMARK_TOPOLOGY_NODE_GET_WINDOWS_GROUP_NUMBER( *pts->tns_lp ),
335 ptbs->operation_count );
// TRD : the queue state was published by init; destroy it (no per-element callback)
338 qs = tsets->users_threadset_state;
340 libbenchmark_datastructure_queue_umm_pthread_rwlock_cleanup( qs, NULL );