2 #include "libbenchmark_benchmarks_freelist_internal.h"
// Per-thread benchmark record. Elsewhere in this file the worker thread stores
// its final count through `->operation_count` and the cleanup routine reads the
// same member back when reporting results.
// NOTE(review): the member list itself is not visible in this view of the file.
5 struct libbenchmark_benchmark_freelist_liblfds700_lockfree_push1_pop1_per_thread_benchmark_state
// Benchmark-wide state; the init routine stores a pointer to it in
// tsets->users_threadset_state and the cleanup routine reads its `fs` member.
11 struct libbenchmark_benchmark_freelist_liblfds700_lockfree_push1_pop1_overall_benchmark_state
// NOTE(review): presumably the type of the `fs` member (the freelist under test);
// the declarator line is not visible here - confirm against the full file.
13 struct lfds700_freelist_state
21 /****************************************************************************/
// Sets up the liblfds 7.0.0 lock-free freelist push1/pop1 benchmark.
//
// Parameters:
//   ts                    - topology state, queried for NUMA node information
//   logical_processor_set - set of logical processors taking part in the run
//   ms                    - shared-memory allocator all benchmark memory comes from
//   numa_mode             - selects one of the three allocation strategies below
//   tsets                 - threadset to initialise; receives the overall state
//
// Visible behaviour:
//   - asserts that ts, logical_processor_set, ms and tsets are non-NULL;
//   - initialises the PRNG state `ps` that is handed to every lfds700_freelist_push();
//   - allocates the overall benchmark state `obs` and calls libbenchmark_threadset_init()
//     with the push1/pop1 thread function;
//   - per NUMA mode: allocates and initialises the freelist `fs`, pushes freelist
//     elements onto it, and attaches a freshly allocated per-thread benchmark state
//     to every per-thread state in the threadset;
//   - stores `obs` in tsets->users_threadset_state.
//
// NOTE(review): in this view of the file the short lines (braces, declarators,
// switch/break statements, block-comment terminators, `index` updates) are not
// visible, so the comments below describe only the statements that can be seen.
// NOTE(review): the cleanup routine reads obs->fs; no assignment to obs->fs is
// visible in this view - confirm it exists in the full file.
22 void libbenchmark_benchmark_freelist_liblfds700_lockfree_push1_pop1_init( struct libbenchmark_topology_state *ts,
23 struct lfds710_list_aso_state *logical_processor_set,
24 struct libshared_memory_state *ms,
25 enum libbenchmark_topology_numa_mode numa_mode,
26 struct libbenchmark_threadset_state *tsets )
29 finished_flag = LOWERED;
31 struct lfds700_misc_prng_state
38 number_logical_processors,
39 number_logical_processors_in_numa_node,
41 largest_number_logical_processors_in_numa_node = 0;
43 struct lfds710_list_asu_element
47 struct libbenchmark_benchmark_freelist_liblfds700_lockfree_push1_pop1_overall_benchmark_state
50 struct libbenchmark_benchmark_freelist_liblfds700_lockfree_push1_pop1_per_thread_benchmark_state
53 struct lfds700_freelist_element
57 struct lfds700_freelist_state
60 struct libbenchmark_threadset_per_numa_state
64 struct libbenchmark_threadset_per_thread_state
67 struct libbenchmark_topology_node_state
70 LFDS710_PAL_ASSERT( ts != NULL );
71 LFDS710_PAL_ASSERT( logical_processor_set != NULL );
72 LFDS710_PAL_ASSERT( ms != NULL );
73 // TRD : numa_mode can be any value in its range
74 LFDS710_PAL_ASSERT( tsets != NULL );
76 lfds700_misc_prng_init( &ps );
78 obs = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_benchmark_freelist_liblfds700_lockfree_push1_pop1_overall_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
80 libbenchmark_threadset_init( tsets, ts, logical_processor_set, ms, libbenchmark_benchmark_freelist_liblfds700_lockfree_push1_pop1_thread, NULL );
// SMP: everything is allocated from whichever node has the most free space;
// one freelist element per logical processor in the set is pushed onto the freelist.
84 case LIBBENCHMARK_TOPOLOGY_NUMA_MODE_SMP:
85 lfds710_list_aso_query( logical_processor_set, LFDS710_LIST_ASO_QUERY_GET_POTENTIALLY_INACCURATE_COUNT, NULL, (void *) &number_logical_processors );
86 fs = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct lfds700_freelist_state), LFDS700_PAL_ATOMIC_ISOLATION_IN_BYTES );
87 lfds700_freelist_init_valid_on_current_logical_core( fs, NULL );
// the elements are lfds700 elements (they are handed to lfds700_freelist_push),
// so the array is sized with the lfds700 element type, not the lfds710 one
89 fe = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct lfds700_freelist_element) * number_logical_processors, LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
90 for( loop = 0 ; loop < number_logical_processors ; loop++ )
91 lfds700_freelist_push( fs, &fe[loop], &ps );
92 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
94 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
95 ptbs = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_benchmark_freelist_liblfds700_lockfree_push1_pop1_per_thread_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
96 pts->users_per_thread_state = ptbs;
100 case LIBBENCHMARK_TOPOLOGY_NUMA_MODE_NUMA:
101 /* TRD : init the freelist from the NUMA node with most processors from the current set
102 or, if equal threads, with lowest NUMA
103 iterate over the NUMA node list
104 for each NUMA node, allocate one freelist element per thread on that node
105 and push those elements onto the freelist
107 then loop over the threads, and give each one the freelist state as its user state
110 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
112 pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
115 number_logical_processors_in_numa_node = 0;
117 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
119 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
121 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
123 if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
124 number_logical_processors_in_numa_node++;
// NOTE(review): the body of this test (presumably recording largest_pns and the
// new maximum) is not visible in this view
127 if( number_logical_processors_in_numa_node > largest_number_logical_processors_in_numa_node )
// freelist state is an lfds700 state (see the SMP branch), so size it as one
131 fs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct lfds700_freelist_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
132 lfds700_freelist_init_valid_on_current_logical_core( fs, NULL );
134 /* TRD : now figure out how many elements are needed from each NUMA node
136 then push them interleaved, round-robin, to the freelist
139 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMBER_OF_NODE_TYPE, (void *) (lfds710_pal_uint_t) LIBBENCHMARK_TOPOLOGY_NODE_TYPE_NUMA, (void *) &number_numa_nodes );
141 fe_array_pointers = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct lfds700_freelist_element *) * number_numa_nodes, LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
142 fe_array_count = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(lfds710_pal_uint_t) * number_numa_nodes, LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
143 for( loop = 0 ; loop < number_numa_nodes ; loop++ )
144 fe_array_count[loop] = 0;
148 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
150 pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
152 /* TRD : for each NUMA node, figure out how many LPs in the current set are in that NUMA node
153 and then allocate the correct number of elements from this NUMA node (1 per LP)
157 number_logical_processors_in_numa_node = 0;
159 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
161 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
163 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
165 if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
166 number_logical_processors_in_numa_node++;
// slot `index` holds this node's element count and its element array
// NOTE(review): the initialisation and increment of `index` are not visible in this view
169 fe_array_count[index] = number_logical_processors_in_numa_node;
170 fe_array_pointers[index] = libshared_memory_alloc_from_specific_node( ms, pns->numa_node_id, sizeof(struct lfds700_freelist_element) * fe_array_count[index], LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
// round-robin push: each pass takes at most one element from every node's array,
// repeating until every per-node count has reached zero
174 while( finished_flag == LOWERED )
176 for( loop = 0 ; loop < index ; loop++ )
177 if( fe_array_count[loop] > 0 )
// pre-decrement so the indices used are count-1 .. 0; a post-decrement here would
// touch element [count], one past the allocation, and never push element [0]
178 lfds700_freelist_push( fs, &fe_array_pointers[loop][ --fe_array_count[loop] ], &ps );
180 finished_flag = RAISED;
182 for( loop = 0 ; loop < index ; loop++ )
183 if( fe_array_count[loop] > 0 )
184 finished_flag = LOWERED;
189 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
191 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
192 ptbs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_benchmark_freelist_liblfds700_lockfree_push1_pop1_per_thread_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
193 pts->users_per_thread_state = ptbs;
197 case LIBBENCHMARK_TOPOLOGY_NUMA_MODE_NUMA_BUT_NOT_USED:
198 /* TRD : freelist state in the NUMA node with most threads from the current set
199 or, if equal threads, with lowest NUMA
200 all elements alloced from that node as well
202 SO much easier to figure out allocs than with NUMA OMG
203 all of this code needs rewriting
204 and the NUMA-but-not-used stuff is interesting but I don't think it carries its own weight
207 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_numa_states,lasue) )
209 pns = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
212 number_logical_processors_in_numa_node = 0;
214 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue_lp) )
216 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue_lp );
218 libbenchmark_topology_query( ts, LIBBENCHMARK_TOPOLOGY_QUERY_GET_NUMA_NODE_FOR_LOGICAL_PROCESSOR, pts->tns_lp, &numa_node_for_lp );
220 if( LIBBENCHMARK_TOPOLOGY_NODE_GET_NUMA_ID(*numa_node_for_lp) == pns->numa_node_id )
221 number_logical_processors_in_numa_node++;
224 if( number_logical_processors_in_numa_node > largest_number_logical_processors_in_numa_node )
228 lfds710_list_aso_query( logical_processor_set, LFDS710_LIST_ASO_QUERY_GET_POTENTIALLY_INACCURATE_COUNT, NULL, (void *) &number_logical_processors );
229 fs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct lfds700_freelist_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
230 lfds700_freelist_init_valid_on_current_logical_core( fs, NULL );
232 // TRD : fill the elimination array and have one element per thread in the freelist proper
// NOTE(review): no elimination-array handling is visible here; the loop below pushes
// exactly one element per logical processor - confirm the comment above is not stale
233 fe = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct lfds700_freelist_element) * number_logical_processors, LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
234 for( loop = 0 ; loop < number_logical_processors ; loop++ )
235 lfds700_freelist_push( fs, &fe[loop], &ps );
239 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
241 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
242 ptbs = libshared_memory_alloc_from_specific_node( ms, largest_pns->numa_node_id, sizeof(struct libbenchmark_benchmark_freelist_liblfds700_lockfree_push1_pop1_per_thread_benchmark_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
243 pts->users_per_thread_state = ptbs;
// publish the overall state so the threads and the cleanup routine can reach it
249 tsets->users_threadset_state = obs;
258 /****************************************************************************/
// Benchmark worker thread for the liblfds 7.0.0 lock-free freelist.
//
// Parameter:
//   libbenchmark_threadset_per_thread_state - must be non-NULL; cast to
//     struct libbenchmark_threadset_per_thread_state and used to obtain the
//     per-thread (ptbs) and overall (obs) benchmark states.
//
// Visible behaviour: initialises a private PRNG state, waits at the threadset
// start barrier, then until `end_time` is reached pops one element from the
// freelist and pushes it straight back. On exit the local operation count is
// stored into ptbs->operation_count and made visible with a store barrier and
// lfds710_misc_force_store().
//
// Returns LIBSHARED_PAL_THREAD_RETURN_CAST(RETURN_SUCCESS).
//
// NOTE(review): in this view of the file the short lines (braces, several
// declarators, `fs = ...`, the operation_count increment, the time_loop reset)
// are not visible; comments describe only the statements that can be seen.
259 libshared_pal_thread_return_t LIBSHARED_PAL_THREAD_CALLING_CONVENTION libbenchmark_benchmark_freelist_liblfds700_lockfree_push1_pop1_thread( void *libbenchmark_threadset_per_thread_state )
261 int long long unsigned
264 time_units_per_second;
270 struct lfds700_misc_prng_state
273 struct lfds700_freelist_state
276 struct lfds700_freelist_element
279 struct libbenchmark_benchmark_freelist_liblfds700_lockfree_push1_pop1_overall_benchmark_state
282 struct libbenchmark_benchmark_freelist_liblfds700_lockfree_push1_pop1_per_thread_benchmark_state
285 struct libbenchmark_threadset_per_thread_state
// load barrier first, so state written by the initialising thread is observed
288 LFDS710_MISC_BARRIER_LOAD;
290 LFDS710_PAL_ASSERT( libbenchmark_threadset_per_thread_state != NULL );
292 pts = (struct libbenchmark_threadset_per_thread_state *) libbenchmark_threadset_per_thread_state;
294 ptbs = LIBBENCHMARK_THREADSET_PER_THREAD_STATE_GET_USERS_PER_THREAD_STATE( *pts );
295 obs = LIBBENCHMARK_THREADSET_PER_THREAD_STATE_GET_USERS_OVERALL_STATE( *pts );
298 lfds700_misc_prng_init( &ps );
300 LIBBENCHMARK_PAL_TIME_UNITS_PER_SECOND( &time_units_per_second );
302 libbenchmark_threadset_thread_ready_and_wait( pts );
// `&current_time` had been corrupted to a currency-sign character followed by
// "t_time" (the "&curren" prefix decoded as an HTML entity); restored here
304 LIBBENCHMARK_PAL_GET_HIGHRES_TIME( &current_time );
// run for the globally configured number of seconds, expressed in timer units
306 end_time = current_time + time_units_per_second * libbenchmark_globals_benchmark_duration_in_seconds;
308 while( current_time < end_time )
// one benchmark operation: pop an element and immediately push it back
310 lfds700_freelist_pop( fs, &fe, &ps );
311 lfds700_freelist_push( fs, fe, &ps );
// the clock read below sits behind this TIME_LOOP_COUNT test
// NOTE(review): presumably so the timer is sampled only once every
// TIME_LOOP_COUNT iterations - the braces and reset are not visible here
314 if( time_loop++ == TIME_LOOP_COUNT )
317 LIBBENCHMARK_PAL_GET_HIGHRES_TIME( &current_time );
321 ptbs->operation_count = operation_count;
323 LFDS710_MISC_BARRIER_STORE;
325 lfds710_misc_force_store();
327 return LIBSHARED_PAL_THREAD_RETURN_CAST(RETURN_SUCCESS);
334 /****************************************************************************/
// Collects the results of the push1/pop1 freelist benchmark and tears the
// freelist down.
//
// Parameters:
//   logical_processor_set - must be non-NULL; passed through to every result record
//   numa_mode             - not asserted (any value in its range is accepted)
//   rs                    - must be non-NULL; results store the records are put into
//   tsets                 - must be non-NULL; supplies the per-thread states and
//                           the overall benchmark state
//
// Visible behaviour: for every per-thread state in the threadset, one result is
// recorded with libbenchmark_results_put_result() carrying that thread's
// operation_count; afterwards lfds700_freelist_cleanup() is called on obs->fs
// with a NULL callback argument.
//
// NOTE(review): in this view of the file the short lines (braces, declarators,
// at least one argument of the put_result call between the lock id and
// logical_processor_set) are not visible.
335 void libbenchmark_benchmark_freelist_liblfds700_lockfree_push1_pop1_cleanup( struct lfds710_list_aso_state *logical_processor_set,
336 enum libbenchmark_topology_numa_mode numa_mode,
337 struct libbenchmark_results_state *rs,
338 struct libbenchmark_threadset_state *tsets )
340 struct lfds710_list_asu_element
343 struct libbenchmark_benchmark_freelist_liblfds700_lockfree_push1_pop1_overall_benchmark_state
346 struct libbenchmark_benchmark_freelist_liblfds700_lockfree_push1_pop1_per_thread_benchmark_state
349 struct libbenchmark_threadset_per_thread_state
352 LFDS710_PAL_ASSERT( logical_processor_set != NULL );
353 // TRD : numa_mode can be any value in its range
354 LFDS710_PAL_ASSERT( rs != NULL );
355 LFDS710_PAL_ASSERT( tsets != NULL );
// walk every per-thread state and emit one result record per thread
357 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(tsets->list_of_per_thread_states,lasue) )
359 pts = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
361 ptbs = LIBBENCHMARK_THREADSET_PER_THREAD_STATE_GET_USERS_PER_THREAD_STATE( *pts );
// the record is keyed by datastructure / benchmark / lock ids plus the thread's
// logical processor number and Windows group number
363 libbenchmark_results_put_result( rs,
364 LIBBENCHMARK_DATASTRUCTURE_ID_FREELIST,
365 LIBBENCHMARK_BENCHMARK_ID_PUSH1_THEN_POP1,
366 LIBBENCHMARK_LOCK_ID_LIBLFDS700_LOCKFREE,
368 logical_processor_set,
369 LIBBENCHMARK_TOPOLOGY_NODE_GET_LOGICAL_PROCESSOR_NUMBER( *pts->tns_lp ),
370 LIBBENCHMARK_TOPOLOGY_NODE_GET_WINDOWS_GROUP_NUMBER( *pts->tns_lp ),
371 ptbs->operation_count );
// the overall state was stored in the threadset by the init routine
374 obs = tsets->users_threadset_state;
// NOTE(review): relies on obs->fs having been set during init - confirm
376 lfds700_freelist_cleanup( obs->fs, NULL );