2 #include "libbenchmark_topology_internal.h"
4 /***** private prototypes *****/
5 static void libbenchmark_topology_internal_generate_thread_set_one_lp_per_lowest_level_cache( struct libbenchmark_topology_state *ts, struct libshared_memory_state *ms, struct lfds710_list_asu_state *lp_sets );
6 static void libbenchmark_topology_internal_generate_thread_set_one_to_all_lps_per_lowest_level_cache( struct libbenchmark_topology_state *ts, struct libshared_memory_state *ms, struct lfds710_list_asu_state *lp_sets );
7 static void libbenchmark_topology_internal_generate_thread_set_all_lps_per_lowest_level_cache( struct libbenchmark_topology_state *ts, struct libshared_memory_state *ms, struct lfds710_list_asu_state *lp_sets );
8 static void libbenchmark_topology_internal_generate_thread_set_all_lps( struct libbenchmark_topology_state *ts, struct libshared_memory_state *ms, struct lfds710_list_asu_state *lp_sets );
14 /****************************************************************************/
15 void libbenchmark_topology_generate_deduplicated_logical_processor_sets( struct libbenchmark_topology_state *ts, struct libshared_memory_state *ms, struct lfds710_list_asu_state *lp_sets )
20 struct lfds710_list_asu_element
24 struct lfds710_list_asu_state
28 struct libbenchmark_topology_logical_processor_set
33 LFDS710_PAL_ASSERT( ts != NULL );
34 LFDS710_PAL_ASSERT( ms != NULL );
35 LFDS710_PAL_ASSERT( lp_sets != NULL );
37 lfds710_list_asu_init_valid_on_current_logical_core( &throw_lp_sets, NULL );
38 lfds710_list_asu_init_valid_on_current_logical_core( &local_lp_sets, NULL );
40 // TRD : order is a useful hack - we want the full set to come last after deduplication, this order achieves this
41 libbenchmark_topology_internal_generate_thread_set_one_lp_per_lowest_level_cache( ts, ms, &local_lp_sets );
42 libbenchmark_topology_internal_generate_thread_set_all_lps_per_lowest_level_cache( ts, ms, &local_lp_sets );
43 libbenchmark_topology_internal_generate_thread_set_one_to_all_lps_per_lowest_level_cache( ts, ms, &local_lp_sets );
44 libbenchmark_topology_internal_generate_thread_set_all_lps( ts, ms, &throw_lp_sets );
48 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(local_lp_sets, local_lasue) )
53 local_lps = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *local_lasue );
55 benchmark_topology_construct_benchmark_logical_processor_set_string( t, local_lps, &lps_string );
56 printf( "%s\n", lps_string );
63 /* TRD : now de-duplicate local_lp_sets
64 dumbo algorithm, loop over every value in local_lp_sets
65 and if not present in lp_sets, add to lp_sets
67 algorithmically better to sort and then pass over once, removing duplicates
68 however, this is not a coding test - it's real life - and in this case
69 the extra coding work makes no sense at all
72 lfds710_list_asu_init_valid_on_current_logical_core( lp_sets, NULL );
74 while( LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(local_lp_sets, local_lasue) )
76 local_lps = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *local_lasue );
81 // TRD : exit loop if cr is 0, which means we found the set exists already
82 while( cr != 0 and LFDS710_LIST_ASU_GET_START_AND_THEN_NEXT(*lp_sets, lasue) )
84 lps = LFDS710_LIST_ASU_GET_VALUE_FROM_ELEMENT( *lasue );
85 cr = libbenchmark_topology_node_compare_lpsets_function( &local_lps->logical_processors, &lps->logical_processors );
88 // TRD : if we did NOT find this set already, then keep it
91 new_lps = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_topology_logical_processor_set), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
92 new_lps->logical_processors = local_lps->logical_processors;
93 LFDS710_LIST_ASU_SET_VALUE_IN_ELEMENT( new_lps->lasue, new_lps );
94 lfds710_list_asu_insert_at_end( lp_sets, &new_lps->lasue );
98 lfds710_list_asu_cleanup( &local_lp_sets, NULL );
107 /****************************************************************************/
108 static void libbenchmark_topology_internal_generate_thread_set_one_lp_per_lowest_level_cache( struct libbenchmark_topology_state *ts, struct libshared_memory_state *ms, struct lfds710_list_asu_state *lp_sets )
112 lowest_level_type_count = 0,
115 struct lfds710_btree_au_element
120 struct libbenchmark_topology_logical_processor_set
123 struct libbenchmark_topology_node_state
129 LFDS710_PAL_ASSERT( ts != NULL );
130 LFDS710_PAL_ASSERT( ms != NULL );
131 LFDS710_PAL_ASSERT( lp_sets != NULL );
133 /* TRD : we find the lowest level cache type
134 there are a certain number of these caches in the system
135 each will service a certain number of logical processors
137 we create one thread set per lowest level cache type,
138 with the the first thread set having only the first lowest
139 level cache and following sets adding one more lowest level
140 cache at a time, until the final thread set has all the
141 lowest level caches; for each lowest level cache in a thread set,
142 that thread set has the first logical processor of each lowest
146 // TRD : find lowest level memory, bus or cache
147 while( lfds710_btree_au_get_by_absolute_position_and_then_by_relative_position(&ts->topology_tree, &be_llc, LFDS710_BTREE_AU_ABSOLUTE_POSITION_SMALLEST_IN_TREE, LFDS710_BTREE_AU_RELATIVE_POSITION_NEXT_LARGER_ELEMENT_IN_ENTIRE_TREE) )
149 node_llc = LFDS710_BTREE_AU_GET_VALUE_FROM_ELEMENT( *be_llc );
151 if( node_llc->type == LIBBENCHMARK_TOPOLOGY_NODE_TYPE_SYSTEM or node_llc->type == LIBBENCHMARK_TOPOLOGY_NODE_TYPE_NUMA or node_llc->type == LIBBENCHMARK_TOPOLOGY_NODE_TYPE_CACHE )
155 // TRD : count the number of the lowest level type
156 while( lfds710_btree_au_get_by_absolute_position_and_then_by_relative_position(&ts->topology_tree, &be, LFDS710_BTREE_AU_ABSOLUTE_POSITION_LARGEST_IN_TREE, LFDS710_BTREE_AU_RELATIVE_POSITION_NEXT_SMALLER_ELEMENT_IN_ENTIRE_TREE) )
158 node = LFDS710_BTREE_AU_GET_VALUE_FROM_ELEMENT( *be );
160 if( 0 == libbenchmark_topology_node_compare_node_types_function(node, node_llc) )
161 lowest_level_type_count++;
164 // TRD : create the thread sets
165 for( loop = 0 ; loop < lowest_level_type_count ; loop++ )
167 /* TRD : find the first 0 to (loop+1) lowest level types
168 add the smallest LP under that type to the thread set
171 lps = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_topology_logical_processor_set), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
172 lfds710_list_aso_init_valid_on_current_logical_core( &lps->logical_processors, libbenchmark_topology_node_compare_nodes_function, LFDS710_LIST_ASO_INSERT_RESULT_FAILURE_EXISTING_KEY, NULL );
176 while( lps_count < loop+1 and lfds710_btree_au_get_by_absolute_position_and_then_by_relative_position(&ts->topology_tree, &be, LFDS710_BTREE_AU_ABSOLUTE_POSITION_SMALLEST_IN_TREE, LFDS710_BTREE_AU_RELATIVE_POSITION_NEXT_LARGER_ELEMENT_IN_ENTIRE_TREE) )
178 node = LFDS710_BTREE_AU_GET_VALUE_FROM_ELEMENT( *be );
180 // TRD : if we've found a lowest level type...
181 if( 0 == libbenchmark_topology_node_compare_node_types_function(node, node_llc) )
183 // TRD : now use a temp copy of be and go LARGEST_TO_SMALLEST until we find an LP
186 while( lfds710_btree_au_get_by_relative_position(&be_lp, LFDS710_BTREE_AU_RELATIVE_POSITION_NEXT_SMALLER_ELEMENT_IN_ENTIRE_TREE) )
188 node_lp = LFDS710_BTREE_AU_GET_VALUE_FROM_ELEMENT( *be_lp );
190 if( node_lp->type == LIBBENCHMARK_TOPOLOGY_NODE_TYPE_LOGICAL_PROCESSOR )
195 new_tns = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_topology_node_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
197 LFDS710_LIST_ASO_SET_KEY_IN_ELEMENT( new_tns->lasoe, new_tns );
198 LFDS710_LIST_ASO_SET_VALUE_IN_ELEMENT( new_tns->lasoe, new_tns );
199 lfds710_list_aso_insert( &lps->logical_processors, &new_tns->lasoe, NULL );
204 LFDS710_LIST_ASU_SET_KEY_IN_ELEMENT( lps->lasue, lps );
205 LFDS710_LIST_ASU_SET_VALUE_IN_ELEMENT( lps->lasue, lps );
206 lfds710_list_asu_insert_at_end( lp_sets, &lps->lasue );
216 /****************************************************************************/
217 static void libbenchmark_topology_internal_generate_thread_set_one_to_all_lps_per_lowest_level_cache( struct libbenchmark_topology_state *ts, struct libshared_memory_state *ms, struct lfds710_list_asu_state *lp_sets )
221 lowest_level_type_count = 0,
226 struct lfds710_btree_au_element
231 struct libbenchmark_topology_logical_processor_set
234 struct libbenchmark_topology_node_state
240 LFDS710_PAL_ASSERT( ts != NULL );
241 LFDS710_PAL_ASSERT( ms != NULL );
242 LFDS710_PAL_ASSERT( lp_sets != NULL );
244 /* TRD : we find the lowest level cache type
245 there are a certain number of these caches in the system
246 each will service a certain number of logical processors
248 we create one thread set per logical processor under
249 the lowest level type (they will have all have the same
250 number of logical processors)
252 each set contains an increasing number of LPs from each
253 lowest level type, e.g. the first set has one LP from
254 each lowest level type, the second has two, enode, until
258 // TRD : find lowest level memory, bus or cache
259 while( lfds710_btree_au_get_by_absolute_position_and_then_by_relative_position(&ts->topology_tree, &be_llc, LFDS710_BTREE_AU_ABSOLUTE_POSITION_SMALLEST_IN_TREE, LFDS710_BTREE_AU_RELATIVE_POSITION_NEXT_LARGER_ELEMENT_IN_ENTIRE_TREE) )
261 node_llc = LFDS710_BTREE_AU_GET_VALUE_FROM_ELEMENT( *be_llc );
263 if( node_llc->type == LIBBENCHMARK_TOPOLOGY_NODE_TYPE_SYSTEM or node_llc->type == LIBBENCHMARK_TOPOLOGY_NODE_TYPE_NUMA or node_llc->type == LIBBENCHMARK_TOPOLOGY_NODE_TYPE_CACHE )
267 /* TRD : count the number of LPs under the lowest level type
268 since be_llc points at the smallest of the lowest level types
269 we will once we've counted all it's LPs naturally exit the tree
270 since we're walking LFDS710_ADDONLY_UNBALANCED_BTREE_WALK_FROM_LARGEST_TO_SMALLEST
274 while( lfds710_btree_au_get_by_relative_position(&be_lp, LFDS710_BTREE_AU_RELATIVE_POSITION_NEXT_SMALLER_ELEMENT_IN_ENTIRE_TREE) )
276 node_lp = LFDS710_BTREE_AU_GET_VALUE_FROM_ELEMENT( *be_lp );
278 if( node_lp->type == LIBBENCHMARK_TOPOLOGY_NODE_TYPE_LOGICAL_PROCESSOR )
282 // TRD : count the number of the lowest level type
283 while( lfds710_btree_au_get_by_absolute_position_and_then_by_relative_position(&ts->topology_tree, &be, LFDS710_BTREE_AU_ABSOLUTE_POSITION_LARGEST_IN_TREE, LFDS710_BTREE_AU_RELATIVE_POSITION_NEXT_SMALLER_ELEMENT_IN_ENTIRE_TREE) )
285 node = LFDS710_BTREE_AU_GET_VALUE_FROM_ELEMENT( *be );
287 if( 0 == libbenchmark_topology_node_compare_node_types_function(node, node_llc) )
288 lowest_level_type_count++;
291 // TRD : create the thread sets
292 for( loop = 0 ; loop < lp_per_llc ; loop++ )
294 /* TRD : visit each lowest level type
295 add from 0 to loop+1 of its LPs to the set
298 lps = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_topology_logical_processor_set), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
299 lfds710_list_aso_init_valid_on_current_logical_core( &lps->logical_processors, libbenchmark_topology_node_compare_nodes_function, LFDS710_LIST_ASO_INSERT_RESULT_FAILURE_EXISTING_KEY, NULL );
304 while( lps_count < lowest_level_type_count*(loop+1) and lfds710_btree_au_get_by_absolute_position_and_then_by_relative_position(&ts->topology_tree, &be, LFDS710_BTREE_AU_ABSOLUTE_POSITION_SMALLEST_IN_TREE, LFDS710_BTREE_AU_RELATIVE_POSITION_NEXT_LARGER_ELEMENT_IN_ENTIRE_TREE) )
306 node = LFDS710_BTREE_AU_GET_VALUE_FROM_ELEMENT( *be );
308 // TRD : if we've found a lowest level type...
309 if( 0 == libbenchmark_topology_node_compare_node_types_function(node, node_llc) )
311 // TRD : now use a temp copy of be and go LARGEST_TO_SMALLEST until we have (loop+1) LPs
315 while( lp_count < loop+1 and lfds710_btree_au_get_by_relative_position(&be_lp, LFDS710_BTREE_AU_RELATIVE_POSITION_NEXT_SMALLER_ELEMENT_IN_ENTIRE_TREE) )
317 node_lp = LFDS710_BTREE_AU_GET_VALUE_FROM_ELEMENT( *be_lp );
319 if( node_lp->type == LIBBENCHMARK_TOPOLOGY_NODE_TYPE_LOGICAL_PROCESSOR )
321 new_lp = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_topology_node_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
323 LFDS710_LIST_ASO_SET_KEY_IN_ELEMENT( new_lp->lasoe, new_lp );
324 LFDS710_LIST_ASO_SET_VALUE_IN_ELEMENT( new_lp->lasoe, new_lp );
325 lfds710_list_aso_insert( &lps->logical_processors, &new_lp->lasoe, NULL );
333 LFDS710_LIST_ASU_SET_KEY_IN_ELEMENT( lps->lasue, lps );
334 LFDS710_LIST_ASU_SET_VALUE_IN_ELEMENT( lps->lasue, lps );
335 lfds710_list_asu_insert_at_end( lp_sets, &lps->lasue );
345 /****************************************************************************/
346 static void libbenchmark_topology_internal_generate_thread_set_all_lps_per_lowest_level_cache( struct libbenchmark_topology_state *ts, struct libshared_memory_state *ms, struct lfds710_list_asu_state *lp_sets )
350 lowest_level_type_count = 0,
354 struct lfds710_btree_au_element
359 struct libbenchmark_topology_logical_processor_set
362 struct libbenchmark_topology_node_state
368 LFDS710_PAL_ASSERT( ts != NULL );
369 LFDS710_PAL_ASSERT( ms != NULL );
370 LFDS710_PAL_ASSERT( lp_sets != NULL );
372 /* TRD : we find the lowest level cache type
373 there are a certain number of these caches in the system
374 each will service a certain number of logical processors
376 each lowest level type has a given number of logical processors
377 (the number is the same for each lowest level type)
378 we create one thread set per lowest level type,
379 where we're adding in full blocks of lowest level type LPs,
380 e.g. the first set has all the LPs of the first lowest level
381 type, the second set has all the LPs of the first and second
382 lowest level types, enode
385 // TRD : find lowest level memory, bus or cache
386 while( lfds710_btree_au_get_by_absolute_position_and_then_by_relative_position(&ts->topology_tree, &be_llc, LFDS710_BTREE_AU_ABSOLUTE_POSITION_SMALLEST_IN_TREE, LFDS710_BTREE_AU_RELATIVE_POSITION_NEXT_LARGER_ELEMENT_IN_ENTIRE_TREE) )
388 node_llc = LFDS710_BTREE_AU_GET_VALUE_FROM_ELEMENT( *be_llc );
390 if( node_llc->type == LIBBENCHMARK_TOPOLOGY_NODE_TYPE_SYSTEM or node_llc->type == LIBBENCHMARK_TOPOLOGY_NODE_TYPE_NUMA or node_llc->type == LIBBENCHMARK_TOPOLOGY_NODE_TYPE_CACHE )
394 /* TRD : count the number of LPs under the lowest level type
395 since be_llc points at the smallest of the lowest level types
396 we will once we've counted all it's LPs naturally exit the tree
397 since we're walking LFDS710_ADDONLY_UNBALANCED_BTREE_WALK_FROM_LARGEST_TO_SMALLEST
401 while( lfds710_btree_au_get_by_relative_position(&be_lp, LFDS710_BTREE_AU_RELATIVE_POSITION_NEXT_SMALLER_ELEMENT_IN_ENTIRE_TREE) )
403 node_lp = LFDS710_BTREE_AU_GET_VALUE_FROM_ELEMENT( *be_lp );
405 if( node_lp->type == LIBBENCHMARK_TOPOLOGY_NODE_TYPE_LOGICAL_PROCESSOR )
409 // TRD : count the number of the lowest level type
410 while( lfds710_btree_au_get_by_absolute_position_and_then_by_relative_position(&ts->topology_tree, &be, LFDS710_BTREE_AU_ABSOLUTE_POSITION_LARGEST_IN_TREE, LFDS710_BTREE_AU_RELATIVE_POSITION_NEXT_SMALLER_ELEMENT_IN_ENTIRE_TREE) )
412 node = LFDS710_BTREE_AU_GET_VALUE_FROM_ELEMENT( *be );
414 if( 0 == libbenchmark_topology_node_compare_node_types_function(node, node_llc) )
415 lowest_level_type_count++;
418 // TRD : create the thread sets
419 for( loop = 0 ; loop < lowest_level_type_count ; loop++ )
421 /* TRD : find the first 0 to (loop+1) lowest level types
422 add all LPs under those lowest level types to the set
425 lps = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_topology_logical_processor_set), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
426 lfds710_list_aso_init_valid_on_current_logical_core( &lps->logical_processors, libbenchmark_topology_node_compare_nodes_function, LFDS710_LIST_ASO_INSERT_RESULT_FAILURE_EXISTING_KEY, NULL );
430 while( lps_count < lp_per_llc*(loop+1) and lfds710_btree_au_get_by_absolute_position_and_then_by_relative_position(&ts->topology_tree, &be, LFDS710_BTREE_AU_ABSOLUTE_POSITION_SMALLEST_IN_TREE, LFDS710_BTREE_AU_RELATIVE_POSITION_NEXT_LARGER_ELEMENT_IN_ENTIRE_TREE) )
432 node = LFDS710_BTREE_AU_GET_VALUE_FROM_ELEMENT( *be );
434 // TRD : if we've found a lowest level type...
435 if( 0 == libbenchmark_topology_node_compare_node_types_function(node, node_llc) )
437 // TRD : now use a temp copy of be and go LARGEST_TO_SMALLEST until we exit the tree or find another lowest level type
440 while( lfds710_btree_au_get_by_relative_position(&be_lp, LFDS710_BTREE_AU_RELATIVE_POSITION_NEXT_SMALLER_ELEMENT_IN_ENTIRE_TREE) )
442 node_lp = LFDS710_BTREE_AU_GET_VALUE_FROM_ELEMENT( *be_lp );
444 if( node_lp->type == LIBBENCHMARK_TOPOLOGY_NODE_TYPE_LOGICAL_PROCESSOR )
447 new_lp = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_topology_node_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
449 LFDS710_LIST_ASO_SET_KEY_IN_ELEMENT( new_lp->lasoe, new_lp );
450 LFDS710_LIST_ASO_SET_VALUE_IN_ELEMENT( new_lp->lasoe, new_lp );
451 lfds710_list_aso_insert( &lps->logical_processors, &new_lp->lasoe, NULL );
455 if( node_lp->type == node_llc->type )
461 LFDS710_LIST_ASU_SET_KEY_IN_ELEMENT( lps->lasue, lps );
462 LFDS710_LIST_ASU_SET_VALUE_IN_ELEMENT( lps->lasue, lps );
463 lfds710_list_asu_insert_at_end( lp_sets, &lps->lasue );
473 /****************************************************************************/
474 static void libbenchmark_topology_internal_generate_thread_set_all_lps( struct libbenchmark_topology_state *ts, struct libshared_memory_state *ms, struct lfds710_list_asu_state *lp_sets )
476 struct lfds710_btree_au_element
479 struct libbenchmark_topology_logical_processor_set
482 struct libbenchmark_topology_node_state
486 LFDS710_PAL_ASSERT( ts != NULL );
487 LFDS710_PAL_ASSERT( ms != NULL );
488 LFDS710_PAL_ASSERT( lp_sets != NULL );
490 lps = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_topology_logical_processor_set), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
491 lfds710_list_aso_init_valid_on_current_logical_core( &lps->logical_processors, libbenchmark_topology_node_compare_nodes_function, LFDS710_LIST_ASO_INSERT_RESULT_FAILURE_EXISTING_KEY, NULL );
493 // TRD : iterate over tree - add in every logical processor
494 while( lfds710_btree_au_get_by_absolute_position_and_then_by_relative_position(&ts->topology_tree, &be, LFDS710_BTREE_AU_ABSOLUTE_POSITION_SMALLEST_IN_TREE, LFDS710_BTREE_AU_RELATIVE_POSITION_NEXT_LARGER_ELEMENT_IN_ENTIRE_TREE) )
496 node = LFDS710_BTREE_AU_GET_VALUE_FROM_ELEMENT( *be );
498 if( node->type == LIBBENCHMARK_TOPOLOGY_NODE_TYPE_LOGICAL_PROCESSOR )
501 new_lp = libshared_memory_alloc_from_most_free_space_node( ms, sizeof(struct libbenchmark_topology_node_state), LFDS710_PAL_ATOMIC_ISOLATION_IN_BYTES );
503 LFDS710_LIST_ASO_SET_KEY_IN_ELEMENT( new_lp->lasoe, new_lp );
504 LFDS710_LIST_ASO_SET_VALUE_IN_ELEMENT( new_lp->lasoe, new_lp );
505 lfds710_list_aso_insert( &lps->logical_processors, &new_lp->lasoe, NULL );
509 LFDS710_LIST_ASU_SET_KEY_IN_ELEMENT( lps->lasue, lps );
510 LFDS710_LIST_ASU_SET_VALUE_IN_ELEMENT( lps->lasue, lps );
511 lfds710_list_asu_insert_at_end( lp_sets, &lps->lasue );