/***** includes *****/
#include "internal.h"


/****************************************************************************/
#if( defined _WIN32 && !defined KERNEL_MODE && NTDDI_VERSION >= NTDDI_VISTA )

  #ifdef BENCHMARK_PAL_NUMA_MALLOC
    #error More than one porting abstraction layer matches the current platform in porting_abstraction_malloc.c
  #endif

  #define BENCHMARK_PAL_NUMA_MALLOC

  /* Windows (Vista and later, user-mode) implementation.

     Asks VirtualAllocExNuma() for size_in_bytes of read/write memory in the
     current process, with numa_node_id as the preferred NUMA node.

     numa_node_id  : preferred NUMA node; narrowed to DWORD for the Win32 call
     size_in_bytes : number of bytes requested
     returns       : the pointer returned by VirtualAllocExNuma()
                     (NULL on failure, per the Win32 documentation)
  */
  void *benchmark_pal_numa_malloc( lfds710_pal_uint_t numa_node_id, lfds710_pal_uint_t size_in_bytes )
  {
    HANDLE
      process_handle;

    LPVOID
      memory;

    // TRD : numa_node_id can be any value in its range
    // TRD : size_in_bytes can be any value in its range

    process_handle = GetCurrentProcess();

    /* the base address is NULL, so there is no previously reserved region to commit into;
       MEM_RESERVE | MEM_COMMIT is the documented way to reserve and commit in a single call,
       rather than depending on MEM_COMMIT alone performing an implicit reservation
    */
    memory = VirtualAllocExNuma( process_handle, NULL, size_in_bytes, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE, (DWORD) numa_node_id );

    return memory;
  }

#endif


/****************************************************************************/
#if( defined __linux__ && defined LIBNUMA )

  #ifdef BENCHMARK_PAL_NUMA_MALLOC
    #error More than one porting abstraction layer matches the current platform in porting_abstraction_malloc.c
  #endif

  #define BENCHMARK_PAL_NUMA_MALLOC

  /* Linux + libnuma implementation.

     Allocates size_in_bytes on NUMA node numa_node_id with numa_alloc_onnode()
     and then, where the platform advertises mlock() support, attempts to lock
     the pages into RAM so they stay on that node (see the long note below).

     numa_node_id  : NUMA node to allocate on; narrowed to int for libnuma
     size_in_bytes : number of bytes requested
     returns       : the pointer returned by numa_alloc_onnode(), NULL if it failed;
                     a failure of mlock() is NOT reported - the memory is still returned
  */
  void *benchmark_pal_numa_malloc( lfds710_pal_uint_t numa_node_id, lfds710_pal_uint_t size_in_bytes )
  {
    void
      *memory;

    // TRD : numa_node_id can be any value in its range
    // TRD : size_in_bytes can be any value in its range

    memory = numa_alloc_onnode( size_in_bytes, (int) numa_node_id );

    /* TRD : mlock prevents paging
             this is unfortunately necessary on Linux
             due to serious shortcomings in the way NUMA is handled

             in particular that the NUMA node is re-chosen if a memory page is paged out and then paged back in
             but also because Linux doesn't page in a single page at a time, but a line of pages
             so another process can end up moving *your* pages into *its* NUMA node (e.g. your pages are
             in the line of pages), because the NUMA policy for *its* pages would put them in that node!

             it seems to me this is one of the very rare occasions
             where Windows has something right and Linux has it wrong
             (Windows has the notion of an ideal NUMA node for a thread, and continually works
              to move any pages which leave that node back into that node, and on page-in will
              try first to re-use that node)

             since we use small amounts of memory, I address the whole sorry mess
             simply by locking the pages into memory - this way they will stay in the NUMA node
             they were allocated into (assuming they've not been paged out and then back in,
             between the numa_alloc_onnode() call and the mlock() call)
    */

    /* the macro's *value* has to be tested, not merely whether it is defined;
       POSIX permits it to be defined as -1 when mlock() is unsupported
       (the previous form, "defined X > 0", parsed as "(defined X) > 0" and so ignored the value)
    */
    #if( defined _POSIX_MEMLOCK_RANGE && _POSIX_MEMLOCK_RANGE > 0 )
      /* nothing to lock if the allocation failed; skipping the call also leaves
         errno as numa_alloc_onnode() set it, rather than overwriting it with mlock()'s error
      */
      if( memory != NULL )
      {
        /* best-effort : if the lock is refused (e.g. RLIMIT_MEMLOCK is too low)
           the allocation is still usable, just not pinned, so the result is deliberately ignored
        */
        (void) mlock( memory, size_in_bytes );
      }
    #endif

    return memory;
  }

#endif