123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292 |
- // Copyright (c) 2016, libnumaapi authors
- //
- // Permission is hereby granted, free of charge, to any person obtaining a copy
- // of this software and associated documentation files (the "Software"), to
- // deal in the Software without restriction, including without limitation the
- // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- // sell copies of the Software, and to permit persons to whom the Software is
- // furnished to do so, subject to the following conditions:
- //
- // The above copyright notice and this permission notice shall be included in
- // all copies or substantial portions of the Software.
- //
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- // IN THE SOFTWARE.
- //
- // Author: Sergey Sharybin (sergey.vfx@gmail.com)
- #include "build_config.h"
- #if OS_WIN
- #include "numaapi.h"
- #ifndef NOGDI
- # define NOGDI
- #endif
- #ifndef NOMINMAX
- # define NOMINMAX
- #endif
- #ifndef WIN32_LEAN_AND_MEAN
- # define WIN32_LEAN_AND_MEAN
- #endif
- #ifndef NOCOMM
- # define NOCOMM
- #endif
- #include <stdlib.h>
- #include <stdint.h>
- #include <windows.h>
- #if ARCH_CPU_64_BITS
- # include <VersionHelpers.h>
- #endif
- ////////////////////////////////////////////////////////////////////////////////
- // Initialization.
- // Kernel library, from where the symbols come.
- static HMODULE kernel_lib;
- // Types of all symbols which are read from the library.
- // NUMA function types.
- typedef BOOL t_GetNumaHighestNodeNumber(PULONG highest_node_number);
- typedef BOOL t_GetNumaNodeProcessorMask(UCHAR node, ULONGLONG* processor_mask);
- typedef BOOL t_GetNumaNodeProcessorMaskEx(USHORT node,
- GROUP_AFFINITY* processor_mask);
- typedef BOOL t_GetNumaProcessorNode(UCHAR processor, UCHAR* node_number);
- typedef void* t_VirtualAllocExNuma(HANDLE process_handle,
- LPVOID address,
- SIZE_T size,
- DWORD allocation_type,
- DWORD protect,
- DWORD preferred);
- typedef BOOL t_VirtualFree(void* address, SIZE_T size, DWORD free_type);
- // Threading function types.
- typedef BOOL t_SetProcessAffinityMask(HANDLE process_handle,
- DWORD_PTR process_affinity_mask);
- typedef BOOL t_SetThreadGroupAffinity(HANDLE thread_handle,
- const GROUP_AFFINITY* group_affinity,
- GROUP_AFFINITY* PreviousGroupAffinity);
- typedef BOOL t_GetThreadGroupAffinity(HANDLE thread_handle,
- GROUP_AFFINITY* group_affinity);
- typedef DWORD t_GetCurrentProcessorNumber(void);
- typedef void t_GetCurrentProcessorNumberEx(PROCESSOR_NUMBER* proc_number);
- typedef DWORD t_GetActiveProcessorCount(WORD group_number);
- // NUMA symbols.
- static t_GetNumaHighestNodeNumber* _GetNumaHighestNodeNumber;
- static t_GetNumaNodeProcessorMask* _GetNumaNodeProcessorMask;
- static t_GetNumaNodeProcessorMaskEx* _GetNumaNodeProcessorMaskEx;
- static t_GetNumaProcessorNode* _GetNumaProcessorNode;
- static t_VirtualAllocExNuma* _VirtualAllocExNuma;
- static t_VirtualFree* _VirtualFree;
- // Threading symbols.
- static t_SetProcessAffinityMask* _SetProcessAffinityMask;
- static t_SetThreadGroupAffinity* _SetThreadGroupAffinity;
- static t_GetThreadGroupAffinity* _GetThreadGroupAffinity;
- static t_GetCurrentProcessorNumber* _GetCurrentProcessorNumber;
- static t_GetCurrentProcessorNumberEx* _GetCurrentProcessorNumberEx;
- static t_GetActiveProcessorCount* _GetActiveProcessorCount;
// Process-exit hook which loadNumaSymbols() registers via atexit().
//
// For now this is only a placeholder: no resources are released here.
static void numaExit(void) {
  // TODO(sergey): Consider closing library here.
}
- static NUMAAPI_Result loadNumaSymbols(void) {
- // Prevent multiple initializations.
- static bool initialized = false;
- static NUMAAPI_Result result = NUMAAPI_NOT_AVAILABLE;
- if (initialized) {
- return result;
- }
- initialized = true;
- // Register de-initialization.
- const int error = atexit(numaExit);
- if (error) {
- result = NUMAAPI_ERROR_ATEXIT;
- return result;
- }
- // Load library.
- kernel_lib = LoadLibraryA("Kernel32.dll");
- // Load symbols.
- #define _LIBRARY_FIND(lib, name) \
- do { \
- _##name = (t_##name *)GetProcAddress(lib, #name); \
- } while (0)
- #define KERNEL_LIBRARY_FIND(name) _LIBRARY_FIND(kernel_lib, name)
- // NUMA.
- KERNEL_LIBRARY_FIND(GetNumaHighestNodeNumber);
- KERNEL_LIBRARY_FIND(GetNumaNodeProcessorMask);
- KERNEL_LIBRARY_FIND(GetNumaNodeProcessorMaskEx);
- KERNEL_LIBRARY_FIND(GetNumaProcessorNode);
- KERNEL_LIBRARY_FIND(VirtualAllocExNuma);
- KERNEL_LIBRARY_FIND(VirtualFree);
- // Threading.
- KERNEL_LIBRARY_FIND(SetProcessAffinityMask);
- KERNEL_LIBRARY_FIND(SetThreadGroupAffinity);
- KERNEL_LIBRARY_FIND(GetThreadGroupAffinity);
- KERNEL_LIBRARY_FIND(GetCurrentProcessorNumber);
- KERNEL_LIBRARY_FIND(GetCurrentProcessorNumberEx);
- KERNEL_LIBRARY_FIND(GetActiveProcessorCount);
- #undef KERNEL_LIBRARY_FIND
- #undef _LIBRARY_FIND
- result = NUMAAPI_SUCCESS;
- return result;
- }
- NUMAAPI_Result numaAPI_Initialize(void) {
- #if !ARCH_CPU_64_BITS
- // No NUMA on 32 bit platforms.
- return NUMAAPI_NOT_AVAILABLE;
- #else
- if (!IsWindows7OrGreater()) {
- // Require Windows 7 or higher.
- NUMAAPI_NOT_AVAILABLE;
- }
- loadNumaSymbols();
- return NUMAAPI_SUCCESS;
- #endif
- }
- ////////////////////////////////////////////////////////////////////////////////
- // Internal helpers.
// Returns the number of bits which are set to 1 in the given mask.
static int countNumSetBits(ULONGLONG mask) {
  // TODO(sergey): There might be faster way calculating number of set bits.
  // NOTE: The mask is required to be unsigned: shifting it to the right is
  // what terminates the loop, and that is only well-defined for unsigned
  // values.
  int set_bits_count = 0;
  for (ULONGLONG remaining = mask; remaining != 0; remaining >>= 1) {
    if ((remaining & 1) != 0) {
      ++set_bits_count;
    }
  }
  return set_bits_count;
}
- ////////////////////////////////////////////////////////////////////////////////
- // Topology query.
- int numaAPI_GetNumNodes(void) {
- ULONG highest_node_number;
- if (!_GetNumaHighestNodeNumber(&highest_node_number)) {
- return 0;
- }
- // TODO(sergey): Resolve the type narrowing.
- // NOTE: This is not necessarily a total amount of nodes in the system.
- return (int)highest_node_number + 1;
- }
- bool numaAPI_IsNodeAvailable(int node) {
- // Trick to detect whether the node is usable or not: check whether
- // there are any processors associated with it.
- //
- // This is needed because numaApiGetNumNodes() is not guaranteed to
- // give total amount of nodes and some nodes might be unavailable.
- ULONGLONG processor_mask;
- if (!_GetNumaNodeProcessorMask(node, &processor_mask)) {
- return false;
- }
- if (processor_mask == 0) {
- return false;
- }
- return true;
- }
- int numaAPI_GetNumNodeProcessors(int node) {
- ULONGLONG processor_mask;
- if (!_GetNumaNodeProcessorMask(node, &processor_mask)) {
- return 0;
- }
- return countNumSetBits(processor_mask);
- }
- ////////////////////////////////////////////////////////////////////////////////
- // Topology helpers.
- int numaAPI_GetNumCurrentNodesProcessors(void) {
- HANDLE thread_handle = GetCurrentThread();
- GROUP_AFFINITY group_affinity;
- // TODO(sergey): Needs implementation.
- if (!_GetThreadGroupAffinity(thread_handle, &group_affinity)) {
- return 0;
- }
- // First, count number of possible bits in the affinity mask.
- const int num_processors = countNumSetBits(group_affinity.Mask);
- // Then check that it's not exceeding number of processors in tjhe group.
- const int num_group_processors =
- _GetActiveProcessorCount(group_affinity.Group);
- if (num_group_processors < num_processors) {
- return num_group_processors;
- }
- return num_processors;
- }
- ////////////////////////////////////////////////////////////////////////////////
- // Affinities.
- bool numaAPI_RunProcessOnNode(int node) {
- // TODO(sergey): Make sure requested node is within active CPU group.
- // Change affinity of the proces to make it to run on a given node.
- HANDLE process_handle = GetCurrentProcess();
- ULONGLONG processor_mask;
- if (_GetNumaNodeProcessorMask(node, &processor_mask) == 0) {
- return false;
- }
- if (_SetProcessAffinityMask(process_handle, processor_mask) == 0) {
- return false;
- }
- return true;
- }
- bool numaAPI_RunThreadOnNode(int node) {
- HANDLE thread_handle = GetCurrentThread();
- GROUP_AFFINITY group_affinity = { 0 };
- if (_GetNumaNodeProcessorMaskEx(node, &group_affinity) == 0) {
- return false;
- }
- if (_SetThreadGroupAffinity(thread_handle, &group_affinity, NULL) == 0) {
- return false;
- }
- return true;
- }
- ////////////////////////////////////////////////////////////////////////////////
- // Memory management.
- void* numaAPI_AllocateOnNode(size_t size, int node) {
- return _VirtualAllocExNuma(GetCurrentProcess(),
- NULL,
- size,
- MEM_RESERVE | MEM_COMMIT,
- PAGE_READWRITE,
- node);
- }
- void* numaAPI_AllocateLocal(size_t size) {
- UCHAR current_processor = (UCHAR)_GetCurrentProcessorNumber();
- UCHAR node;
- if (!_GetNumaProcessorNode(current_processor, &node)) {
- return NULL;
- }
- return numaAPI_AllocateOnNode(size, node);
- }
- void numaAPI_Free(void* start, size_t size) {
- if (!_VirtualFree(start, size, MEM_RELEASE)) {
- // TODO(sergey): Throw an error!
- }
- }
- #endif // OS_WIN
|