numaapi_win32.c 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. // Copyright (c) 2016, libnumaapi authors
  2. //
  3. // Permission is hereby granted, free of charge, to any person obtaining a copy
  4. // of this software and associated documentation files (the "Software"), to
  5. // deal in the Software without restriction, including without limitation the
  6. // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  7. // sell copies of the Software, and to permit persons to whom the Software is
  8. // furnished to do so, subject to the following conditions:
  9. //
  10. // The above copyright notice and this permission notice shall be included in
  11. // all copies or substantial portions of the Software.
  12. //
  13. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  18. // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  19. // IN THE SOFTWARE.
  20. //
  21. // Author: Sergey Sharybin (sergey.vfx@gmail.com)
  22. #include "build_config.h"
  23. #if OS_WIN
  24. #include "numaapi.h"
  25. #ifndef NOGDI
  26. # define NOGDI
  27. #endif
  28. #ifndef NOMINMAX
  29. # define NOMINMAX
  30. #endif
  31. #ifndef WIN32_LEAN_AND_MEAN
  32. # define WIN32_LEAN_AND_MEAN
  33. #endif
  34. #ifndef NOCOMM
  35. # define NOCOMM
  36. #endif
  37. #include <stdlib.h>
  38. #include <stdint.h>
  39. #include <windows.h>
  40. #if ARCH_CPU_64_BITS
  41. # include <VersionHelpers.h>
  42. #endif
  43. ////////////////////////////////////////////////////////////////////////////////
  44. // Initialization.
  45. // Kernel library, from where the symbols come.
  46. static HMODULE kernel_lib;
  47. // Types of all symbols which are read from the library.
  48. // NUMA function types.
  49. typedef BOOL t_GetNumaHighestNodeNumber(PULONG highest_node_number);
  50. typedef BOOL t_GetNumaNodeProcessorMask(UCHAR node, ULONGLONG* processor_mask);
  51. typedef BOOL t_GetNumaNodeProcessorMaskEx(USHORT node,
  52. GROUP_AFFINITY* processor_mask);
  53. typedef BOOL t_GetNumaProcessorNode(UCHAR processor, UCHAR* node_number);
  54. typedef void* t_VirtualAllocExNuma(HANDLE process_handle,
  55. LPVOID address,
  56. SIZE_T size,
  57. DWORD allocation_type,
  58. DWORD protect,
  59. DWORD preferred);
  60. typedef BOOL t_VirtualFree(void* address, SIZE_T size, DWORD free_type);
  61. // Threading function types.
  62. typedef BOOL t_SetProcessAffinityMask(HANDLE process_handle,
  63. DWORD_PTR process_affinity_mask);
  64. typedef BOOL t_SetThreadGroupAffinity(HANDLE thread_handle,
  65. const GROUP_AFFINITY* group_affinity,
  66. GROUP_AFFINITY* PreviousGroupAffinity);
  67. typedef BOOL t_GetThreadGroupAffinity(HANDLE thread_handle,
  68. GROUP_AFFINITY* group_affinity);
  69. typedef DWORD t_GetCurrentProcessorNumber(void);
  70. typedef void t_GetCurrentProcessorNumberEx(PROCESSOR_NUMBER* proc_number);
  71. typedef DWORD t_GetActiveProcessorCount(WORD group_number);
  72. // NUMA symbols.
  73. static t_GetNumaHighestNodeNumber* _GetNumaHighestNodeNumber;
  74. static t_GetNumaNodeProcessorMask* _GetNumaNodeProcessorMask;
  75. static t_GetNumaNodeProcessorMaskEx* _GetNumaNodeProcessorMaskEx;
  76. static t_GetNumaProcessorNode* _GetNumaProcessorNode;
  77. static t_VirtualAllocExNuma* _VirtualAllocExNuma;
  78. static t_VirtualFree* _VirtualFree;
  79. // Threading symbols.
  80. static t_SetProcessAffinityMask* _SetProcessAffinityMask;
  81. static t_SetThreadGroupAffinity* _SetThreadGroupAffinity;
  82. static t_GetThreadGroupAffinity* _GetThreadGroupAffinity;
  83. static t_GetCurrentProcessorNumber* _GetCurrentProcessorNumber;
  84. static t_GetCurrentProcessorNumberEx* _GetCurrentProcessorNumberEx;
  85. static t_GetActiveProcessorCount* _GetActiveProcessorCount;
  // Exit handler which loadNumaSymbols() registers with atexit().
  //
  // Currently does nothing: the kernel library handle is left open.
  static void numaExit(void) {
    // TODO(sergey): Consider closing library here.
    return;
  }
  89. static NUMAAPI_Result loadNumaSymbols(void) {
  90. // Prevent multiple initializations.
  91. static bool initialized = false;
  92. static NUMAAPI_Result result = NUMAAPI_NOT_AVAILABLE;
  93. if (initialized) {
  94. return result;
  95. }
  96. initialized = true;
  97. // Register de-initialization.
  98. const int error = atexit(numaExit);
  99. if (error) {
  100. result = NUMAAPI_ERROR_ATEXIT;
  101. return result;
  102. }
  103. // Load library.
  104. kernel_lib = LoadLibraryA("Kernel32.dll");
  105. // Load symbols.
  106. #define _LIBRARY_FIND(lib, name) \
  107. do { \
  108. _##name = (t_##name *)GetProcAddress(lib, #name); \
  109. } while (0)
  110. #define KERNEL_LIBRARY_FIND(name) _LIBRARY_FIND(kernel_lib, name)
  111. // NUMA.
  112. KERNEL_LIBRARY_FIND(GetNumaHighestNodeNumber);
  113. KERNEL_LIBRARY_FIND(GetNumaNodeProcessorMask);
  114. KERNEL_LIBRARY_FIND(GetNumaNodeProcessorMaskEx);
  115. KERNEL_LIBRARY_FIND(GetNumaProcessorNode);
  116. KERNEL_LIBRARY_FIND(VirtualAllocExNuma);
  117. KERNEL_LIBRARY_FIND(VirtualFree);
  118. // Threading.
  119. KERNEL_LIBRARY_FIND(SetProcessAffinityMask);
  120. KERNEL_LIBRARY_FIND(SetThreadGroupAffinity);
  121. KERNEL_LIBRARY_FIND(GetThreadGroupAffinity);
  122. KERNEL_LIBRARY_FIND(GetCurrentProcessorNumber);
  123. KERNEL_LIBRARY_FIND(GetCurrentProcessorNumberEx);
  124. KERNEL_LIBRARY_FIND(GetActiveProcessorCount);
  125. #undef KERNEL_LIBRARY_FIND
  126. #undef _LIBRARY_FIND
  127. result = NUMAAPI_SUCCESS;
  128. return result;
  129. }
  130. NUMAAPI_Result numaAPI_Initialize(void) {
  131. #if !ARCH_CPU_64_BITS
  132. // No NUMA on 32 bit platforms.
  133. return NUMAAPI_NOT_AVAILABLE;
  134. #else
  135. if (!IsWindows7OrGreater()) {
  136. // Require Windows 7 or higher.
  137. NUMAAPI_NOT_AVAILABLE;
  138. }
  139. loadNumaSymbols();
  140. return NUMAAPI_SUCCESS;
  141. #endif
  142. }
  143. ////////////////////////////////////////////////////////////////////////////////
  144. // Internal helpers.
  // Counts the number of bits which are set in the given mask.
  //
  // NOTE: mask must be unsigned, there is undefined behavior for signed ints.
  static int countNumSetBits(unsigned long long mask) {
    int num_bits = 0;
    while (mask != 0) {
      // Clear the lowest set bit, so the loop runs once per set bit rather
      // than once per bit position.
      mask &= mask - 1;
      ++num_bits;
    }
    return num_bits;
  }
  155. ////////////////////////////////////////////////////////////////////////////////
  156. // Topology query.
  157. int numaAPI_GetNumNodes(void) {
  158. ULONG highest_node_number;
  159. if (!_GetNumaHighestNodeNumber(&highest_node_number)) {
  160. return 0;
  161. }
  162. // TODO(sergey): Resolve the type narrowing.
  163. // NOTE: This is not necessarily a total amount of nodes in the system.
  164. return (int)highest_node_number + 1;
  165. }
  166. bool numaAPI_IsNodeAvailable(int node) {
  167. // Trick to detect whether the node is usable or not: check whether
  168. // there are any processors associated with it.
  169. //
  170. // This is needed because numaApiGetNumNodes() is not guaranteed to
  171. // give total amount of nodes and some nodes might be unavailable.
  172. ULONGLONG processor_mask;
  173. if (!_GetNumaNodeProcessorMask(node, &processor_mask)) {
  174. return false;
  175. }
  176. if (processor_mask == 0) {
  177. return false;
  178. }
  179. return true;
  180. }
  181. int numaAPI_GetNumNodeProcessors(int node) {
  182. ULONGLONG processor_mask;
  183. if (!_GetNumaNodeProcessorMask(node, &processor_mask)) {
  184. return 0;
  185. }
  186. return countNumSetBits(processor_mask);
  187. }
  188. ////////////////////////////////////////////////////////////////////////////////
  189. // Topology helpers.
  190. int numaAPI_GetNumCurrentNodesProcessors(void) {
  191. HANDLE thread_handle = GetCurrentThread();
  192. GROUP_AFFINITY group_affinity;
  193. // TODO(sergey): Needs implementation.
  194. if (!_GetThreadGroupAffinity(thread_handle, &group_affinity)) {
  195. return 0;
  196. }
  197. // First, count number of possible bits in the affinity mask.
  198. const int num_processors = countNumSetBits(group_affinity.Mask);
  199. // Then check that it's not exceeding number of processors in tjhe group.
  200. const int num_group_processors =
  201. _GetActiveProcessorCount(group_affinity.Group);
  202. if (num_group_processors < num_processors) {
  203. return num_group_processors;
  204. }
  205. return num_processors;
  206. }
  207. ////////////////////////////////////////////////////////////////////////////////
  208. // Affinities.
  209. bool numaAPI_RunProcessOnNode(int node) {
  210. // TODO(sergey): Make sure requested node is within active CPU group.
  211. // Change affinity of the proces to make it to run on a given node.
  212. HANDLE process_handle = GetCurrentProcess();
  213. ULONGLONG processor_mask;
  214. if (_GetNumaNodeProcessorMask(node, &processor_mask) == 0) {
  215. return false;
  216. }
  217. if (_SetProcessAffinityMask(process_handle, processor_mask) == 0) {
  218. return false;
  219. }
  220. return true;
  221. }
  222. bool numaAPI_RunThreadOnNode(int node) {
  223. HANDLE thread_handle = GetCurrentThread();
  224. GROUP_AFFINITY group_affinity = { 0 };
  225. if (_GetNumaNodeProcessorMaskEx(node, &group_affinity) == 0) {
  226. return false;
  227. }
  228. if (_SetThreadGroupAffinity(thread_handle, &group_affinity, NULL) == 0) {
  229. return false;
  230. }
  231. return true;
  232. }
  233. ////////////////////////////////////////////////////////////////////////////////
  234. // Memory management.
  235. void* numaAPI_AllocateOnNode(size_t size, int node) {
  236. return _VirtualAllocExNuma(GetCurrentProcess(),
  237. NULL,
  238. size,
  239. MEM_RESERVE | MEM_COMMIT,
  240. PAGE_READWRITE,
  241. node);
  242. }
  243. void* numaAPI_AllocateLocal(size_t size) {
  244. UCHAR current_processor = (UCHAR)_GetCurrentProcessorNumber();
  245. UCHAR node;
  246. if (!_GetNumaProcessorNode(current_processor, &node)) {
  247. return NULL;
  248. }
  249. return numaAPI_AllocateOnNode(size, node);
  250. }
  251. void numaAPI_Free(void* start, size_t size) {
  252. if (!_VirtualFree(start, size, MEM_RELEASE)) {
  253. // TODO(sergey): Throw an error!
  254. }
  255. }
  256. #endif // OS_WIN