numaapi_linux.c 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. // Copyright (c) 2016, libnumaapi authors
  2. //
  3. // Permission is hereby granted, free of charge, to any person obtaining a copy
  4. // of this software and associated documentation files (the "Software"), to
  5. // deal in the Software without restriction, including without limitation the
  6. // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  7. // sell copies of the Software, and to permit persons to whom the Software is
  8. // furnished to do so, subject to the following conditions:
  9. //
  10. // The above copyright notice and this permission notice shall be included in
  11. // all copies or substantial portions of the Software.
  12. //
  13. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  18. // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  19. // IN THE SOFTWARE.
  20. //
  21. // Author: Sergey Sharybin (sergey.vfx@gmail.com)
  22. #include "build_config.h"
  23. #if OS_LINUX
  24. #include "numaapi.h"
  25. #include <stdlib.h>
  26. #ifndef WITH_DYNLOAD
  27. # include <numa.h>
  28. #else
  29. # include <dlfcn.h>
  30. #endif
  31. #ifdef WITH_DYNLOAD
  32. // Descriptor numa library.
  33. static void* numa_lib;
  34. // Types of all symbols which are read from the library.
  35. struct bitmask;
  36. typedef int tnuma_available(void);
  37. typedef int tnuma_max_node(void);
  38. typedef int tnuma_node_to_cpus(int node, struct bitmask* mask);
  39. typedef long tnuma_node_size(int node, long* freep);
  40. typedef int tnuma_run_on_node(int node);
  41. typedef void* tnuma_alloc_onnode(size_t size, int node);
  42. typedef void* tnuma_alloc_local(size_t size);
  43. typedef void tnuma_free(void* start, size_t size);
  44. typedef struct bitmask* tnuma_bitmask_clearall(struct bitmask *bitmask);
  45. typedef int tnuma_bitmask_isbitset(const struct bitmask *bitmask,
  46. unsigned int n);
  47. typedef struct bitmask* tnuma_bitmask_setbit(struct bitmask *bitmask,
  48. unsigned int n);
  49. typedef unsigned int tnuma_bitmask_nbytes(struct bitmask *bitmask);
  50. typedef void tnuma_bitmask_free(struct bitmask *bitmask);
  51. typedef struct bitmask* tnuma_allocate_cpumask(void);
  52. typedef struct bitmask* tnuma_allocate_nodemask(void);
  53. typedef void tnuma_free_cpumask(struct bitmask* bitmask);
  54. typedef void tnuma_free_nodemask(struct bitmask* bitmask);
  55. typedef int tnuma_run_on_node_mask(struct bitmask *nodemask);
  56. typedef int tnuma_run_on_node_mask_all(struct bitmask *nodemask);
  57. typedef struct bitmask *tnuma_get_run_node_mask(void);
  58. typedef void tnuma_set_interleave_mask(struct bitmask *nodemask);
  59. typedef void tnuma_set_localalloc(void);
  60. // Actual symbols.
  61. static tnuma_available* numa_available;
  62. static tnuma_max_node* numa_max_node;
  63. static tnuma_node_to_cpus* numa_node_to_cpus;
  64. static tnuma_node_size* numa_node_size;
  65. static tnuma_run_on_node* numa_run_on_node;
  66. static tnuma_alloc_onnode* numa_alloc_onnode;
  67. static tnuma_alloc_local* numa_alloc_local;
  68. static tnuma_free* numa_free;
  69. static tnuma_bitmask_clearall* numa_bitmask_clearall;
  70. static tnuma_bitmask_isbitset* numa_bitmask_isbitset;
  71. static tnuma_bitmask_setbit* numa_bitmask_setbit;
  72. static tnuma_bitmask_nbytes* numa_bitmask_nbytes;
  73. static tnuma_bitmask_free* numa_bitmask_free;
  74. static tnuma_allocate_cpumask* numa_allocate_cpumask;
  75. static tnuma_allocate_nodemask* numa_allocate_nodemask;
  76. static tnuma_free_nodemask* numa_free_nodemask;
  77. static tnuma_free_cpumask* numa_free_cpumask;
  78. static tnuma_run_on_node_mask* numa_run_on_node_mask;
  79. static tnuma_run_on_node_mask_all* numa_run_on_node_mask_all;
  80. static tnuma_get_run_node_mask* numa_get_run_node_mask;
  81. static tnuma_set_interleave_mask* numa_set_interleave_mask;
  82. static tnuma_set_localalloc* numa_set_localalloc;
  83. static void* findLibrary(const char** paths) {
  84. int i = 0;
  85. while (paths[i] != NULL) {
  86. void* lib = dlopen(paths[i], RTLD_LAZY);
  87. if (lib != NULL) {
  88. return lib;
  89. }
  90. ++i;
  91. }
  92. return NULL;
  93. }
  94. static void numaExit(void) {
  95. if (numa_lib == NULL) {
  96. return;
  97. }
  98. dlclose(numa_lib);
  99. numa_lib = NULL;
  100. }
  101. static NUMAAPI_Result loadNumaSymbols(void) {
  102. // Prevent multiple initializations.
  103. static bool initialized = false;
  104. static NUMAAPI_Result result = NUMAAPI_NOT_AVAILABLE;
  105. if (initialized) {
  106. return result;
  107. }
  108. initialized = true;
  109. // Find appropriate .so library.
  110. const char* numa_paths[] = {
  111. "libnuma.so.1",
  112. "libnuma.so",
  113. NULL};
  114. // Register de-initialization.
  115. const int error = atexit(numaExit);
  116. if (error) {
  117. result = NUMAAPI_ERROR_ATEXIT;
  118. return result;
  119. }
  120. // Load library.
  121. numa_lib = findLibrary(numa_paths);
  122. if (numa_lib == NULL) {
  123. result = NUMAAPI_NOT_AVAILABLE;
  124. return result;
  125. }
  126. // Load symbols.
  127. #define _LIBRARY_FIND(lib, name) \
  128. do { \
  129. name = (t##name *)dlsym(lib, #name); \
  130. } while (0)
  131. #define NUMA_LIBRARY_FIND(name) _LIBRARY_FIND(numa_lib, name)
  132. NUMA_LIBRARY_FIND(numa_available);
  133. NUMA_LIBRARY_FIND(numa_max_node);
  134. NUMA_LIBRARY_FIND(numa_node_to_cpus);
  135. NUMA_LIBRARY_FIND(numa_node_size);
  136. NUMA_LIBRARY_FIND(numa_run_on_node);
  137. NUMA_LIBRARY_FIND(numa_alloc_onnode);
  138. NUMA_LIBRARY_FIND(numa_alloc_local);
  139. NUMA_LIBRARY_FIND(numa_free);
  140. NUMA_LIBRARY_FIND(numa_bitmask_clearall);
  141. NUMA_LIBRARY_FIND(numa_bitmask_isbitset);
  142. NUMA_LIBRARY_FIND(numa_bitmask_setbit);
  143. NUMA_LIBRARY_FIND(numa_bitmask_nbytes);
  144. NUMA_LIBRARY_FIND(numa_bitmask_free);
  145. NUMA_LIBRARY_FIND(numa_allocate_cpumask);
  146. NUMA_LIBRARY_FIND(numa_allocate_nodemask);
  147. NUMA_LIBRARY_FIND(numa_free_cpumask);
  148. NUMA_LIBRARY_FIND(numa_free_nodemask);
  149. NUMA_LIBRARY_FIND(numa_run_on_node_mask);
  150. NUMA_LIBRARY_FIND(numa_run_on_node_mask_all);
  151. NUMA_LIBRARY_FIND(numa_get_run_node_mask);
  152. NUMA_LIBRARY_FIND(numa_set_interleave_mask);
  153. NUMA_LIBRARY_FIND(numa_set_localalloc);
  154. #undef NUMA_LIBRARY_FIND
  155. #undef _LIBRARY_FIND
  156. result = NUMAAPI_SUCCESS;
  157. return result;
  158. }
  159. #endif
  160. ////////////////////////////////////////////////////////////////////////////////
  161. // Initialization.
  162. NUMAAPI_Result numaAPI_Initialize(void) {
  163. #ifdef WITH_DYNLOAD
  164. NUMAAPI_Result result = loadNumaSymbols();
  165. if (result != NUMAAPI_SUCCESS) {
  166. return result;
  167. }
  168. #endif
  169. if (numa_available() < 0) {
  170. return NUMAAPI_NOT_AVAILABLE;
  171. }
  172. return NUMAAPI_SUCCESS;
  173. }
  174. ////////////////////////////////////////////////////////////////////////////////
  175. // Topology query.
  176. int numaAPI_GetNumNodes(void) {
  177. return numa_max_node() + 1;
  178. }
  179. bool numaAPI_IsNodeAvailable(int node) {
  180. return numaAPI_GetNumNodeProcessors(node) > 0;
  181. }
  182. int numaAPI_GetNumNodeProcessors(int node) {
  183. struct bitmask* cpu_mask = numa_allocate_cpumask();
  184. numa_node_to_cpus(node, cpu_mask);
  185. const unsigned int num_bytes = numa_bitmask_nbytes(cpu_mask);
  186. const unsigned int num_bits = num_bytes * 8;
  187. // TODO(sergey): There might be faster way calculating number of set bits.
  188. int num_processors = 0;
  189. for (unsigned int bit = 0; bit < num_bits; ++bit) {
  190. if (numa_bitmask_isbitset(cpu_mask, bit)) {
  191. ++num_processors;
  192. }
  193. }
  194. #ifdef WITH_DYNLOAD
  195. if (numa_free_cpumask != NULL) {
  196. numa_free_cpumask(cpu_mask);
  197. } else {
  198. numa_bitmask_free(cpu_mask);
  199. }
  200. #else
  201. numa_free_cpumask(cpu_mask);
  202. #endif
  203. return num_processors;
  204. }
  205. ////////////////////////////////////////////////////////////////////////////////
  206. // Topology helpers.
  207. int numaAPI_GetNumCurrentNodesProcessors(void) {
  208. struct bitmask* node_mask = numa_get_run_node_mask();
  209. const unsigned int num_bytes = numa_bitmask_nbytes(node_mask);
  210. const unsigned int num_bits = num_bytes * 8;
  211. int num_processors = 0;
  212. for (unsigned int bit = 0; bit < num_bits; ++bit) {
  213. if (numa_bitmask_isbitset(node_mask, bit)) {
  214. num_processors += numaAPI_GetNumNodeProcessors(bit);
  215. }
  216. }
  217. numa_bitmask_free(node_mask);
  218. return num_processors;
  219. }
  220. ////////////////////////////////////////////////////////////////////////////////
  221. // Affinities.
  222. bool numaAPI_RunProcessOnNode(int node) {
  223. numaAPI_RunThreadOnNode(node);
  224. return true;
  225. }
  226. bool numaAPI_RunThreadOnNode(int node) {
  227. // Construct bit mask from node index.
  228. struct bitmask* node_mask = numa_allocate_nodemask();
  229. numa_bitmask_clearall(node_mask);
  230. numa_bitmask_setbit(node_mask, node);
  231. numa_run_on_node_mask_all(node_mask);
  232. // TODO(sergey): The following commands are based on x265 code, we might want
  233. // to make those optional, or require to call those explicitly.
  234. //
  235. // Current assumption is that this is similar to SetThreadGroupAffinity().
  236. if (numa_node_size(node, NULL) > 0) {
  237. numa_set_interleave_mask(node_mask);
  238. numa_set_localalloc();
  239. }
  240. #ifdef WITH_DYNLOAD
  241. if (numa_free_nodemask != NULL) {
  242. numa_free_nodemask(node_mask);
  243. } else {
  244. numa_bitmask_free(node_mask);
  245. }
  246. #else
  247. numa_free_nodemask(node_mask);
  248. #endif
  249. return true;
  250. }
  251. ////////////////////////////////////////////////////////////////////////////////
  252. // Memory management.
  253. void* numaAPI_AllocateOnNode(size_t size, int node) {
  254. return numa_alloc_onnode(size, node);
  255. }
  256. void* numaAPI_AllocateLocal(size_t size) {
  257. return numa_alloc_local(size);
  258. }
  259. void numaAPI_Free(void* start, size_t size) {
  260. numa_free(start, size);
  261. }
  262. #endif // OS_LINUX