util_system.cpp 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. /*
  2. * Copyright 2011-2013 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "util/util_system.h"
  17. #include "util/util_logging.h"
  18. #include "util/util_types.h"
  19. #include "util/util_string.h"
  20. #include <numaapi.h>
  21. #include <OpenImageIO/sysutil.h>
  22. OIIO_NAMESPACE_USING
  23. #ifdef _WIN32
  24. # if (!defined(FREE_WINDOWS))
  25. # include <intrin.h>
  26. # endif
  27. # include "util_windows.h"
  28. #elif defined(__APPLE__)
  29. # include <sys/ioctl.h>
  30. # include <sys/sysctl.h>
  31. # include <sys/types.h>
  32. #else
  33. # include <unistd.h>
  34. # include <sys/ioctl.h>
  35. #endif
  36. CCL_NAMESPACE_BEGIN
  37. bool system_cpu_ensure_initialized()
  38. {
  39. static bool is_initialized = false;
  40. static bool result = false;
  41. if (is_initialized) {
  42. return result;
  43. }
  44. is_initialized = true;
  45. const NUMAAPI_Result numa_result = numaAPI_Initialize();
  46. result = (numa_result == NUMAAPI_SUCCESS);
  47. return result;
  48. }
  /* Fallback processor count which does not rely on NUMA/CPU groups.
   *
   * Queries the operating system directly. Always returns at least 1, including
   * when the underlying system query fails, so callers never see a zero,
   * negative or uninitialized thread count. */
  static int system_cpu_thread_count_fallback()
  {
  #ifdef _WIN32
    SYSTEM_INFO info;
    GetSystemInfo(&info);
    const int count = (int)info.dwNumberOfProcessors;
    return (count > 0) ? count : 1;
  #elif defined(__APPLE__)
    int count = 0;
    size_t len = sizeof(count);
    int mib[2] = {CTL_HW, HW_NCPU};
    /* On failure `count` is not written by sysctl(), so do not trust it. */
    if (sysctl(mib, 2, &count, &len, NULL, 0) != 0) {
      return 1;
    }
    return (count > 0) ? count : 1;
  #else
    /* sysconf() returns -1 when the value can not be determined. */
    const long count = sysconf(_SC_NPROCESSORS_ONLN);
    return (count > 0) ? (int)count : 1;
  #endif
  }
  66. int system_cpu_thread_count()
  67. {
  68. const int num_nodes = system_cpu_num_numa_nodes();
  69. int num_threads = 0;
  70. for (int node = 0; node < num_nodes; ++node) {
  71. if (!system_cpu_is_numa_node_available(node)) {
  72. continue;
  73. }
  74. num_threads += system_cpu_num_numa_node_processors(node);
  75. }
  76. return num_threads;
  77. }
  78. int system_cpu_num_numa_nodes()
  79. {
  80. if (!system_cpu_ensure_initialized()) {
  81. /* Fallback to a single node with all the threads. */
  82. return 1;
  83. }
  84. return numaAPI_GetNumNodes();
  85. }
  86. bool system_cpu_is_numa_node_available(int node)
  87. {
  88. if (!system_cpu_ensure_initialized()) {
  89. return true;
  90. }
  91. return numaAPI_IsNodeAvailable(node);
  92. }
  93. int system_cpu_num_numa_node_processors(int node)
  94. {
  95. if (!system_cpu_ensure_initialized()) {
  96. return system_cpu_thread_count_fallback();
  97. }
  98. return numaAPI_GetNumNodeProcessors(node);
  99. }
  100. bool system_cpu_run_thread_on_node(int node)
  101. {
  102. if (!system_cpu_ensure_initialized()) {
  103. return true;
  104. }
  105. return numaAPI_RunThreadOnNode(node);
  106. }
  /* Width of the console attached to standard output, in columns.
   *
   * Falls back to 80 columns when the width can not be queried or the query
   * reports a non-positive value. */
  int system_console_width()
  {
    int width = 0;
  #ifdef _WIN32
    CONSOLE_SCREEN_BUFFER_INFO screen_info;
    if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &screen_info)) {
      width = screen_info.dwSize.X;
    }
  #else
    struct winsize window_size;
    if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &window_size) == 0) {
      width = window_size.ws_col;
    }
  #endif
    if (width <= 0) {
      return 80;
    }
    return width;
  }
  123. int system_cpu_num_active_group_processors()
  124. {
  125. if (!system_cpu_ensure_initialized()) {
  126. return system_cpu_thread_count_fallback();
  127. }
  128. return numaAPI_GetNumCurrentNodesProcessors();
  129. }
  #if !defined(_WIN32) || defined(FREE_WINDOWS)
  /* Replacement for the compiler intrinsic of the same name on toolchains
   * which do not provide it.
   *
   * Executes CPUID for leaf `selector` and stores EAX, EBX, ECX and EDX into
   * data[0..3] respectively. ECX is explicitly set to 0 before the instruction
   * so leaves which take a sub-leaf index (such as leaf 7) are queried with a
   * defined sub-leaf instead of whatever happened to be in the register.
   *
   * On non-x86 targets all four values are set to 0. */
  static void __cpuid(int data[4], int selector)
  {
  #  if defined(__x86_64__)
    __asm__("cpuid"
            : "=a"(data[0]), "=b"(data[1]), "=c"(data[2]), "=d"(data[3])
            : "a"(selector), "c"(0));
  #  elif defined(__i386__)
    /* EBX is saved and restored around the instruction, and its value is
     * copied out through a scratch register. */
    __asm__("pushl %%ebx \n\t"
            "cpuid \n\t"
            "movl %%ebx, %1 \n\t"
            "popl %%ebx \n\t"
            : "=a"(data[0]), "=r"(data[1]), "=c"(data[2]), "=d"(data[3])
            : "a"(selector), "c"(0)
            : "ebx");
  #  else
    data[0] = data[1] = data[2] = data[3] = 0;
  #  endif
  }
  #endif
  148. string system_cpu_brand_string()
  149. {
  150. char buf[48] = {0};
  151. int result[4] = {0};
  152. __cpuid(result, 0x80000000);
  153. if (result[0] >= (int)0x80000004) {
  154. __cpuid((int *)(buf + 0), 0x80000002);
  155. __cpuid((int *)(buf + 16), 0x80000003);
  156. __cpuid((int *)(buf + 32), 0x80000004);
  157. string brand = buf;
  158. /* make it a bit more presentable */
  159. brand = string_remove_trademark(brand);
  160. return brand;
  161. }
  162. return "Unknown CPU";
  163. }
  /* Pointer width of the current build in bits (size of `void *` times 8). */
  int system_cpu_bits()
  {
    const size_t pointer_bytes = sizeof(void *);
    return (int)(pointer_bytes * 8);
  }
  168. #if defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(_M_IX86)
  /* Set of boolean flags, one per CPU feature.
   *
   * Field order is kept stable; the fields are only grouped per line by
   * feature family for readability. */
  struct CPUCapabilities {
    /* Base architecture. */
    bool x64, mmx;
    /* SSE family. */
    bool sse, sse2, sse3, ssse3, sse41, sse42, sse4a;
    /* AVX family and related extensions. */
    bool avx, f16c, avx2, xop, fma3, fma4;
    /* Bit manipulation instructions. */
    bool bmi1, bmi2;
  };
  188. static CPUCapabilities &system_cpu_capabilities()
  189. {
  190. static CPUCapabilities caps;
  191. static bool caps_init = false;
  192. if (!caps_init) {
  193. int result[4], num;
  194. memset(&caps, 0, sizeof(caps));
  195. __cpuid(result, 0);
  196. num = result[0];
  197. if (num >= 1) {
  198. __cpuid(result, 0x00000001);
  199. caps.mmx = (result[3] & ((int)1 << 23)) != 0;
  200. caps.sse = (result[3] & ((int)1 << 25)) != 0;
  201. caps.sse2 = (result[3] & ((int)1 << 26)) != 0;
  202. caps.sse3 = (result[2] & ((int)1 << 0)) != 0;
  203. caps.ssse3 = (result[2] & ((int)1 << 9)) != 0;
  204. caps.sse41 = (result[2] & ((int)1 << 19)) != 0;
  205. caps.sse42 = (result[2] & ((int)1 << 20)) != 0;
  206. caps.fma3 = (result[2] & ((int)1 << 12)) != 0;
  207. caps.avx = false;
  208. bool os_uses_xsave_xrestore = (result[2] & ((int)1 << 27)) != 0;
  209. bool cpu_avx_support = (result[2] & ((int)1 << 28)) != 0;
  210. if (os_uses_xsave_xrestore && cpu_avx_support) {
  211. // Check if the OS will save the YMM registers
  212. uint32_t xcr_feature_mask;
  213. # if defined(__GNUC__)
  214. int edx; /* not used */
  215. /* actual opcode for xgetbv */
  216. __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr_feature_mask), "=d"(edx) : "c"(0));
  217. # elif defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  218. xcr_feature_mask = (uint32_t)_xgetbv(
  219. _XCR_XFEATURE_ENABLED_MASK); /* min VS2010 SP1 compiler is required */
  220. # else
  221. xcr_feature_mask = 0;
  222. # endif
  223. caps.avx = (xcr_feature_mask & 0x6) == 0x6;
  224. }
  225. caps.f16c = (result[2] & ((int)1 << 29)) != 0;
  226. __cpuid(result, 0x00000007);
  227. caps.bmi1 = (result[1] & ((int)1 << 3)) != 0;
  228. caps.bmi2 = (result[1] & ((int)1 << 8)) != 0;
  229. caps.avx2 = (result[1] & ((int)1 << 5)) != 0;
  230. }
  231. caps_init = true;
  232. }
  233. return caps;
  234. }
  235. bool system_cpu_support_sse2()
  236. {
  237. CPUCapabilities &caps = system_cpu_capabilities();
  238. return caps.sse && caps.sse2;
  239. }
  240. bool system_cpu_support_sse3()
  241. {
  242. CPUCapabilities &caps = system_cpu_capabilities();
  243. return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3;
  244. }
  245. bool system_cpu_support_sse41()
  246. {
  247. CPUCapabilities &caps = system_cpu_capabilities();
  248. return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41;
  249. }
  250. bool system_cpu_support_avx()
  251. {
  252. CPUCapabilities &caps = system_cpu_capabilities();
  253. return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx;
  254. }
  255. bool system_cpu_support_avx2()
  256. {
  257. CPUCapabilities &caps = system_cpu_capabilities();
  258. return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx && caps.f16c &&
  259. caps.avx2 && caps.fma3 && caps.bmi1 && caps.bmi2;
  260. }
  261. #else
  /* Variant for builds where the x86 capability detection is compiled out:
   * SSE2 is always reported as unsupported. */
  bool system_cpu_support_sse2()
  {
    return false;
  }
  /* Variant for builds where the x86 capability detection is compiled out:
   * SSE3 is always reported as unsupported. */
  bool system_cpu_support_sse3()
  {
    return false;
  }
  /* Variant for builds where the x86 capability detection is compiled out:
   * SSE4.1 is always reported as unsupported. */
  bool system_cpu_support_sse41()
  {
    return false;
  }
  /* Variant for builds where the x86 capability detection is compiled out:
   * AVX is always reported as unsupported. */
  bool system_cpu_support_avx()
  {
    return false;
  }
  /* Variant for builds where the x86 capability detection is compiled out:
   * AVX2 is always reported as unsupported. */
  bool system_cpu_support_avx2()
  {
    return false;
  }
  282. #endif
  283. bool system_call_self(const vector<string> &args)
  284. {
  285. /* Escape program and arguments in case they contain spaces. */
  286. string cmd = "\"" + Sysutil::this_program_path() + "\"";
  287. for (int i = 0; i < args.size(); i++) {
  288. cmd += " \"" + args[i] + "\"";
  289. }
  290. #ifdef _WIN32
  291. /* Use cmd /S to avoid issues with spaces in arguments. */
  292. cmd = "cmd /S /C \"" + cmd + " > nul \"";
  293. #else
  294. /* Quiet output. */
  295. cmd += " > /dev/null";
  296. #endif
  297. return (system(cmd.c_str()) == 0);
  298. }
  /* Total amount of physical memory in bytes, or 0 when it can not be
   * determined. */
  size_t system_physical_ram()
  {
  #ifdef _WIN32
    MEMORYSTATUSEX ram;
    ram.dwLength = sizeof(ram);
    if (!GlobalMemoryStatusEx(&ram)) {
      return 0;
    }
    /* ullTotalPhys is already expressed in bytes, no scaling needed. */
    return (size_t)ram.ullTotalPhys;
  #elif defined(__APPLE__)
    uint64_t ram = 0;
    size_t len = sizeof(ram);
    if (sysctlbyname("hw.memsize", &ram, &len, NULL, 0) == 0) {
      return (size_t)ram;
    }
    return 0;
  #else
    /* sysconf() returns -1 on failure. Check before converting to an unsigned
     * type, otherwise the error turns into a huge bogus memory size. */
    const long page_size = sysconf(_SC_PAGESIZE);
    const long page_count = sysconf(_SC_PHYS_PAGES);
    if (page_size <= 0 || page_count <= 0) {
      return 0;
    }
    return (size_t)page_size * (size_t)page_count;
  #endif
  }
  319. CCL_NAMESPACE_END