opencl_util.cpp 36 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258
  1. /*
  2. * Copyright 2011-2013 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifdef WITH_OPENCL
  17. # include "device/opencl/opencl.h"
  18. # include "device/device_intern.h"
  19. # include "util/util_debug.h"
  20. # include "util/util_logging.h"
  21. # include "util/util_md5.h"
  22. # include "util/util_path.h"
  23. # include "util/util_time.h"
  24. # include "util/util_system.h"
  25. using std::cerr;
  26. using std::endl;
  27. CCL_NAMESPACE_BEGIN
  28. OpenCLCache::Slot::ProgramEntry::ProgramEntry() : program(NULL), mutex(NULL)
  29. {
  30. }
  31. OpenCLCache::Slot::ProgramEntry::ProgramEntry(const ProgramEntry &rhs)
  32. : program(rhs.program), mutex(NULL)
  33. {
  34. }
  35. OpenCLCache::Slot::ProgramEntry::~ProgramEntry()
  36. {
  37. delete mutex;
  38. }
  39. OpenCLCache::Slot::Slot() : context_mutex(NULL), context(NULL)
  40. {
  41. }
  42. OpenCLCache::Slot::Slot(const Slot &rhs)
  43. : context_mutex(NULL), context(NULL), programs(rhs.programs)
  44. {
  45. }
  46. OpenCLCache::Slot::~Slot()
  47. {
  48. delete context_mutex;
  49. }
  50. OpenCLCache &OpenCLCache::global_instance()
  51. {
  52. static OpenCLCache instance;
  53. return instance;
  54. }
  55. cl_context OpenCLCache::get_context(cl_platform_id platform,
  56. cl_device_id device,
  57. thread_scoped_lock &slot_locker)
  58. {
  59. assert(platform != NULL);
  60. OpenCLCache &self = global_instance();
  61. thread_scoped_lock cache_lock(self.cache_lock);
  62. pair<CacheMap::iterator, bool> ins = self.cache.insert(
  63. CacheMap::value_type(PlatformDevicePair(platform, device), Slot()));
  64. Slot &slot = ins.first->second;
  65. /* create slot lock only while holding cache lock */
  66. if (!slot.context_mutex)
  67. slot.context_mutex = new thread_mutex;
  68. /* need to unlock cache before locking slot, to allow store to complete */
  69. cache_lock.unlock();
  70. /* lock the slot */
  71. slot_locker = thread_scoped_lock(*slot.context_mutex);
  72. /* If the thing isn't cached */
  73. if (slot.context == NULL) {
  74. /* return with the caller's lock holder holding the slot lock */
  75. return NULL;
  76. }
  77. /* the item was already cached, release the slot lock */
  78. slot_locker.unlock();
  79. cl_int ciErr = clRetainContext(slot.context);
  80. assert(ciErr == CL_SUCCESS);
  81. (void)ciErr;
  82. return slot.context;
  83. }
  84. cl_program OpenCLCache::get_program(cl_platform_id platform,
  85. cl_device_id device,
  86. ustring key,
  87. thread_scoped_lock &slot_locker)
  88. {
  89. assert(platform != NULL);
  90. OpenCLCache &self = global_instance();
  91. thread_scoped_lock cache_lock(self.cache_lock);
  92. pair<CacheMap::iterator, bool> ins = self.cache.insert(
  93. CacheMap::value_type(PlatformDevicePair(platform, device), Slot()));
  94. Slot &slot = ins.first->second;
  95. pair<Slot::EntryMap::iterator, bool> ins2 = slot.programs.insert(
  96. Slot::EntryMap::value_type(key, Slot::ProgramEntry()));
  97. Slot::ProgramEntry &entry = ins2.first->second;
  98. /* create slot lock only while holding cache lock */
  99. if (!entry.mutex)
  100. entry.mutex = new thread_mutex;
  101. /* need to unlock cache before locking slot, to allow store to complete */
  102. cache_lock.unlock();
  103. /* lock the slot */
  104. slot_locker = thread_scoped_lock(*entry.mutex);
  105. /* If the thing isn't cached */
  106. if (entry.program == NULL) {
  107. /* return with the caller's lock holder holding the slot lock */
  108. return NULL;
  109. }
  110. /* the item was already cached, release the slot lock */
  111. slot_locker.unlock();
  112. cl_int ciErr = clRetainProgram(entry.program);
  113. assert(ciErr == CL_SUCCESS);
  114. (void)ciErr;
  115. return entry.program;
  116. }
  117. void OpenCLCache::store_context(cl_platform_id platform,
  118. cl_device_id device,
  119. cl_context context,
  120. thread_scoped_lock &slot_locker)
  121. {
  122. assert(platform != NULL);
  123. assert(device != NULL);
  124. assert(context != NULL);
  125. OpenCLCache &self = global_instance();
  126. thread_scoped_lock cache_lock(self.cache_lock);
  127. CacheMap::iterator i = self.cache.find(PlatformDevicePair(platform, device));
  128. cache_lock.unlock();
  129. Slot &slot = i->second;
  130. /* sanity check */
  131. assert(i != self.cache.end());
  132. assert(slot.context == NULL);
  133. slot.context = context;
  134. /* unlock the slot */
  135. slot_locker.unlock();
  136. /* increment reference count in OpenCL.
  137. * The caller is going to release the object when done with it. */
  138. cl_int ciErr = clRetainContext(context);
  139. assert(ciErr == CL_SUCCESS);
  140. (void)ciErr;
  141. }
  142. void OpenCLCache::store_program(cl_platform_id platform,
  143. cl_device_id device,
  144. cl_program program,
  145. ustring key,
  146. thread_scoped_lock &slot_locker)
  147. {
  148. assert(platform != NULL);
  149. assert(device != NULL);
  150. assert(program != NULL);
  151. OpenCLCache &self = global_instance();
  152. thread_scoped_lock cache_lock(self.cache_lock);
  153. CacheMap::iterator i = self.cache.find(PlatformDevicePair(platform, device));
  154. assert(i != self.cache.end());
  155. Slot &slot = i->second;
  156. Slot::EntryMap::iterator i2 = slot.programs.find(key);
  157. assert(i2 != slot.programs.end());
  158. Slot::ProgramEntry &entry = i2->second;
  159. assert(entry.program == NULL);
  160. cache_lock.unlock();
  161. entry.program = program;
  162. /* unlock the slot */
  163. slot_locker.unlock();
  164. /* Increment reference count in OpenCL.
  165. * The caller is going to release the object when done with it.
  166. */
  167. cl_int ciErr = clRetainProgram(program);
  168. assert(ciErr == CL_SUCCESS);
  169. (void)ciErr;
  170. }
  171. string OpenCLCache::get_kernel_md5()
  172. {
  173. OpenCLCache &self = global_instance();
  174. thread_scoped_lock lock(self.kernel_md5_lock);
  175. if (self.kernel_md5.empty()) {
  176. self.kernel_md5 = path_files_md5_hash(path_get("source"));
  177. }
  178. return self.kernel_md5;
  179. }
  180. static string get_program_source(const string &kernel_file)
  181. {
  182. string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
  183. /* We compile kernels consisting of many files. unfortunately OpenCL
  184. * kernel caches do not seem to recognize changes in included files.
  185. * so we force recompile on changes by adding the md5 hash of all files.
  186. */
  187. source = path_source_replace_includes(source, path_get("source"));
  188. source += "\n// " + util_md5_string(source) + "\n";
  189. return source;
  190. }
  191. OpenCLDevice::OpenCLProgram::OpenCLProgram(OpenCLDevice *device,
  192. const string &program_name,
  193. const string &kernel_file,
  194. const string &kernel_build_options,
  195. bool use_stdout)
  196. : device(device),
  197. program_name(program_name),
  198. kernel_file(kernel_file),
  199. kernel_build_options(kernel_build_options),
  200. use_stdout(use_stdout)
  201. {
  202. loaded = false;
  203. needs_compiling = true;
  204. program = NULL;
  205. }
  206. OpenCLDevice::OpenCLProgram::~OpenCLProgram()
  207. {
  208. release();
  209. }
  210. void OpenCLDevice::OpenCLProgram::release()
  211. {
  212. for (map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end();
  213. ++kernel) {
  214. if (kernel->second) {
  215. clReleaseKernel(kernel->second);
  216. kernel->second = NULL;
  217. }
  218. }
  219. if (program) {
  220. clReleaseProgram(program);
  221. program = NULL;
  222. }
  223. }
  224. void OpenCLDevice::OpenCLProgram::add_log(const string &msg, bool debug)
  225. {
  226. if (!use_stdout) {
  227. log += msg + "\n";
  228. }
  229. else if (!debug) {
  230. printf("%s\n", msg.c_str());
  231. fflush(stdout);
  232. }
  233. else {
  234. VLOG(2) << msg;
  235. }
  236. }
  237. void OpenCLDevice::OpenCLProgram::add_error(const string &msg)
  238. {
  239. if (use_stdout) {
  240. fprintf(stderr, "%s\n", msg.c_str());
  241. }
  242. if (error_msg == "") {
  243. error_msg += "\n";
  244. }
  245. error_msg += msg;
  246. }
  247. void OpenCLDevice::OpenCLProgram::add_kernel(ustring name)
  248. {
  249. if (!kernels.count(name)) {
  250. kernels[name] = NULL;
  251. }
  252. }
  253. bool OpenCLDevice::OpenCLProgram::build_kernel(const string *debug_src)
  254. {
  255. string build_options;
  256. build_options = device->kernel_build_options(debug_src) + kernel_build_options;
  257. VLOG(1) << "Build options passed to clBuildProgram: '" << build_options << "'.";
  258. cl_int ciErr = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
  259. /* show warnings even if build is successful */
  260. size_t ret_val_size = 0;
  261. clGetProgramBuildInfo(program, device->cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
  262. if (ciErr != CL_SUCCESS) {
  263. add_error(string("OpenCL build failed with error ") + clewErrorString(ciErr) +
  264. ", errors in console.");
  265. }
  266. if (ret_val_size > 1) {
  267. vector<char> build_log(ret_val_size + 1);
  268. clGetProgramBuildInfo(
  269. program, device->cdDevice, CL_PROGRAM_BUILD_LOG, ret_val_size, &build_log[0], NULL);
  270. build_log[ret_val_size] = '\0';
  271. /* Skip meaningless empty output from the NVidia compiler. */
  272. if (!(ret_val_size == 2 && build_log[0] == '\n')) {
  273. add_log(string("OpenCL program ") + program_name + " build output: " + string(&build_log[0]),
  274. ciErr == CL_SUCCESS);
  275. }
  276. }
  277. return (ciErr == CL_SUCCESS);
  278. }
  279. bool OpenCLDevice::OpenCLProgram::compile_kernel(const string *debug_src)
  280. {
  281. string source = get_program_source(kernel_file);
  282. if (debug_src) {
  283. path_write_text(*debug_src, source);
  284. }
  285. size_t source_len = source.size();
  286. const char *source_str = source.c_str();
  287. cl_int ciErr;
  288. program = clCreateProgramWithSource(device->cxContext, 1, &source_str, &source_len, &ciErr);
  289. if (ciErr != CL_SUCCESS) {
  290. add_error(string("OpenCL program creation failed: ") + clewErrorString(ciErr));
  291. return false;
  292. }
  293. double starttime = time_dt();
  294. add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
  295. add_log(string("Build flags: ") + kernel_build_options, true);
  296. if (!build_kernel(debug_src))
  297. return false;
  298. double elapsed = time_dt() - starttime;
  299. add_log(
  300. string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed),
  301. false);
  302. return true;
  303. }
  304. static void escape_python_string(string &str)
  305. {
  306. /* Escape string to be passed as a Python raw string with '' quotes'. */
  307. string_replace(str, "'", "\'");
  308. }
  309. bool OpenCLDevice::OpenCLProgram::compile_separate(const string &clbin)
  310. {
  311. vector<string> args;
  312. args.push_back("--background");
  313. args.push_back("--factory-startup");
  314. args.push_back("--python-expr");
  315. int device_platform_id = device->device_num;
  316. string device_name = device->device_name;
  317. string platform_name = device->platform_name;
  318. string build_options = device->kernel_build_options(NULL) + kernel_build_options;
  319. string kernel_file_escaped = kernel_file;
  320. string clbin_escaped = clbin;
  321. escape_python_string(device_name);
  322. escape_python_string(platform_name);
  323. escape_python_string(build_options);
  324. escape_python_string(kernel_file_escaped);
  325. escape_python_string(clbin_escaped);
  326. args.push_back(string_printf(
  327. "import _cycles; _cycles.opencl_compile(r'%d', r'%s', r'%s', r'%s', r'%s', r'%s')",
  328. device_platform_id,
  329. device_name.c_str(),
  330. platform_name.c_str(),
  331. build_options.c_str(),
  332. kernel_file_escaped.c_str(),
  333. clbin_escaped.c_str()));
  334. double starttime = time_dt();
  335. add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
  336. add_log(string("Build flags: ") + kernel_build_options, true);
  337. if (!system_call_self(args) || !path_exists(clbin)) {
  338. return false;
  339. }
  340. double elapsed = time_dt() - starttime;
  341. add_log(
  342. string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed),
  343. false);
  344. return load_binary(clbin);
  345. }
  346. /* Compile opencl kernel. This method is called from the _cycles Python
  347. * module compile kernels. Parameters must match function above. */
  348. bool device_opencl_compile_kernel(const vector<string> &parameters)
  349. {
  350. int device_platform_id = std::stoi(parameters[0]);
  351. const string &device_name = parameters[1];
  352. const string &platform_name = parameters[2];
  353. const string &build_options = parameters[3];
  354. const string &kernel_file = parameters[4];
  355. const string &binary_path = parameters[5];
  356. if (clewInit() != CLEW_SUCCESS) {
  357. return false;
  358. }
  359. vector<OpenCLPlatformDevice> usable_devices;
  360. OpenCLInfo::get_usable_devices(&usable_devices);
  361. if (device_platform_id >= usable_devices.size()) {
  362. return false;
  363. }
  364. OpenCLPlatformDevice &platform_device = usable_devices[device_platform_id];
  365. if (platform_device.platform_name != platform_name ||
  366. platform_device.device_name != device_name) {
  367. return false;
  368. }
  369. cl_platform_id platform = platform_device.platform_id;
  370. cl_device_id device = platform_device.device_id;
  371. const cl_context_properties context_props[] = {
  372. CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0, 0};
  373. cl_int err;
  374. cl_context context = clCreateContext(context_props, 1, &device, NULL, NULL, &err);
  375. if (err != CL_SUCCESS) {
  376. return false;
  377. }
  378. string source = get_program_source(kernel_file);
  379. size_t source_len = source.size();
  380. const char *source_str = source.c_str();
  381. cl_program program = clCreateProgramWithSource(context, 1, &source_str, &source_len, &err);
  382. bool result = false;
  383. if (err == CL_SUCCESS) {
  384. err = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
  385. if (err == CL_SUCCESS) {
  386. size_t size = 0;
  387. clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
  388. if (size > 0) {
  389. vector<uint8_t> binary(size);
  390. uint8_t *bytes = &binary[0];
  391. clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t *), &bytes, NULL);
  392. result = path_write_binary(binary_path, binary);
  393. }
  394. }
  395. clReleaseProgram(program);
  396. }
  397. clReleaseContext(context);
  398. return result;
  399. }
  400. bool OpenCLDevice::OpenCLProgram::load_binary(const string &clbin, const string *debug_src)
  401. {
  402. /* read binary into memory */
  403. vector<uint8_t> binary;
  404. if (!path_read_binary(clbin, binary)) {
  405. add_error(string_printf("OpenCL failed to read cached binary %s.", clbin.c_str()));
  406. return false;
  407. }
  408. /* create program */
  409. cl_int status, ciErr;
  410. size_t size = binary.size();
  411. const uint8_t *bytes = &binary[0];
  412. program = clCreateProgramWithBinary(
  413. device->cxContext, 1, &device->cdDevice, &size, &bytes, &status, &ciErr);
  414. if (status != CL_SUCCESS || ciErr != CL_SUCCESS) {
  415. add_error(string("OpenCL failed create program from cached binary ") + clbin + ": " +
  416. clewErrorString(status) + " " + clewErrorString(ciErr));
  417. return false;
  418. }
  419. if (!build_kernel(debug_src))
  420. return false;
  421. return true;
  422. }
  423. bool OpenCLDevice::OpenCLProgram::save_binary(const string &clbin)
  424. {
  425. size_t size = 0;
  426. clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
  427. if (!size)
  428. return false;
  429. vector<uint8_t> binary(size);
  430. uint8_t *bytes = &binary[0];
  431. clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t *), &bytes, NULL);
  432. return path_write_binary(clbin, binary);
  433. }
  434. bool OpenCLDevice::OpenCLProgram::load()
  435. {
  436. loaded = false;
  437. string device_md5 = device->device_md5_hash(kernel_build_options);
  438. /* Try to use cached kernel. */
  439. thread_scoped_lock cache_locker;
  440. ustring cache_key(program_name + device_md5);
  441. program = device->load_cached_kernel(cache_key, cache_locker);
  442. if (!program) {
  443. add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
  444. /* need to create source to get md5 */
  445. string source = get_program_source(kernel_file);
  446. string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" +
  447. util_md5_string(source);
  448. basename = path_cache_get(path_join("kernels", basename));
  449. string clbin = basename + ".clbin";
  450. /* If binary kernel exists already, try use it. */
  451. if (path_exists(clbin) && load_binary(clbin)) {
  452. /* Kernel loaded from binary, nothing to do. */
  453. add_log(string("Loaded program from ") + clbin + ".", true);
  454. /* Cache the program. */
  455. device->store_cached_kernel(program, cache_key, cache_locker);
  456. }
  457. else {
  458. add_log(string("OpenCL program ") + program_name + " not found on disk.", true);
  459. cache_locker.unlock();
  460. }
  461. }
  462. if (program) {
  463. create_kernels();
  464. loaded = true;
  465. needs_compiling = false;
  466. }
  467. return loaded;
  468. }
  469. void OpenCLDevice::OpenCLProgram::compile()
  470. {
  471. assert(device);
  472. string device_md5 = device->device_md5_hash(kernel_build_options);
  473. /* Try to use cached kernel. */
  474. thread_scoped_lock cache_locker;
  475. ustring cache_key(program_name + device_md5);
  476. program = device->load_cached_kernel(cache_key, cache_locker);
  477. if (!program) {
  478. add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
  479. /* need to create source to get md5 */
  480. string source = get_program_source(kernel_file);
  481. string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" +
  482. util_md5_string(source);
  483. basename = path_cache_get(path_join("kernels", basename));
  484. string clbin = basename + ".clbin";
  485. /* path to preprocessed source for debugging */
  486. string clsrc, *debug_src = NULL;
  487. if (OpenCLInfo::use_debug()) {
  488. clsrc = basename + ".cl";
  489. debug_src = &clsrc;
  490. }
  491. /* If binary kernel exists already, try use it. */
  492. if (compile_separate(clbin)) {
  493. add_log(string("Built and loaded program from ") + clbin + ".", true);
  494. loaded = true;
  495. }
  496. else {
  497. add_log(string("Separate-process building of ") + clbin +
  498. " failed, will fall back to regular building.",
  499. true);
  500. /* If does not exist or loading binary failed, compile kernel. */
  501. if (!compile_kernel(debug_src)) {
  502. needs_compiling = false;
  503. return;
  504. }
  505. /* Save binary for reuse. */
  506. if (!save_binary(clbin)) {
  507. add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
  508. }
  509. }
  510. /* Cache the program. */
  511. device->store_cached_kernel(program, cache_key, cache_locker);
  512. }
  513. create_kernels();
  514. needs_compiling = false;
  515. loaded = true;
  516. }
  517. void OpenCLDevice::OpenCLProgram::create_kernels()
  518. {
  519. for (map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end();
  520. ++kernel) {
  521. assert(kernel->second == NULL);
  522. cl_int ciErr;
  523. string name = "kernel_ocl_" + kernel->first.string();
  524. kernel->second = clCreateKernel(program, name.c_str(), &ciErr);
  525. if (device->opencl_error(ciErr)) {
  526. add_error(string("Error getting kernel ") + name + " from program " + program_name + ": " +
  527. clewErrorString(ciErr));
  528. return;
  529. }
  530. }
  531. }
  532. bool OpenCLDevice::OpenCLProgram::wait_for_availability()
  533. {
  534. add_log(string("Waiting for availability of ") + program_name + ".", true);
  535. while (needs_compiling) {
  536. time_sleep(0.1);
  537. }
  538. return loaded;
  539. }
  540. void OpenCLDevice::OpenCLProgram::report_error()
  541. {
  542. /* If loaded is true, there was no error. */
  543. if (loaded)
  544. return;
  545. /* if use_stdout is true, the error was already reported. */
  546. if (use_stdout)
  547. return;
  548. cerr << error_msg << endl;
  549. if (!compile_output.empty()) {
  550. cerr << "OpenCL kernel build output for " << program_name << ":" << endl;
  551. cerr << compile_output << endl;
  552. }
  553. }
  554. cl_kernel OpenCLDevice::OpenCLProgram::operator()()
  555. {
  556. assert(kernels.size() == 1);
  557. return kernels.begin()->second;
  558. }
  559. cl_kernel OpenCLDevice::OpenCLProgram::operator()(ustring name)
  560. {
  561. assert(kernels.count(name));
  562. return kernels[name];
  563. }
  564. cl_device_type OpenCLInfo::device_type()
  565. {
  566. switch (DebugFlags().opencl.device_type) {
  567. case DebugFlags::OpenCL::DEVICE_NONE:
  568. return 0;
  569. case DebugFlags::OpenCL::DEVICE_ALL:
  570. return CL_DEVICE_TYPE_ALL;
  571. case DebugFlags::OpenCL::DEVICE_DEFAULT:
  572. return CL_DEVICE_TYPE_DEFAULT;
  573. case DebugFlags::OpenCL::DEVICE_CPU:
  574. return CL_DEVICE_TYPE_CPU;
  575. case DebugFlags::OpenCL::DEVICE_GPU:
  576. return CL_DEVICE_TYPE_GPU;
  577. case DebugFlags::OpenCL::DEVICE_ACCELERATOR:
  578. return CL_DEVICE_TYPE_ACCELERATOR;
  579. default:
  580. return CL_DEVICE_TYPE_ALL;
  581. }
  582. }
  583. bool OpenCLInfo::use_debug()
  584. {
  585. return DebugFlags().opencl.debug;
  586. }
  587. bool OpenCLInfo::device_supported(const string &platform_name, const cl_device_id device_id)
  588. {
  589. cl_device_type device_type;
  590. if (!get_device_type(device_id, &device_type)) {
  591. return false;
  592. }
  593. string device_name;
  594. if (!get_device_name(device_id, &device_name)) {
  595. return false;
  596. }
  597. int driver_major = 0;
  598. int driver_minor = 0;
  599. if (!get_driver_version(device_id, &driver_major, &driver_minor)) {
  600. return false;
  601. }
  602. VLOG(3) << "OpenCL driver version " << driver_major << "." << driver_minor;
  603. /* It is possible tyo have Iris GPU on AMD/Apple OpenCL framework
  604. * (aka, it will not be on Intel framework). This isn't supported
  605. * and needs an explicit blacklist.
  606. */
  607. if (strstr(device_name.c_str(), "Iris")) {
  608. return false;
  609. }
  610. if (platform_name == "AMD Accelerated Parallel Processing" &&
  611. device_type == CL_DEVICE_TYPE_GPU) {
  612. if (driver_major < 2236) {
  613. VLOG(1) << "AMD driver version " << driver_major << "." << driver_minor << " not supported.";
  614. return false;
  615. }
  616. const char *blacklist[] = {/* GCN 1 */
  617. "Tahiti",
  618. "Pitcairn",
  619. "Capeverde",
  620. "Oland",
  621. "Hainan",
  622. NULL};
  623. for (int i = 0; blacklist[i] != NULL; i++) {
  624. if (device_name == blacklist[i]) {
  625. VLOG(1) << "AMD device " << device_name << " not supported";
  626. return false;
  627. }
  628. }
  629. return true;
  630. }
  631. if (platform_name == "Apple" && device_type == CL_DEVICE_TYPE_GPU) {
  632. return false;
  633. }
  634. return false;
  635. }
  636. bool OpenCLInfo::platform_version_check(cl_platform_id platform, string *error)
  637. {
  638. const int req_major = 1, req_minor = 1;
  639. int major, minor;
  640. char version[256];
  641. clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL);
  642. if (sscanf(version, "OpenCL %d.%d", &major, &minor) < 2) {
  643. if (error != NULL) {
  644. *error = string_printf("OpenCL: failed to parse platform version string (%s).", version);
  645. }
  646. return false;
  647. }
  648. if (!((major == req_major && minor >= req_minor) || (major > req_major))) {
  649. if (error != NULL) {
  650. *error = string_printf(
  651. "OpenCL: platform version 1.1 or later required, found %d.%d", major, minor);
  652. }
  653. return false;
  654. }
  655. if (error != NULL) {
  656. *error = "";
  657. }
  658. return true;
  659. }
  660. bool OpenCLInfo::device_version_check(cl_device_id device, string *error)
  661. {
  662. const int req_major = 1, req_minor = 1;
  663. int major, minor;
  664. char version[256];
  665. clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, sizeof(version), &version, NULL);
  666. if (sscanf(version, "OpenCL C %d.%d", &major, &minor) < 2) {
  667. if (error != NULL) {
  668. *error = string_printf("OpenCL: failed to parse OpenCL C version string (%s).", version);
  669. }
  670. return false;
  671. }
  672. if (!((major == req_major && minor >= req_minor) || (major > req_major))) {
  673. if (error != NULL) {
  674. *error = string_printf("OpenCL: C version 1.1 or later required, found %d.%d", major, minor);
  675. }
  676. return false;
  677. }
  678. if (error != NULL) {
  679. *error = "";
  680. }
  681. return true;
  682. }
  683. string OpenCLInfo::get_hardware_id(const string &platform_name, cl_device_id device_id)
  684. {
  685. if (platform_name == "AMD Accelerated Parallel Processing" || platform_name == "Apple") {
  686. /* Use cl_amd_device_topology extension. */
  687. cl_char topology[24];
  688. if (clGetDeviceInfo(device_id, 0x4037, sizeof(topology), topology, NULL) == CL_SUCCESS &&
  689. topology[0] == 1) {
  690. return string_printf("%02x:%02x.%01x",
  691. (unsigned int)topology[21],
  692. (unsigned int)topology[22],
  693. (unsigned int)topology[23]);
  694. }
  695. }
  696. else if (platform_name == "NVIDIA CUDA") {
  697. /* Use two undocumented options of the cl_nv_device_attribute_query extension. */
  698. cl_int bus_id, slot_id;
  699. if (clGetDeviceInfo(device_id, 0x4008, sizeof(cl_int), &bus_id, NULL) == CL_SUCCESS &&
  700. clGetDeviceInfo(device_id, 0x4009, sizeof(cl_int), &slot_id, NULL) == CL_SUCCESS) {
  701. return string_printf("%02x:%02x.%01x",
  702. (unsigned int)(bus_id),
  703. (unsigned int)(slot_id >> 3),
  704. (unsigned int)(slot_id & 0x7));
  705. }
  706. }
  707. /* No general way to get a hardware ID from OpenCL => give up. */
  708. return "";
  709. }
  710. void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices, bool force_all)
  711. {
  712. const cl_device_type device_type = OpenCLInfo::device_type();
  713. static bool first_time = true;
  714. # define FIRST_VLOG(severity) \
  715. if (first_time) \
  716. VLOG(severity)
  717. usable_devices->clear();
  718. if (device_type == 0) {
  719. FIRST_VLOG(2) << "OpenCL devices are forced to be disabled.";
  720. first_time = false;
  721. return;
  722. }
  723. cl_int error;
  724. vector<cl_device_id> device_ids;
  725. vector<cl_platform_id> platform_ids;
  726. /* Get platforms. */
  727. if (!get_platforms(&platform_ids, &error)) {
  728. FIRST_VLOG(2) << "Error fetching platforms:" << string(clewErrorString(error));
  729. first_time = false;
  730. return;
  731. }
  732. if (platform_ids.size() == 0) {
  733. FIRST_VLOG(2) << "No OpenCL platforms were found.";
  734. first_time = false;
  735. return;
  736. }
  737. /* Devices are numbered consecutively across platforms. */
  738. for (int platform = 0; platform < platform_ids.size(); platform++) {
  739. cl_platform_id platform_id = platform_ids[platform];
  740. string platform_name;
  741. if (!get_platform_name(platform_id, &platform_name)) {
  742. FIRST_VLOG(2) << "Failed to get platform name, ignoring.";
  743. continue;
  744. }
  745. FIRST_VLOG(2) << "Enumerating devices for platform " << platform_name << ".";
  746. if (!platform_version_check(platform_id)) {
  747. FIRST_VLOG(2) << "Ignoring platform " << platform_name
  748. << " due to too old compiler version.";
  749. continue;
  750. }
  751. if (!get_platform_devices(platform_id, device_type, &device_ids, &error)) {
  752. FIRST_VLOG(2) << "Ignoring platform " << platform_name
  753. << ", failed to fetch of devices: " << string(clewErrorString(error));
  754. continue;
  755. }
  756. if (device_ids.size() == 0) {
  757. FIRST_VLOG(2) << "Ignoring platform " << platform_name << ", it has no devices.";
  758. continue;
  759. }
  760. for (int num = 0; num < device_ids.size(); num++) {
  761. const cl_device_id device_id = device_ids[num];
  762. string device_name;
  763. if (!get_device_name(device_id, &device_name, &error)) {
  764. FIRST_VLOG(2) << "Failed to fetch device name: " << string(clewErrorString(error))
  765. << ", ignoring.";
  766. continue;
  767. }
  768. if (!device_version_check(device_id)) {
  769. FIRST_VLOG(2) << "Ignoring device " << device_name << " due to old compiler version.";
  770. continue;
  771. }
  772. if (force_all || device_supported(platform_name, device_id)) {
  773. cl_device_type device_type;
  774. if (!get_device_type(device_id, &device_type, &error)) {
  775. FIRST_VLOG(2) << "Ignoring device " << device_name
  776. << ", failed to fetch device type:" << string(clewErrorString(error));
  777. continue;
  778. }
  779. string readable_device_name = get_readable_device_name(device_id);
  780. if (readable_device_name != device_name) {
  781. FIRST_VLOG(2) << "Using more readable device name: " << readable_device_name;
  782. }
  783. FIRST_VLOG(2) << "Adding new device " << readable_device_name << ".";
  784. string hardware_id = get_hardware_id(platform_name, device_id);
  785. string device_extensions = get_device_extensions(device_id);
  786. usable_devices->push_back(OpenCLPlatformDevice(platform_id,
  787. platform_name,
  788. device_id,
  789. device_type,
  790. readable_device_name,
  791. hardware_id,
  792. device_extensions));
  793. }
  794. else {
  795. FIRST_VLOG(2) << "Ignoring device " << device_name << ", not officially supported yet.";
  796. }
  797. }
  798. }
  799. first_time = false;
  800. }
  801. bool OpenCLInfo::get_platforms(vector<cl_platform_id> *platform_ids, cl_int *error)
  802. {
  803. /* Reset from possible previous state. */
  804. platform_ids->resize(0);
  805. cl_uint num_platforms;
  806. if (!get_num_platforms(&num_platforms, error)) {
  807. return false;
  808. }
  809. /* Get actual platforms. */
  810. cl_int err;
  811. platform_ids->resize(num_platforms);
  812. if ((err = clGetPlatformIDs(num_platforms, &platform_ids->at(0), NULL)) != CL_SUCCESS) {
  813. if (error != NULL) {
  814. *error = err;
  815. }
  816. return false;
  817. }
  818. if (error != NULL) {
  819. *error = CL_SUCCESS;
  820. }
  821. return true;
  822. }
  823. vector<cl_platform_id> OpenCLInfo::get_platforms()
  824. {
  825. vector<cl_platform_id> platform_ids;
  826. get_platforms(&platform_ids);
  827. return platform_ids;
  828. }
  829. bool OpenCLInfo::get_num_platforms(cl_uint *num_platforms, cl_int *error)
  830. {
  831. cl_int err;
  832. if ((err = clGetPlatformIDs(0, NULL, num_platforms)) != CL_SUCCESS) {
  833. if (error != NULL) {
  834. *error = err;
  835. }
  836. *num_platforms = 0;
  837. return false;
  838. }
  839. if (error != NULL) {
  840. *error = CL_SUCCESS;
  841. }
  842. return true;
  843. }
  844. cl_uint OpenCLInfo::get_num_platforms()
  845. {
  846. cl_uint num_platforms;
  847. if (!get_num_platforms(&num_platforms)) {
  848. return 0;
  849. }
  850. return num_platforms;
  851. }
  852. bool OpenCLInfo::get_platform_name(cl_platform_id platform_id, string *platform_name)
  853. {
  854. char buffer[256];
  855. if (clGetPlatformInfo(platform_id, CL_PLATFORM_NAME, sizeof(buffer), &buffer, NULL) !=
  856. CL_SUCCESS) {
  857. *platform_name = "";
  858. return false;
  859. }
  860. *platform_name = buffer;
  861. return true;
  862. }
  863. string OpenCLInfo::get_platform_name(cl_platform_id platform_id)
  864. {
  865. string platform_name;
  866. if (!get_platform_name(platform_id, &platform_name)) {
  867. return "";
  868. }
  869. return platform_name;
  870. }
  871. bool OpenCLInfo::get_num_platform_devices(cl_platform_id platform_id,
  872. cl_device_type device_type,
  873. cl_uint *num_devices,
  874. cl_int *error)
  875. {
  876. cl_int err;
  877. if ((err = clGetDeviceIDs(platform_id, device_type, 0, NULL, num_devices)) != CL_SUCCESS) {
  878. if (error != NULL) {
  879. *error = err;
  880. }
  881. *num_devices = 0;
  882. return false;
  883. }
  884. if (error != NULL) {
  885. *error = CL_SUCCESS;
  886. }
  887. return true;
  888. }
  889. cl_uint OpenCLInfo::get_num_platform_devices(cl_platform_id platform_id,
  890. cl_device_type device_type)
  891. {
  892. cl_uint num_devices;
  893. if (!get_num_platform_devices(platform_id, device_type, &num_devices)) {
  894. return 0;
  895. }
  896. return num_devices;
  897. }
  898. bool OpenCLInfo::get_platform_devices(cl_platform_id platform_id,
  899. cl_device_type device_type,
  900. vector<cl_device_id> *device_ids,
  901. cl_int *error)
  902. {
  903. /* Reset from possible previous state. */
  904. device_ids->resize(0);
  905. /* Get number of devices to pre-allocate memory. */
  906. cl_uint num_devices;
  907. if (!get_num_platform_devices(platform_id, device_type, &num_devices, error)) {
  908. return false;
  909. }
  910. /* Get actual device list. */
  911. device_ids->resize(num_devices);
  912. cl_int err;
  913. if ((err = clGetDeviceIDs(platform_id, device_type, num_devices, &device_ids->at(0), NULL)) !=
  914. CL_SUCCESS) {
  915. if (error != NULL) {
  916. *error = err;
  917. }
  918. return false;
  919. }
  920. if (error != NULL) {
  921. *error = CL_SUCCESS;
  922. }
  923. return true;
  924. }
  925. vector<cl_device_id> OpenCLInfo::get_platform_devices(cl_platform_id platform_id,
  926. cl_device_type device_type)
  927. {
  928. vector<cl_device_id> devices;
  929. get_platform_devices(platform_id, device_type, &devices);
  930. return devices;
  931. }
  932. bool OpenCLInfo::get_device_name(cl_device_id device_id, string *device_name, cl_int *error)
  933. {
  934. char buffer[1024];
  935. cl_int err;
  936. if ((err = clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(buffer), &buffer, NULL)) !=
  937. CL_SUCCESS) {
  938. if (error != NULL) {
  939. *error = err;
  940. }
  941. *device_name = "";
  942. return false;
  943. }
  944. if (error != NULL) {
  945. *error = CL_SUCCESS;
  946. }
  947. *device_name = buffer;
  948. return true;
  949. }
  950. string OpenCLInfo::get_device_name(cl_device_id device_id)
  951. {
  952. string device_name;
  953. if (!get_device_name(device_id, &device_name)) {
  954. return "";
  955. }
  956. return device_name;
  957. }
  958. bool OpenCLInfo::get_device_extensions(cl_device_id device_id,
  959. string *device_extensions,
  960. cl_int *error)
  961. {
  962. char buffer[1024];
  963. cl_int err;
  964. if ((err = clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, sizeof(buffer), &buffer, NULL)) !=
  965. CL_SUCCESS) {
  966. if (error != NULL) {
  967. *error = err;
  968. }
  969. *device_extensions = "";
  970. return false;
  971. }
  972. if (error != NULL) {
  973. *error = CL_SUCCESS;
  974. }
  975. *device_extensions = buffer;
  976. return true;
  977. }
  978. string OpenCLInfo::get_device_extensions(cl_device_id device_id)
  979. {
  980. string device_extensions;
  981. if (!get_device_extensions(device_id, &device_extensions)) {
  982. return "";
  983. }
  984. return device_extensions;
  985. }
  986. bool OpenCLInfo::get_device_type(cl_device_id device_id,
  987. cl_device_type *device_type,
  988. cl_int *error)
  989. {
  990. cl_int err;
  991. if ((err = clGetDeviceInfo(
  992. device_id, CL_DEVICE_TYPE, sizeof(cl_device_type), device_type, NULL)) != CL_SUCCESS) {
  993. if (error != NULL) {
  994. *error = err;
  995. }
  996. *device_type = 0;
  997. return false;
  998. }
  999. if (error != NULL) {
  1000. *error = CL_SUCCESS;
  1001. }
  1002. return true;
  1003. }
  1004. cl_device_type OpenCLInfo::get_device_type(cl_device_id device_id)
  1005. {
  1006. cl_device_type device_type;
  1007. if (!get_device_type(device_id, &device_type)) {
  1008. return 0;
  1009. }
  1010. return device_type;
  1011. }
  1012. string OpenCLInfo::get_readable_device_name(cl_device_id device_id)
  1013. {
  1014. string name = "";
  1015. char board_name[1024];
  1016. size_t length = 0;
  1017. if (clGetDeviceInfo(
  1018. device_id, CL_DEVICE_BOARD_NAME_AMD, sizeof(board_name), &board_name, &length) ==
  1019. CL_SUCCESS) {
  1020. if (length != 0 && board_name[0] != '\0') {
  1021. name = board_name;
  1022. }
  1023. }
  1024. /* Fallback to standard device name API. */
  1025. if (name.empty()) {
  1026. name = get_device_name(device_id);
  1027. }
  1028. /* Special exception for AMD Vega, need to be able to tell
  1029. * Vega 56 from 64 apart.
  1030. */
  1031. if (name == "Radeon RX Vega") {
  1032. cl_int max_compute_units = 0;
  1033. if (clGetDeviceInfo(device_id,
  1034. CL_DEVICE_MAX_COMPUTE_UNITS,
  1035. sizeof(max_compute_units),
  1036. &max_compute_units,
  1037. NULL) == CL_SUCCESS) {
  1038. name += " " + to_string(max_compute_units);
  1039. }
  1040. }
  1041. /* Distinguish from our native CPU device. */
  1042. if (get_device_type(device_id) & CL_DEVICE_TYPE_CPU) {
  1043. name += " (OpenCL)";
  1044. }
  1045. return name;
  1046. }
  1047. bool OpenCLInfo::get_driver_version(cl_device_id device_id, int *major, int *minor, cl_int *error)
  1048. {
  1049. char buffer[1024];
  1050. cl_int err;
  1051. if ((err = clGetDeviceInfo(device_id, CL_DRIVER_VERSION, sizeof(buffer), &buffer, NULL)) !=
  1052. CL_SUCCESS) {
  1053. if (error != NULL) {
  1054. *error = err;
  1055. }
  1056. return false;
  1057. }
  1058. if (error != NULL) {
  1059. *error = CL_SUCCESS;
  1060. }
  1061. if (sscanf(buffer, "%d.%d", major, minor) < 2) {
  1062. VLOG(1) << string_printf("OpenCL: failed to parse driver version string (%s).", buffer);
  1063. return false;
  1064. }
  1065. return true;
  1066. }
  1067. int OpenCLInfo::mem_sub_ptr_alignment(cl_device_id device_id)
  1068. {
  1069. int base_align_bits;
  1070. if (clGetDeviceInfo(
  1071. device_id, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(int), &base_align_bits, NULL) ==
  1072. CL_SUCCESS) {
  1073. return base_align_bits / 8;
  1074. }
  1075. return 1;
  1076. }
  1077. CCL_NAMESPACE_END
  1078. #endif