opencl.h 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658
  1. /*
  2. * Copyright 2011-2013 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifdef WITH_OPENCL
  17. # include "device/device.h"
  18. # include "device/device_denoising.h"
  19. # include "device/device_split_kernel.h"
  20. # include "util/util_map.h"
  21. # include "util/util_param.h"
  22. # include "util/util_string.h"
  23. # include "clew.h"
  24. # include "device/opencl/memory_manager.h"
  25. CCL_NAMESPACE_BEGIN
  26. /* Disable workarounds, seems to be working fine on latest drivers. */
  27. # define CYCLES_DISABLE_DRIVER_WORKAROUNDS
  28. /* Define CYCLES_DISABLE_DRIVER_WORKAROUNDS to disable workaounds for testing */
  29. # ifndef CYCLES_DISABLE_DRIVER_WORKAROUNDS
  30. /* Work around AMD driver hangs by ensuring each command is finished before doing anything else. */
  31. # undef clEnqueueNDRangeKernel
  32. # define clEnqueueNDRangeKernel(a, b, c, d, e, f, g, h, i) \
  33. CLEW_GET_FUN(__clewEnqueueNDRangeKernel)(a, b, c, d, e, f, g, h, i); \
  34. clFinish(a);
  35. # undef clEnqueueWriteBuffer
  36. # define clEnqueueWriteBuffer(a, b, c, d, e, f, g, h, i) \
  37. CLEW_GET_FUN(__clewEnqueueWriteBuffer)(a, b, c, d, e, f, g, h, i); \
  38. clFinish(a);
  39. # undef clEnqueueReadBuffer
  40. # define clEnqueueReadBuffer(a, b, c, d, e, f, g, h, i) \
  41. CLEW_GET_FUN(__clewEnqueueReadBuffer)(a, b, c, d, e, f, g, h, i); \
  42. clFinish(a);
  43. # endif /* CYCLES_DISABLE_DRIVER_WORKAROUNDS */
  44. # define CL_MEM_PTR(p) ((cl_mem)(uintptr_t)(p))
  45. struct OpenCLPlatformDevice {
  46. OpenCLPlatformDevice(cl_platform_id platform_id,
  47. const string &platform_name,
  48. cl_device_id device_id,
  49. cl_device_type device_type,
  50. const string &device_name,
  51. const string &hardware_id,
  52. const string &device_extensions)
  53. : platform_id(platform_id),
  54. platform_name(platform_name),
  55. device_id(device_id),
  56. device_type(device_type),
  57. device_name(device_name),
  58. hardware_id(hardware_id),
  59. device_extensions(device_extensions)
  60. {
  61. }
  62. cl_platform_id platform_id;
  63. string platform_name;
  64. cl_device_id device_id;
  65. cl_device_type device_type;
  66. string device_name;
  67. string hardware_id;
  68. string device_extensions;
  69. };
  70. /* Contains all static OpenCL helper functions. */
  71. class OpenCLInfo {
  72. public:
  73. static cl_device_type device_type();
  74. static bool use_debug();
  75. static bool device_supported(const string &platform_name, const cl_device_id device_id);
  76. static bool platform_version_check(cl_platform_id platform, string *error = NULL);
  77. static bool device_version_check(cl_device_id device, string *error = NULL);
  78. static string get_hardware_id(const string &platform_name, cl_device_id device_id);
  79. static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
  80. bool force_all = false);
  81. /* ** Some handy shortcuts to low level cl*GetInfo() functions. ** */
  82. /* Platform information. */
  83. static bool get_num_platforms(cl_uint *num_platforms, cl_int *error = NULL);
  84. static cl_uint get_num_platforms();
  85. static bool get_platforms(vector<cl_platform_id> *platform_ids, cl_int *error = NULL);
  86. static vector<cl_platform_id> get_platforms();
  87. static bool get_platform_name(cl_platform_id platform_id, string *platform_name);
  88. static string get_platform_name(cl_platform_id platform_id);
  89. static bool get_num_platform_devices(cl_platform_id platform_id,
  90. cl_device_type device_type,
  91. cl_uint *num_devices,
  92. cl_int *error = NULL);
  93. static cl_uint get_num_platform_devices(cl_platform_id platform_id, cl_device_type device_type);
  94. static bool get_platform_devices(cl_platform_id platform_id,
  95. cl_device_type device_type,
  96. vector<cl_device_id> *device_ids,
  97. cl_int *error = NULL);
  98. static vector<cl_device_id> get_platform_devices(cl_platform_id platform_id,
  99. cl_device_type device_type);
  100. /* Device information. */
  101. static bool get_device_name(cl_device_id device_id, string *device_name, cl_int *error = NULL);
  102. static string get_device_name(cl_device_id device_id);
  103. static bool get_device_extensions(cl_device_id device_id,
  104. string *device_extensions,
  105. cl_int *error = NULL);
  106. static string get_device_extensions(cl_device_id device_id);
  107. static bool get_device_type(cl_device_id device_id,
  108. cl_device_type *device_type,
  109. cl_int *error = NULL);
  110. static cl_device_type get_device_type(cl_device_id device_id);
  111. static bool get_driver_version(cl_device_id device_id,
  112. int *major,
  113. int *minor,
  114. cl_int *error = NULL);
  115. static int mem_sub_ptr_alignment(cl_device_id device_id);
  116. /* Get somewhat more readable device name.
  117. * Main difference is AMD OpenCL here which only gives code name
  118. * for the regular device name. This will give more sane device
  119. * name using some extensions.
  120. */
  121. static string get_readable_device_name(cl_device_id device_id);
  122. };
  123. /* Thread safe cache for contexts and programs.
  124. */
  125. class OpenCLCache {
  126. struct Slot {
  127. struct ProgramEntry {
  128. ProgramEntry();
  129. ProgramEntry(const ProgramEntry &rhs);
  130. ~ProgramEntry();
  131. cl_program program;
  132. thread_mutex *mutex;
  133. };
  134. Slot();
  135. Slot(const Slot &rhs);
  136. ~Slot();
  137. thread_mutex *context_mutex;
  138. cl_context context;
  139. typedef map<ustring, ProgramEntry> EntryMap;
  140. EntryMap programs;
  141. };
  142. /* key is combination of platform ID and device ID */
  143. typedef pair<cl_platform_id, cl_device_id> PlatformDevicePair;
  144. /* map of Slot objects */
  145. typedef map<PlatformDevicePair, Slot> CacheMap;
  146. CacheMap cache;
  147. /* MD5 hash of the kernel source. */
  148. string kernel_md5;
  149. thread_mutex cache_lock;
  150. thread_mutex kernel_md5_lock;
  151. /* lazy instantiate */
  152. static OpenCLCache &global_instance();
  153. public:
  154. enum ProgramName {
  155. OCL_DEV_BASE_PROGRAM,
  156. OCL_DEV_MEGAKERNEL_PROGRAM,
  157. };
  158. /* Lookup context in the cache. If this returns NULL, slot_locker
  159. * will be holding a lock for the cache. slot_locker should refer to a
  160. * default constructed thread_scoped_lock. */
  161. static cl_context get_context(cl_platform_id platform,
  162. cl_device_id device,
  163. thread_scoped_lock &slot_locker);
  164. /* Same as above. */
  165. static cl_program get_program(cl_platform_id platform,
  166. cl_device_id device,
  167. ustring key,
  168. thread_scoped_lock &slot_locker);
  169. /* Store context in the cache. You MUST have tried to get the item before storing to it. */
  170. static void store_context(cl_platform_id platform,
  171. cl_device_id device,
  172. cl_context context,
  173. thread_scoped_lock &slot_locker);
  174. /* Same as above. */
  175. static void store_program(cl_platform_id platform,
  176. cl_device_id device,
  177. cl_program program,
  178. ustring key,
  179. thread_scoped_lock &slot_locker);
  180. static string get_kernel_md5();
  181. };
  182. # define opencl_device_assert(device, stmt) \
  183. { \
  184. cl_int err = stmt; \
  185. \
  186. if (err != CL_SUCCESS) { \
  187. string message = string_printf( \
  188. "OpenCL error: %s in %s (%s:%d)", clewErrorString(err), #stmt, __FILE__, __LINE__); \
  189. if ((device)->error_message() == "") \
  190. (device)->set_error(message); \
  191. fprintf(stderr, "%s\n", message.c_str()); \
  192. } \
  193. } \
  194. (void)0
  195. # define opencl_assert(stmt) \
  196. { \
  197. cl_int err = stmt; \
  198. \
  199. if (err != CL_SUCCESS) { \
  200. string message = string_printf( \
  201. "OpenCL error: %s in %s (%s:%d)", clewErrorString(err), #stmt, __FILE__, __LINE__); \
  202. if (error_msg == "") \
  203. error_msg = message; \
  204. fprintf(stderr, "%s\n", message.c_str()); \
  205. } \
  206. } \
  207. (void)0
  208. class OpenCLDevice : public Device {
  209. public:
  210. DedicatedTaskPool task_pool;
  211. /* Task pool for required kernels (base, AO kernels during foreground rendering) */
  212. TaskPool load_required_kernel_task_pool;
  213. /* Task pool for optional kernels (feature kernels during foreground rendering) */
  214. TaskPool load_kernel_task_pool;
  215. cl_context cxContext;
  216. cl_command_queue cqCommandQueue;
  217. cl_platform_id cpPlatform;
  218. cl_device_id cdDevice;
  219. cl_int ciErr;
  220. int device_num;
  221. bool use_preview_kernels;
  222. class OpenCLProgram {
  223. public:
  224. OpenCLProgram() : loaded(false), needs_compiling(true), program(NULL), device(NULL)
  225. {
  226. }
  227. OpenCLProgram(OpenCLDevice *device,
  228. const string &program_name,
  229. const string &kernel_name,
  230. const string &kernel_build_options,
  231. bool use_stdout = true);
  232. ~OpenCLProgram();
  233. void add_kernel(ustring name);
  234. /* Try to load the program from device cache or disk */
  235. bool load();
  236. /* Compile the kernel (first separate, failback to local) */
  237. void compile();
  238. /* Create the OpenCL kernels after loading or compiling */
  239. void create_kernels();
  240. bool is_loaded() const
  241. {
  242. return loaded;
  243. }
  244. const string &get_log() const
  245. {
  246. return log;
  247. }
  248. void report_error();
  249. /* Wait until this kernel is available to be used
  250. * It will return true when the kernel is available.
  251. * It will return false when the kernel is not available
  252. * or could not be loaded. */
  253. bool wait_for_availability();
  254. cl_kernel operator()();
  255. cl_kernel operator()(ustring name);
  256. void release();
  257. private:
  258. bool build_kernel(const string *debug_src);
  259. /* Build the program by calling the own process.
  260. * This is required for multithreaded OpenCL compilation, since most Frameworks serialize
  261. * build calls internally if they come from the same process.
  262. * If that is not supported, this function just returns false.
  263. */
  264. bool compile_separate(const string &clbin);
  265. /* Build the program by calling OpenCL directly. */
  266. bool compile_kernel(const string *debug_src);
  267. /* Loading and saving the program from/to disk. */
  268. bool load_binary(const string &clbin, const string *debug_src = NULL);
  269. bool save_binary(const string &clbin);
  270. void add_log(const string &msg, bool is_debug);
  271. void add_error(const string &msg);
  272. bool loaded;
  273. bool needs_compiling;
  274. cl_program program;
  275. OpenCLDevice *device;
  276. /* Used for the OpenCLCache key. */
  277. string program_name;
  278. string kernel_file, kernel_build_options, device_md5;
  279. bool use_stdout;
  280. string log, error_msg;
  281. string compile_output;
  282. map<ustring, cl_kernel> kernels;
  283. };
  284. /* Container for all types of split programs. */
  285. class OpenCLSplitPrograms {
  286. public:
  287. OpenCLDevice *device;
  288. OpenCLProgram program_split;
  289. OpenCLProgram program_lamp_emission;
  290. OpenCLProgram program_do_volume;
  291. OpenCLProgram program_indirect_background;
  292. OpenCLProgram program_shader_eval;
  293. OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
  294. OpenCLProgram program_subsurface_scatter;
  295. OpenCLProgram program_direct_lighting;
  296. OpenCLProgram program_shadow_blocked_ao;
  297. OpenCLProgram program_shadow_blocked_dl;
  298. OpenCLSplitPrograms(OpenCLDevice *device);
  299. ~OpenCLSplitPrograms();
  300. /* Load the kernels and put the created kernels in the given
  301. * `programs` parameter. */
  302. void load_kernels(vector<OpenCLProgram *> &programs,
  303. const DeviceRequestedFeatures &requested_features,
  304. bool is_preview = false);
  305. };
  306. DeviceSplitKernel *split_kernel;
  307. OpenCLProgram base_program;
  308. OpenCLProgram bake_program;
  309. OpenCLProgram displace_program;
  310. OpenCLProgram background_program;
  311. OpenCLProgram denoising_program;
  312. OpenCLSplitPrograms kernel_programs;
  313. OpenCLSplitPrograms preview_programs;
  314. typedef map<string, device_vector<uchar> *> ConstMemMap;
  315. typedef map<string, device_ptr> MemMap;
  316. ConstMemMap const_mem_map;
  317. MemMap mem_map;
  318. device_ptr null_mem;
  319. bool device_initialized;
  320. string platform_name;
  321. string device_name;
  322. bool opencl_error(cl_int err);
  323. void opencl_error(const string &message);
  324. void opencl_assert_err(cl_int err, const char *where);
  325. OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
  326. ~OpenCLDevice();
  327. static void CL_CALLBACK context_notify_callback(const char *err_info,
  328. const void * /*private_info*/,
  329. size_t /*cb*/,
  330. void *user_data);
  331. bool opencl_version_check();
  332. OpenCLSplitPrograms *get_split_programs();
  333. string device_md5_hash(string kernel_custom_build_options = "");
  334. bool load_kernels(const DeviceRequestedFeatures &requested_features);
  335. void load_required_kernels(const DeviceRequestedFeatures &requested_features);
  336. void load_preview_kernels();
  337. bool wait_for_availability(const DeviceRequestedFeatures &requested_features);
  338. DeviceKernelStatus get_active_kernel_switch_state();
  339. /* Get the name of the opencl program for the given kernel */
  340. const string get_opencl_program_name(const string &kernel_name);
  341. /* Get the program file name to compile (*.cl) for the given kernel */
  342. const string get_opencl_program_filename(const string &kernel_name);
  343. string get_build_options(const DeviceRequestedFeatures &requested_features,
  344. const string &opencl_program_name,
  345. bool preview_kernel = false);
  346. /* Enable the default features to reduce recompilation events */
  347. void enable_default_features(DeviceRequestedFeatures &features);
  348. void mem_alloc(device_memory &mem);
  349. void mem_copy_to(device_memory &mem);
  350. void mem_copy_from(device_memory &mem, int y, int w, int h, int elem);
  351. void mem_zero(device_memory &mem);
  352. void mem_free(device_memory &mem);
  353. int mem_sub_ptr_alignment();
  354. void const_copy_to(const char *name, void *host, size_t size);
  355. void tex_alloc(device_memory &mem);
  356. void tex_free(device_memory &mem);
  357. size_t global_size_round_up(int group_size, int global_size);
  358. void enqueue_kernel(cl_kernel kernel,
  359. size_t w,
  360. size_t h,
  361. bool x_workgroups = false,
  362. size_t max_workgroup_size = -1);
  363. void set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name);
  364. void set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg);
  365. void film_convert(DeviceTask &task,
  366. device_ptr buffer,
  367. device_ptr rgba_byte,
  368. device_ptr rgba_half);
  369. void shader(DeviceTask &task);
  370. void denoise(RenderTile &tile, DenoisingTask &denoising);
  371. class OpenCLDeviceTask : public DeviceTask {
  372. public:
  373. OpenCLDeviceTask(OpenCLDevice *device, DeviceTask &task) : DeviceTask(task)
  374. {
  375. run = function_bind(&OpenCLDevice::thread_run, device, this);
  376. }
  377. };
  378. int get_split_task_count(DeviceTask & /*task*/)
  379. {
  380. return 1;
  381. }
  382. void task_add(DeviceTask &task)
  383. {
  384. task_pool.push(new OpenCLDeviceTask(this, task));
  385. }
  386. void task_wait()
  387. {
  388. task_pool.wait();
  389. }
  390. void task_cancel()
  391. {
  392. task_pool.cancel();
  393. }
  394. void thread_run(DeviceTask *task);
  395. virtual BVHLayoutMask get_bvh_layout_mask() const
  396. {
  397. return BVH_LAYOUT_BVH2;
  398. }
  399. virtual bool show_samples() const
  400. {
  401. return true;
  402. }
  403. protected:
  404. string kernel_build_options(const string *debug_src = NULL);
  405. void mem_zero_kernel(device_ptr ptr, size_t size);
  406. bool denoising_non_local_means(device_ptr image_ptr,
  407. device_ptr guide_ptr,
  408. device_ptr variance_ptr,
  409. device_ptr out_ptr,
  410. DenoisingTask *task);
  411. bool denoising_construct_transform(DenoisingTask *task);
  412. bool denoising_accumulate(device_ptr color_ptr,
  413. device_ptr color_variance_ptr,
  414. device_ptr scale_ptr,
  415. int frame,
  416. DenoisingTask *task);
  417. bool denoising_solve(device_ptr output_ptr, DenoisingTask *task);
  418. bool denoising_combine_halves(device_ptr a_ptr,
  419. device_ptr b_ptr,
  420. device_ptr mean_ptr,
  421. device_ptr variance_ptr,
  422. int r,
  423. int4 rect,
  424. DenoisingTask *task);
  425. bool denoising_divide_shadow(device_ptr a_ptr,
  426. device_ptr b_ptr,
  427. device_ptr sample_variance_ptr,
  428. device_ptr sv_variance_ptr,
  429. device_ptr buffer_variance_ptr,
  430. DenoisingTask *task);
  431. bool denoising_get_feature(int mean_offset,
  432. int variance_offset,
  433. device_ptr mean_ptr,
  434. device_ptr variance_ptr,
  435. float scale,
  436. DenoisingTask *task);
  437. bool denoising_write_feature(int to_offset,
  438. device_ptr from_ptr,
  439. device_ptr buffer_ptr,
  440. DenoisingTask *task);
  441. bool denoising_detect_outliers(device_ptr image_ptr,
  442. device_ptr variance_ptr,
  443. device_ptr depth_ptr,
  444. device_ptr output_ptr,
  445. DenoisingTask *task);
  446. device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int size);
  447. void mem_free_sub_ptr(device_ptr ptr);
  448. class ArgumentWrapper {
  449. public:
  450. ArgumentWrapper() : size(0), pointer(NULL)
  451. {
  452. }
  453. ArgumentWrapper(device_memory &argument)
  454. : size(sizeof(void *)), pointer((void *)(&argument.device_pointer))
  455. {
  456. }
  457. template<typename T>
  458. ArgumentWrapper(device_vector<T> &argument)
  459. : size(sizeof(void *)), pointer((void *)(&argument.device_pointer))
  460. {
  461. }
  462. template<typename T>
  463. ArgumentWrapper(device_only_memory<T> &argument)
  464. : size(sizeof(void *)), pointer((void *)(&argument.device_pointer))
  465. {
  466. }
  467. template<typename T> ArgumentWrapper(T &argument) : size(sizeof(argument)), pointer(&argument)
  468. {
  469. }
  470. ArgumentWrapper(int argument) : size(sizeof(int)), int_value(argument), pointer(&int_value)
  471. {
  472. }
  473. ArgumentWrapper(float argument)
  474. : size(sizeof(float)), float_value(argument), pointer(&float_value)
  475. {
  476. }
  477. size_t size;
  478. int int_value;
  479. float float_value;
  480. void *pointer;
  481. };
  482. /* TODO(sergey): In the future we can use variadic templates, once
  483. * C++0x is allowed. Should allow to clean this up a bit.
  484. */
  485. int kernel_set_args(cl_kernel kernel,
  486. int start_argument_index,
  487. const ArgumentWrapper &arg1 = ArgumentWrapper(),
  488. const ArgumentWrapper &arg2 = ArgumentWrapper(),
  489. const ArgumentWrapper &arg3 = ArgumentWrapper(),
  490. const ArgumentWrapper &arg4 = ArgumentWrapper(),
  491. const ArgumentWrapper &arg5 = ArgumentWrapper(),
  492. const ArgumentWrapper &arg6 = ArgumentWrapper(),
  493. const ArgumentWrapper &arg7 = ArgumentWrapper(),
  494. const ArgumentWrapper &arg8 = ArgumentWrapper(),
  495. const ArgumentWrapper &arg9 = ArgumentWrapper(),
  496. const ArgumentWrapper &arg10 = ArgumentWrapper(),
  497. const ArgumentWrapper &arg11 = ArgumentWrapper(),
  498. const ArgumentWrapper &arg12 = ArgumentWrapper(),
  499. const ArgumentWrapper &arg13 = ArgumentWrapper(),
  500. const ArgumentWrapper &arg14 = ArgumentWrapper(),
  501. const ArgumentWrapper &arg15 = ArgumentWrapper(),
  502. const ArgumentWrapper &arg16 = ArgumentWrapper(),
  503. const ArgumentWrapper &arg17 = ArgumentWrapper(),
  504. const ArgumentWrapper &arg18 = ArgumentWrapper(),
  505. const ArgumentWrapper &arg19 = ArgumentWrapper(),
  506. const ArgumentWrapper &arg20 = ArgumentWrapper(),
  507. const ArgumentWrapper &arg21 = ArgumentWrapper(),
  508. const ArgumentWrapper &arg22 = ArgumentWrapper(),
  509. const ArgumentWrapper &arg23 = ArgumentWrapper(),
  510. const ArgumentWrapper &arg24 = ArgumentWrapper(),
  511. const ArgumentWrapper &arg25 = ArgumentWrapper(),
  512. const ArgumentWrapper &arg26 = ArgumentWrapper(),
  513. const ArgumentWrapper &arg27 = ArgumentWrapper(),
  514. const ArgumentWrapper &arg28 = ArgumentWrapper(),
  515. const ArgumentWrapper &arg29 = ArgumentWrapper(),
  516. const ArgumentWrapper &arg30 = ArgumentWrapper(),
  517. const ArgumentWrapper &arg31 = ArgumentWrapper(),
  518. const ArgumentWrapper &arg32 = ArgumentWrapper(),
  519. const ArgumentWrapper &arg33 = ArgumentWrapper());
  520. void release_kernel_safe(cl_kernel kernel);
  521. void release_mem_object_safe(cl_mem mem);
  522. void release_program_safe(cl_program program);
  523. /* ** Those guys are for workign around some compiler-specific bugs ** */
  524. cl_program load_cached_kernel(ustring key, thread_scoped_lock &cache_locker);
  525. void store_cached_kernel(cl_program program, ustring key, thread_scoped_lock &cache_locker);
  526. private:
  527. MemoryManager memory_manager;
  528. friend class MemoryManager;
  529. static_assert_align(TextureInfo, 16);
  530. device_vector<TextureInfo> texture_info;
  531. typedef map<string, device_memory *> TexturesMap;
  532. TexturesMap textures;
  533. bool textures_need_update;
  534. protected:
  535. void flush_texture_buffers();
  536. friend class OpenCLSplitKernel;
  537. friend class OpenCLSplitKernelFunction;
  538. };
  539. Device *opencl_create_split_device(DeviceInfo &info,
  540. Stats &stats,
  541. Profiler &profiler,
  542. bool background);
  543. CCL_NAMESPACE_END
  544. #endif