device.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622
  1. /*
  2. * Copyright 2011-2013 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <stdlib.h>
  17. #include <string.h>
  18. #include "device/device.h"
  19. #include "device/device_intern.h"
  20. #include "util/util_foreach.h"
  21. #include "util/util_half.h"
  22. #include "util/util_logging.h"
  23. #include "util/util_math.h"
  24. #include "util/util_opengl.h"
  25. #include "util/util_time.h"
  26. #include "util/util_system.h"
  27. #include "util/util_types.h"
  28. #include "util/util_vector.h"
  29. #include "util/util_string.h"
  30. CCL_NAMESPACE_BEGIN
/* Static Device state: lazily-built, cached device enumeration shared by all
 * Device instances. The per-backend lists and the initialized mask are filled
 * on demand by available_devices() under device_mutex.
 * NOTE(review): free_memory() resets these without taking the mutex — confirm
 * callers serialize against enumeration. */
bool Device::need_types_update = true;
bool Device::need_devices_update = true;
thread_mutex Device::device_mutex;
vector<DeviceInfo> Device::opencl_devices;
vector<DeviceInfo> Device::cuda_devices;
vector<DeviceInfo> Device::cpu_devices;
vector<DeviceInfo> Device::network_devices;
/* Bitmask of DEVICE_MASK_* values for backends whose device list has already
 * been queried (even if the query found no devices). */
uint Device::devices_initialized_mask = 0;
  39. /* Device Requested Features */
  40. std::ostream &operator<<(std::ostream &os, const DeviceRequestedFeatures &requested_features)
  41. {
  42. os << "Experimental features: " << (requested_features.experimental ? "On" : "Off") << std::endl;
  43. os << "Max nodes group: " << requested_features.max_nodes_group << std::endl;
  44. /* TODO(sergey): Decode bitflag into list of names. */
  45. os << "Nodes features: " << requested_features.nodes_features << std::endl;
  46. os << "Use Hair: " << string_from_bool(requested_features.use_hair) << std::endl;
  47. os << "Use Object Motion: " << string_from_bool(requested_features.use_object_motion)
  48. << std::endl;
  49. os << "Use Camera Motion: " << string_from_bool(requested_features.use_camera_motion)
  50. << std::endl;
  51. os << "Use Baking: " << string_from_bool(requested_features.use_baking) << std::endl;
  52. os << "Use Subsurface: " << string_from_bool(requested_features.use_subsurface) << std::endl;
  53. os << "Use Volume: " << string_from_bool(requested_features.use_volume) << std::endl;
  54. os << "Use Branched Integrator: " << string_from_bool(requested_features.use_integrator_branched)
  55. << std::endl;
  56. os << "Use Patch Evaluation: " << string_from_bool(requested_features.use_patch_evaluation)
  57. << std::endl;
  58. os << "Use Transparent Shadows: " << string_from_bool(requested_features.use_transparent)
  59. << std::endl;
  60. os << "Use Principled BSDF: " << string_from_bool(requested_features.use_principled)
  61. << std::endl;
  62. os << "Use Denoising: " << string_from_bool(requested_features.use_denoising) << std::endl;
  63. os << "Use Displacement: " << string_from_bool(requested_features.use_true_displacement)
  64. << std::endl;
  65. os << "Use Background Light: " << string_from_bool(requested_features.use_background_light)
  66. << std::endl;
  67. return os;
  68. }
  69. /* Device */
  70. Device::~Device()
  71. {
  72. if (!background) {
  73. if (vertex_buffer != 0) {
  74. glDeleteBuffers(1, &vertex_buffer);
  75. }
  76. if (fallback_shader_program != 0) {
  77. glDeleteProgram(fallback_shader_program);
  78. }
  79. }
  80. }
/* TODO move shaders to standalone .glsl file. */

/* Minimal GLSL 330 vertex shader used when the host application does not bind
 * its own display-space shader: converts `pos` from pixel coordinates (via the
 * `fullscreen` viewport size uniform) into normalized device coordinates and
 * passes the texture coordinate through to the fragment stage. */
const char *FALLBACK_VERTEX_SHADER =
    "#version 330\n"
    "uniform vec2 fullscreen;\n"
    "in vec2 texCoord;\n"
    "in vec2 pos;\n"
    "out vec2 texCoord_interp;\n"
    "\n"
    "vec2 normalize_coordinates()\n"
    "{\n"
    " return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n"
    "}\n"
    "\n"
    "void main()\n"
    "{\n"
    " gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n"
    " texCoord_interp = texCoord;\n"
    "}\n\0";

/* Matching fragment shader: a plain textured copy of the render result, with
 * no color management applied. */
const char *FALLBACK_FRAGMENT_SHADER =
    "#version 330\n"
    "uniform sampler2D image_texture;\n"
    "in vec2 texCoord_interp;\n"
    "out vec4 fragColor;\n"
    "\n"
    "void main()\n"
    "{\n"
    " fragColor = texture(image_texture, texCoord_interp);\n"
    "}\n\0";
  109. static void shader_print_errors(const char *task, const char *log, const char *code)
  110. {
  111. LOG(ERROR) << "Shader: " << task << " error:";
  112. LOG(ERROR) << "===== shader string ====";
  113. stringstream stream(code);
  114. string partial;
  115. int line = 1;
  116. while (getline(stream, partial, '\n')) {
  117. if (line < 10) {
  118. LOG(ERROR) << " " << line << " " << partial;
  119. }
  120. else {
  121. LOG(ERROR) << line << " " << partial;
  122. }
  123. line++;
  124. }
  125. LOG(ERROR) << log;
  126. }
  127. static int bind_fallback_shader(void)
  128. {
  129. GLint status;
  130. GLchar log[5000];
  131. GLsizei length = 0;
  132. GLuint program = 0;
  133. struct Shader {
  134. const char *source;
  135. GLenum type;
  136. } shaders[2] = {{FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER},
  137. {FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}};
  138. program = glCreateProgram();
  139. for (int i = 0; i < 2; i++) {
  140. GLuint shader = glCreateShader(shaders[i].type);
  141. string source_str = shaders[i].source;
  142. const char *c_str = source_str.c_str();
  143. glShaderSource(shader, 1, &c_str, NULL);
  144. glCompileShader(shader);
  145. glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
  146. if (!status) {
  147. glGetShaderInfoLog(shader, sizeof(log), &length, log);
  148. shader_print_errors("compile", log, c_str);
  149. return 0;
  150. }
  151. glAttachShader(program, shader);
  152. }
  153. /* Link output. */
  154. glBindFragDataLocation(program, 0, "fragColor");
  155. /* Link and error check. */
  156. glLinkProgram(program);
  157. glGetProgramiv(program, GL_LINK_STATUS, &status);
  158. if (!status) {
  159. glGetShaderInfoLog(program, sizeof(log), &length, log);
  160. shader_print_errors("linking", log, FALLBACK_VERTEX_SHADER);
  161. shader_print_errors("linking", log, FALLBACK_FRAGMENT_SHADER);
  162. return 0;
  163. }
  164. return program;
  165. }
  166. bool Device::bind_fallback_display_space_shader(const float width, const float height)
  167. {
  168. if (fallback_status == FALLBACK_SHADER_STATUS_ERROR) {
  169. return false;
  170. }
  171. if (fallback_status == FALLBACK_SHADER_STATUS_NONE) {
  172. fallback_shader_program = bind_fallback_shader();
  173. fallback_status = FALLBACK_SHADER_STATUS_ERROR;
  174. if (fallback_shader_program == 0) {
  175. return false;
  176. }
  177. glUseProgram(fallback_shader_program);
  178. image_texture_location = glGetUniformLocation(fallback_shader_program, "image_texture");
  179. if (image_texture_location < 0) {
  180. LOG(ERROR) << "Shader doesn't containt the 'image_texture' uniform.";
  181. return false;
  182. }
  183. fullscreen_location = glGetUniformLocation(fallback_shader_program, "fullscreen");
  184. if (fullscreen_location < 0) {
  185. LOG(ERROR) << "Shader doesn't containt the 'fullscreen' uniform.";
  186. return false;
  187. }
  188. fallback_status = FALLBACK_SHADER_STATUS_SUCCESS;
  189. }
  190. /* Run this every time. */
  191. glUseProgram(fallback_shader_program);
  192. glUniform1i(image_texture_location, 0);
  193. glUniform2f(fullscreen_location, width, height);
  194. return true;
  195. }
  196. void Device::draw_pixels(device_memory &rgba,
  197. int y,
  198. int w,
  199. int h,
  200. int width,
  201. int height,
  202. int dx,
  203. int dy,
  204. int dw,
  205. int dh,
  206. bool transparent,
  207. const DeviceDrawParams &draw_params)
  208. {
  209. const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);
  210. assert(rgba.type == MEM_PIXELS);
  211. mem_copy_from(rgba, y, w, h, rgba.memory_elements_size(1));
  212. GLuint texid;
  213. glActiveTexture(GL_TEXTURE0);
  214. glGenTextures(1, &texid);
  215. glBindTexture(GL_TEXTURE_2D, texid);
  216. if (rgba.data_type == TYPE_HALF) {
  217. GLhalf *data_pointer = (GLhalf *)rgba.host_pointer;
  218. data_pointer += 4 * y * w;
  219. glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, w, h, 0, GL_RGBA, GL_HALF_FLOAT, data_pointer);
  220. }
  221. else {
  222. uint8_t *data_pointer = (uint8_t *)rgba.host_pointer;
  223. data_pointer += 4 * y * w;
  224. glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, data_pointer);
  225. }
  226. glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
  227. glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
  228. if (transparent) {
  229. glEnable(GL_BLEND);
  230. glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
  231. }
  232. GLint shader_program;
  233. if (use_fallback_shader) {
  234. if (!bind_fallback_display_space_shader(dw, dh)) {
  235. return;
  236. }
  237. shader_program = fallback_shader_program;
  238. }
  239. else {
  240. draw_params.bind_display_space_shader_cb();
  241. glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program);
  242. }
  243. if (!vertex_buffer) {
  244. glGenBuffers(1, &vertex_buffer);
  245. }
  246. glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
  247. /* invalidate old contents - avoids stalling if buffer is still waiting in queue to be rendered
  248. */
  249. glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
  250. float *vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
  251. if (vpointer) {
  252. /* texture coordinate - vertex pair */
  253. vpointer[0] = 0.0f;
  254. vpointer[1] = 0.0f;
  255. vpointer[2] = dx;
  256. vpointer[3] = dy;
  257. vpointer[4] = 1.0f;
  258. vpointer[5] = 0.0f;
  259. vpointer[6] = (float)width + dx;
  260. vpointer[7] = dy;
  261. vpointer[8] = 1.0f;
  262. vpointer[9] = 1.0f;
  263. vpointer[10] = (float)width + dx;
  264. vpointer[11] = (float)height + dy;
  265. vpointer[12] = 0.0f;
  266. vpointer[13] = 1.0f;
  267. vpointer[14] = dx;
  268. vpointer[15] = (float)height + dy;
  269. if (vertex_buffer) {
  270. glUnmapBuffer(GL_ARRAY_BUFFER);
  271. }
  272. }
  273. GLuint vertex_array_object;
  274. GLuint position_attribute, texcoord_attribute;
  275. glGenVertexArrays(1, &vertex_array_object);
  276. glBindVertexArray(vertex_array_object);
  277. texcoord_attribute = glGetAttribLocation(shader_program, "texCoord");
  278. position_attribute = glGetAttribLocation(shader_program, "pos");
  279. glEnableVertexAttribArray(texcoord_attribute);
  280. glEnableVertexAttribArray(position_attribute);
  281. glVertexAttribPointer(
  282. texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
  283. glVertexAttribPointer(position_attribute,
  284. 2,
  285. GL_FLOAT,
  286. GL_FALSE,
  287. 4 * sizeof(float),
  288. (const GLvoid *)(sizeof(float) * 2));
  289. glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
  290. if (vertex_buffer) {
  291. glBindBuffer(GL_ARRAY_BUFFER, 0);
  292. }
  293. if (use_fallback_shader) {
  294. glUseProgram(0);
  295. }
  296. else {
  297. draw_params.unbind_display_space_shader_cb();
  298. }
  299. glDeleteVertexArrays(1, &vertex_array_object);
  300. glBindTexture(GL_TEXTURE_2D, 0);
  301. glDeleteTextures(1, &texid);
  302. if (transparent) {
  303. glDisable(GL_BLEND);
  304. }
  305. }
  306. Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
  307. {
  308. Device *device;
  309. switch (info.type) {
  310. case DEVICE_CPU:
  311. device = device_cpu_create(info, stats, profiler, background);
  312. break;
  313. #ifdef WITH_CUDA
  314. case DEVICE_CUDA:
  315. if (device_cuda_init())
  316. device = device_cuda_create(info, stats, profiler, background);
  317. else
  318. device = NULL;
  319. break;
  320. #endif
  321. #ifdef WITH_MULTI
  322. case DEVICE_MULTI:
  323. device = device_multi_create(info, stats, profiler, background);
  324. break;
  325. #endif
  326. #ifdef WITH_NETWORK
  327. case DEVICE_NETWORK:
  328. device = device_network_create(info, stats, profiler, "127.0.0.1");
  329. break;
  330. #endif
  331. #ifdef WITH_OPENCL
  332. case DEVICE_OPENCL:
  333. if (device_opencl_init())
  334. device = device_opencl_create(info, stats, profiler, background);
  335. else
  336. device = NULL;
  337. break;
  338. #endif
  339. default:
  340. return NULL;
  341. }
  342. return device;
  343. }
  344. DeviceType Device::type_from_string(const char *name)
  345. {
  346. if (strcmp(name, "CPU") == 0)
  347. return DEVICE_CPU;
  348. else if (strcmp(name, "CUDA") == 0)
  349. return DEVICE_CUDA;
  350. else if (strcmp(name, "OPENCL") == 0)
  351. return DEVICE_OPENCL;
  352. else if (strcmp(name, "NETWORK") == 0)
  353. return DEVICE_NETWORK;
  354. else if (strcmp(name, "MULTI") == 0)
  355. return DEVICE_MULTI;
  356. return DEVICE_NONE;
  357. }
  358. string Device::string_from_type(DeviceType type)
  359. {
  360. if (type == DEVICE_CPU)
  361. return "CPU";
  362. else if (type == DEVICE_CUDA)
  363. return "CUDA";
  364. else if (type == DEVICE_OPENCL)
  365. return "OPENCL";
  366. else if (type == DEVICE_NETWORK)
  367. return "NETWORK";
  368. else if (type == DEVICE_MULTI)
  369. return "MULTI";
  370. return "";
  371. }
  372. vector<DeviceType> Device::available_types()
  373. {
  374. vector<DeviceType> types;
  375. types.push_back(DEVICE_CPU);
  376. #ifdef WITH_CUDA
  377. types.push_back(DEVICE_CUDA);
  378. #endif
  379. #ifdef WITH_OPENCL
  380. types.push_back(DEVICE_OPENCL);
  381. #endif
  382. #ifdef WITH_NETWORK
  383. types.push_back(DEVICE_NETWORK);
  384. #endif
  385. return types;
  386. }
/* Enumerate devices for the backends selected by `mask` (DEVICE_MASK_* bits).
 *
 * Results are cached in the per-backend static lists; each backend is queried
 * at most once (tracked via devices_initialized_mask) until free_memory()
 * resets the cache. The whole enumeration runs under device_mutex. */
vector<DeviceInfo> Device::available_devices(uint mask)
{
  /* Lazy initialize devices. On some platforms OpenCL or CUDA drivers can
   * be broken and cause crashes when only trying to get device info, so
   * we don't want to do any initialization until the user chooses to. */
  thread_scoped_lock lock(device_mutex);
  vector<DeviceInfo> devices;

#ifdef WITH_OPENCL
  if (mask & DEVICE_MASK_OPENCL) {
    if (!(devices_initialized_mask & DEVICE_MASK_OPENCL)) {
      if (device_opencl_init()) {
        device_opencl_info(opencl_devices);
      }
      /* Mark as queried even on init failure so we don't retry every call. */
      devices_initialized_mask |= DEVICE_MASK_OPENCL;
    }
    foreach (DeviceInfo &info, opencl_devices) {
      devices.push_back(info);
    }
  }
#endif

#ifdef WITH_CUDA
  if (mask & DEVICE_MASK_CUDA) {
    if (!(devices_initialized_mask & DEVICE_MASK_CUDA)) {
      if (device_cuda_init()) {
        device_cuda_info(cuda_devices);
      }
      devices_initialized_mask |= DEVICE_MASK_CUDA;
    }
    foreach (DeviceInfo &info, cuda_devices) {
      devices.push_back(info);
    }
  }
#endif

  /* CPU needs no driver init, but is cached the same way for consistency. */
  if (mask & DEVICE_MASK_CPU) {
    if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
      device_cpu_info(cpu_devices);
      devices_initialized_mask |= DEVICE_MASK_CPU;
    }
    foreach (DeviceInfo &info, cpu_devices) {
      devices.push_back(info);
    }
  }

#ifdef WITH_NETWORK
  if (mask & DEVICE_MASK_NETWORK) {
    if (!(devices_initialized_mask & DEVICE_MASK_NETWORK)) {
      device_network_info(network_devices);
      devices_initialized_mask |= DEVICE_MASK_NETWORK;
    }
    foreach (DeviceInfo &info, network_devices) {
      devices.push_back(info);
    }
  }
#endif

  return devices;
}
  442. string Device::device_capabilities(uint mask)
  443. {
  444. thread_scoped_lock lock(device_mutex);
  445. string capabilities = "";
  446. if (mask & DEVICE_MASK_CPU) {
  447. capabilities += "\nCPU device capabilities: ";
  448. capabilities += device_cpu_capabilities() + "\n";
  449. }
  450. #ifdef WITH_OPENCL
  451. if (mask & DEVICE_MASK_OPENCL) {
  452. if (device_opencl_init()) {
  453. capabilities += "\nOpenCL device capabilities:\n";
  454. capabilities += device_opencl_capabilities();
  455. }
  456. }
  457. #endif
  458. #ifdef WITH_CUDA
  459. if (mask & DEVICE_MASK_CUDA) {
  460. if (device_cuda_init()) {
  461. capabilities += "\nCUDA device capabilities:\n";
  462. capabilities += device_cuda_capabilities();
  463. }
  464. }
  465. #endif
  466. return capabilities;
  467. }
  468. DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
  469. int threads,
  470. bool background)
  471. {
  472. assert(subdevices.size() > 0);
  473. if (subdevices.size() == 1) {
  474. /* No multi device needed. */
  475. return subdevices.front();
  476. }
  477. DeviceInfo info;
  478. info.type = DEVICE_MULTI;
  479. info.id = "MULTI";
  480. info.description = "Multi Device";
  481. info.num = 0;
  482. info.has_half_images = true;
  483. info.has_volume_decoupled = true;
  484. info.has_osl = true;
  485. info.has_profiling = true;
  486. foreach (const DeviceInfo &device, subdevices) {
  487. /* Ensure CPU device does not slow down GPU. */
  488. if (device.type == DEVICE_CPU && subdevices.size() > 1) {
  489. if (background) {
  490. int orig_cpu_threads = (threads) ? threads : system_cpu_thread_count();
  491. int cpu_threads = max(orig_cpu_threads - (subdevices.size() - 1), 0);
  492. VLOG(1) << "CPU render threads reduced from " << orig_cpu_threads << " to " << cpu_threads
  493. << ", to dedicate to GPU.";
  494. if (cpu_threads >= 1) {
  495. DeviceInfo cpu_device = device;
  496. cpu_device.cpu_threads = cpu_threads;
  497. info.multi_devices.push_back(cpu_device);
  498. }
  499. else {
  500. continue;
  501. }
  502. }
  503. else {
  504. VLOG(1) << "CPU render threads disabled for interactive render.";
  505. continue;
  506. }
  507. }
  508. else {
  509. info.multi_devices.push_back(device);
  510. }
  511. /* Accumulate device info. */
  512. info.has_half_images &= device.has_half_images;
  513. info.has_volume_decoupled &= device.has_volume_decoupled;
  514. info.has_osl &= device.has_osl;
  515. info.has_profiling &= device.has_profiling;
  516. }
  517. return info;
  518. }
/* Invalidate the cached device enumeration so the next available_devices()
 * call re-queries the platform APIs. */
void Device::tag_update()
{
  free_memory();
}
/* Release the cached per-backend device lists and clear the initialized mask,
 * forcing re-enumeration on demand.
 * NOTE(review): unlike available_devices(), this does not take device_mutex —
 * presumably callers serialize; confirm against call sites. */
void Device::free_memory()
{
  devices_initialized_mask = 0;
  cuda_devices.free_memory();
  opencl_devices.free_memory();
  cpu_devices.free_memory();
  network_devices.free_memory();
}
  531. CCL_NAMESPACE_END