device_split_kernel.h 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. /*
  2. * Copyright 2011-2016 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef __DEVICE_SPLIT_KERNEL_H__
  17. #define __DEVICE_SPLIT_KERNEL_H__
  18. #include "device/device.h"
  19. #include "render/buffers.h"
  20. CCL_NAMESPACE_BEGIN
  /* Global memory is allocated in chunks, and it may not be possible to
   * allocate exactly "CL_DEVICE_MAX_MEM_ALLOC_SIZE" bytes that way, because
   * some bytes may be needed for aligning the chunks of memory.
   * This is the amount of memory that is dedicated to that purpose.
   */
  #define DATA_ALLOCATION_MEM_FACTOR 5000000 /* 5MB */
  27. /* Types used for split kernel */
  /* Pair of two-element work sizes (global and local). An instance is what
   * SplitKernelFunction::enqueue() receives as its `dim` argument.
   */
  class KernelDimensions {
   public:
    size_t global_size[2];
    size_t local_size[2];

    /* Copy the first two entries of each argument into the members.
     *
     * Array parameters decay to pointers, so both arguments must point to at
     * least two readable elements. They are const-qualified because they are
     * only read; callers holding const arrays can therefore construct an
     * instance, and existing callers passing non-const arrays are unaffected.
     *
     * The copy is done element by element so that this header does not depend
     * on a declaration of memcpy() being pulled in by another include.
     */
    KernelDimensions(const size_t global_size_[2], const size_t local_size_[2])
    {
      for (int axis = 0; axis < 2; axis++) {
        global_size[axis] = global_size_[axis];
        local_size[axis] = local_size_[axis];
      }
    }
  };
  38. class SplitKernelFunction {
  39. public:
  40. virtual ~SplitKernelFunction()
  41. {
  42. }
  43. /* enqueue the kernel, returns false if there is an error */
  44. virtual bool enqueue(const KernelDimensions &dim, device_memory &kg, device_memory &data) = 0;
  45. };
  46. class DeviceSplitKernel {
  47. private:
  48. Device *device;
  49. SplitKernelFunction *kernel_path_init;
  50. SplitKernelFunction *kernel_scene_intersect;
  51. SplitKernelFunction *kernel_lamp_emission;
  52. SplitKernelFunction *kernel_do_volume;
  53. SplitKernelFunction *kernel_queue_enqueue;
  54. SplitKernelFunction *kernel_indirect_background;
  55. SplitKernelFunction *kernel_shader_setup;
  56. SplitKernelFunction *kernel_shader_sort;
  57. SplitKernelFunction *kernel_shader_eval;
  58. SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao;
  59. SplitKernelFunction *kernel_subsurface_scatter;
  60. SplitKernelFunction *kernel_direct_lighting;
  61. SplitKernelFunction *kernel_shadow_blocked_ao;
  62. SplitKernelFunction *kernel_shadow_blocked_dl;
  63. SplitKernelFunction *kernel_enqueue_inactive;
  64. SplitKernelFunction *kernel_next_iteration_setup;
  65. SplitKernelFunction *kernel_indirect_subsurface;
  66. SplitKernelFunction *kernel_buffer_update;
  67. /* Global memory variables [porting]; These memory is used for
  68. * co-operation between different kernels; Data written by one
  69. * kernel will be available to another kernel via this global
  70. * memory.
  71. */
  72. device_only_memory<uchar> split_data;
  73. device_vector<uchar> ray_state;
  74. device_only_memory<int>
  75. queue_index; /* Array of size num_queues that tracks the size of each queue. */
  76. /* Flag to make sceneintersect and lampemission kernel use queues. */
  77. device_only_memory<char> use_queues_flag;
  78. /* Approximate time it takes to complete one sample */
  79. double avg_time_per_sample;
  80. /* Work pool with respect to each work group. */
  81. device_only_memory<unsigned int> work_pool_wgs;
  82. /* Cached kernel-dependent data, initialized once. */
  83. bool kernel_data_initialized;
  84. size_t local_size[2];
  85. size_t global_size[2];
  86. public:
  87. explicit DeviceSplitKernel(Device *device);
  88. virtual ~DeviceSplitKernel();
  89. bool load_kernels(const DeviceRequestedFeatures &requested_features);
  90. bool path_trace(DeviceTask *task,
  91. RenderTile &rtile,
  92. device_memory &kgbuffer,
  93. device_memory &kernel_data);
  94. virtual uint64_t state_buffer_size(device_memory &kg,
  95. device_memory &data,
  96. size_t num_threads) = 0;
  97. size_t max_elements_for_max_buffer_size(device_memory &kg,
  98. device_memory &data,
  99. uint64_t max_buffer_size);
  100. virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim,
  101. RenderTile &rtile,
  102. int num_global_elements,
  103. device_memory &kernel_globals,
  104. device_memory &kernel_data_,
  105. device_memory &split_data,
  106. device_memory &ray_state,
  107. device_memory &queue_index,
  108. device_memory &use_queues_flag,
  109. device_memory &work_pool_wgs) = 0;
  110. virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name,
  111. const DeviceRequestedFeatures &) = 0;
  112. virtual int2 split_kernel_local_size() = 0;
  113. virtual int2 split_kernel_global_size(device_memory &kg,
  114. device_memory &data,
  115. DeviceTask *task) = 0;
  116. };
  117. CCL_NAMESPACE_END
  118. #endif /* __DEVICE_SPLIT_KERNEL_H__ */