kernel_queues.h

/*
 * Copyright 2011-2015 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __KERNEL_QUEUE_H__
#define __KERNEL_QUEUE_H__

CCL_NAMESPACE_BEGIN

/*
 * Queue utility functions for split kernel
 */

#ifdef __KERNEL_OPENCL__
# pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
# pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
#endif

/*
 * Enqueue ray index into the queue
 */
ccl_device void enqueue_ray_index(
    int ray_index,               /* Ray index to be enqueued. */
    int queue_number,            /* Queue in which the ray index should be enqueued. */
    ccl_global int *queues,      /* Buffer of all queues. */
    int queue_size,              /* Size of each queue. */
    ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */
{
  /* This thread's queue index. */
  int my_queue_index = atomic_fetch_and_inc_uint32((ccl_global uint *)&queue_index[queue_number]) +
                       (queue_number * queue_size);
  queues[my_queue_index] = ray_index;
}
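
/* Usage sketch (illustrative, not part of the original header). The queues
 * buffer is a flattened 2D array laid out as [num_queues][queue_size], so
 * queue q owns slots [q * queue_size, (q + 1) * queue_size). A producer
 * thread reserves exactly one slot per call and writes its ray index there;
 * the queue enum value below is assumed for the example:
 *
 *   enqueue_ray_index(ray_index,
 *                     QUEUE_ACTIVE_AND_REGENERATED_RAYS,
 *                     queues,
 *                     queue_size,
 *                     queue_index);
 *
 * queue_index[queue_number] is expected to be zeroed before the first
 * enqueue of a round, since each call only ever increments it.
 */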

/*
 * Get the ray index for this thread
 * Returns a non-negative ray_index for threads that have work to do;
 * returns 'QUEUE_EMPTY_SLOT' for threads that don't have any work,
 * i.e. all rays in the queue have already been handed out and there
 * are no more rays to allocate to other threads.
 */
ccl_device int get_ray_index(
    KernelGlobals *kg,
    int thread_index,       /* Global thread index. */
    int queue_number,       /* Queue to operate on. */
    ccl_global int *queues, /* Buffer of all queues. */
    int queuesize,          /* Size of a queue. */
    int empty_queue)        /* Empty the queue slot as soon as we fetch the ray index. */
{
  int ray_index = queues[queue_number * queuesize + thread_index];
  if (empty_queue && ray_index != QUEUE_EMPTY_SLOT) {
    queues[queue_number * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
  }
  return ray_index;
}
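
/* Usage sketch (illustrative): each thread of a consumer kernel fetches the
 * ray it should work on, emptying the slot so the queue can be refilled by
 * a later stage. The thread-index expression and queue enum are assumptions
 * for the example:
 *
 *   int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
 *   int ray_index = get_ray_index(kg,
 *                                 thread_index,
 *                                 QUEUE_ACTIVE_AND_REGENERATED_RAYS,
 *                                 queues,
 *                                 queue_size,
 *                                 1);
 *   if (ray_index == QUEUE_EMPTY_SLOT)
 *     return;
 */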

/* The following functions implement a local-memory variant of the enqueue_ray_index function. */

/* All threads should call this function. */
ccl_device void enqueue_ray_index_local(
    int ray_index,     /* Ray index to enqueue. */
    int queue_number,  /* Queue in which to enqueue ray index. */
    char enqueue_flag, /* True for threads whose ray index has to be enqueued. */
    int queuesize,     /* Queue size. */
    ccl_local_param unsigned int *local_queue_atomics, /* To do local queue atomics. */
    ccl_global int *Queue_data,  /* Queues. */
    ccl_global int *Queue_index) /* To do global queue atomics. */
{
  int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);

  /* Get local queue id. */
  unsigned int lqidx;
  if (enqueue_flag) {
    lqidx = atomic_fetch_and_inc_uint32(local_queue_atomics);
  }
  ccl_barrier(CCL_LOCAL_MEM_FENCE);

  /* Get global queue offset. */
  if (lidx == 0) {
    *local_queue_atomics = atomic_fetch_and_add_uint32(
        (ccl_global uint *)&Queue_index[queue_number], *local_queue_atomics);
  }
  ccl_barrier(CCL_LOCAL_MEM_FENCE);

  /* Get global queue index and enqueue ray. */
  if (enqueue_flag) {
    unsigned int my_gqidx = queue_number * queuesize + (*local_queue_atomics) + lqidx;
    Queue_data[my_gqidx] = ray_index;
  }
}
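
/* Why the local variant exists (explanatory note): it trades many global
 * atomics for cheap local ones in three phases:
 *   1. Every thread with enqueue_flag set grabs a slot from the work-group's
 *      local counter (*local_queue_atomics).
 *   2. One thread (lidx == 0) reserves a contiguous block of that size in the
 *      global queue with a single global atomic, then stores the returned
 *      base offset back into *local_queue_atomics.
 *   3. Each flagged thread writes its ray index at base + local slot.
 * For example, if 5 of 64 threads in a group enqueue, the group issues one
 * global atomic_fetch_and_add_uint32(..., 5) instead of five global
 * increments. The caller is expected to zero *local_queue_atomics and issue
 * a local barrier before calling, since this function never resets it.
 */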

ccl_device unsigned int get_local_queue_index(
    int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */
    ccl_local_param unsigned int *local_queue_atomics)
{
  int my_lqidx = atomic_fetch_and_inc_uint32(&local_queue_atomics[queue_number]);
  return my_lqidx;
}

ccl_device unsigned int get_global_per_queue_offset(
    int queue_number,
    ccl_local_param unsigned int *local_queue_atomics,
    ccl_global int *global_queue_atomics)
{
  unsigned int queue_offset = atomic_fetch_and_add_uint32(
      (ccl_global uint *)&global_queue_atomics[queue_number], local_queue_atomics[queue_number]);
  return queue_offset;
}

ccl_device unsigned int get_global_queue_index(
    int queue_number,
    int queuesize,
    unsigned int lqidx,
    ccl_local_param unsigned int *global_per_queue_offset)
{
  int my_gqidx = queuesize * queue_number + lqidx + global_per_queue_offset[queue_number];
  return my_gqidx;
}
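
/* Composition sketch (illustrative): the three helpers above split the work
 * of enqueue_ray_index_local into pieces, so a kernel that feeds several
 * queues at once can still issue only one global atomic per queue per
 * work-group. NUM_QUEUES, local_offsets (a per-queue local array), and q
 * (the queue this thread enqueues into) are placeholders for this sketch:
 *
 *   unsigned int lqidx = get_local_queue_index(q, local_queue_atomics);
 *   ccl_barrier(CCL_LOCAL_MEM_FENCE);
 *   if (lidx == 0) {
 *     for (int i = 0; i < NUM_QUEUES; i++)
 *       local_offsets[i] = get_global_per_queue_offset(i, local_queue_atomics, Queue_index);
 *   }
 *   ccl_barrier(CCL_LOCAL_MEM_FENCE);
 *   Queue_data[get_global_queue_index(q, queuesize, lqidx, local_offsets)] = ray_index;
 */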

ccl_device int dequeue_ray_index(int queue_number,
                                 ccl_global int *queues,
                                 int queue_size,
                                 ccl_global int *queue_index)
{
  int index = atomic_fetch_and_dec_uint32((ccl_global uint *)&queue_index[queue_number]) - 1;

  if (index < 0) {
    return QUEUE_EMPTY_SLOT;
  }

  return queues[index + queue_number * queue_size];
}
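
/* Usage sketch (illustrative): a worker thread pops one ray index and bails
 * out if the queue has drained:
 *
 *   int ray_index = dequeue_ray_index(queue_number, queues, queue_size, queue_index);
 *   if (ray_index == QUEUE_EMPTY_SLOT)
 *     return;
 *
 * Note that an unsuccessful pop still decrements queue_index[queue_number]
 * (the counter goes negative when read as int), so it must be reset before
 * the queue is reused for another round.
 */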

CCL_NAMESPACE_END

#endif // __KERNEL_QUEUE_H__