kernel_compat_cuda.h 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. /*
  2. * Copyright 2011-2013 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  /* Include guard; the matching #endif closes the header at its very end. */
  #ifndef __KERNEL_COMPAT_CUDA_H__
  #define __KERNEL_COMPAT_CUDA_H__

  /* Backend identification macros for code including this header. */
  #define __KERNEL_CUDA__
  #define __KERNEL_GPU__

  /* Both namespace markers expand to nothing in this header. */
  #define CCL_NAMESPACE_END
  #define CCL_NAMESPACE_BEGIN
  /* Selective nodes compilation.
   *
   * Each setting only receives its default when nothing defined it before
   * this header was included. */
  #if !defined(__NODES_MAX_GROUP__)
  #  define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX
  #endif
  #if !defined(__NODES_FEATURES__)
  #  define __NODES_FEATURES__ NODE_FEATURE_ALL
  #endif
  /* Manual definitions so we can compile without CUDA toolkit. */

  /* Integer and handle types. */
  typedef unsigned long long uint64_t;
  typedef unsigned int uint32_t;
  typedef unsigned long long CUtexObject;

  /* Half precision values are carried around as an unsigned short. */
  typedef unsigned short half;

  /* Single precision float limits. */
  #define FLT_MAX 340282346638528859811704183484516925440.0f
  #define FLT_MIN 1.175494350822287507969e-38f
  /* Convert a single precision float to half precision.
   *
   * The conversion is done by the PTX instruction cvt.rn.f16.f32 (the .rn
   * modifier selects round-to-nearest-even); the 16-bit result is returned
   * in the `half` storage type.
   *
   * Declared inline because the definition lives in a header: a non-inline
   * definition is emitted once per including translation unit, and those
   * copies can clash when device code from several units is linked together. */
  __device__ __inline__ half __float2half(const float f)
  {
    half bits;
    /* "=h" binds a 16-bit output register, "f" a 32-bit float input. */
    asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(bits) : "f"(f));
    return bits;
  }
  /* Qualifier wrappers for different names on different devices */

  /* Function qualifiers. */
  #define ccl_device __device__ __inline__
  #define ccl_device_noinline __device__ __noinline__
  /* Forced inlining does not depend on the target architecture. */
  #define ccl_device_forceinline __device__ __forceinline__
  #if __CUDA_ARCH__ >= 500
  #  define ccl_device_inline __device__ __inline__
  #else
  /* Below architecture 500, or when __CUDA_ARCH__ is not defined (it then
   * evaluates to 0 in the condition), inlining is forced. */
  #  define ccl_device_inline __device__ __forceinline__
  #endif

  /* Storage and pointer qualifiers; several of them expand to nothing here. */
  #define ccl_global
  #define ccl_private
  #define ccl_local __shared__
  #define ccl_local_param
  #define ccl_constant const
  #define ccl_static_constant __constant__
  #define ccl_restrict __restrict__
  #define ccl_may_alias
  #define ccl_addr_space
  /* TODO(sergey): In theory references could be used with CUDA, however
   * the performance impact has yet to be investigated.
   */
  #define ccl_ref

  /* Alignment attribute wrapper. */
  #define ccl_align(bytes) __align__(bytes)

  /* The fallthrough annotation expands to nothing here. */
  #define ATTR_FALLTHROUGH

  /* Square of the thread block width; CUDA_THREADS_BLOCK_WIDTH has to be
   * defined before this macro is expanded. */
  #define CCL_MAX_LOCAL_SIZE (CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH)

  /* No assert supported for CUDA: the condition is discarded unevaluated. */
  #define kernel_assert(expr)
  70. /* Types */
  71. #include "util/util_half.h"
  72. #include "util/util_types.h"
  73. /* Work item functions */
  /* Index of the calling thread inside its block along dimension d
   * (0 = x, 1 = y, 2 = z). Any other dimension yields 0. */
  ccl_device_inline uint ccl_local_id(uint d)
  {
    if (d == 0) {
      return threadIdx.x;
    }
    if (d == 1) {
      return threadIdx.y;
    }
    if (d == 2) {
      return threadIdx.z;
    }
    return 0;
  }
  /* Global index along dimension `dim`: block index times block size plus the
   * index inside the block. The argument is expanded three times, so it must
   * not have side effects. */
  #define ccl_global_id(dim) (ccl_group_id(dim) * ccl_local_size(dim) + ccl_local_id(dim))
  /* Number of threads per block along dimension d (0 = x, 1 = y, 2 = z).
   *
   * An out-of-range dimension reports a size of 1 instead of 0. This follows
   * the OpenCL get_local_size() convention and keeps the value neutral when
   * it is used as a factor in a size computation. */
  ccl_device_inline uint ccl_local_size(uint d)
  {
    switch (d) {
      case 0:
        return blockDim.x;
      case 1:
        return blockDim.y;
      case 2:
        return blockDim.z;
      default:
        /* A dimension that does not exist has an extent of one. */
        return 1;
    }
  }
  /* Total number of threads along dimension `dim`: number of blocks times the
   * block size. The argument is expanded twice, so it must not have side
   * effects. */
  #define ccl_global_size(dim) (ccl_num_groups(dim) * ccl_local_size(dim))
  /* Index of the calling thread's block inside the grid along dimension d
   * (0 = x, 1 = y, 2 = z). Any other dimension yields 0. */
  ccl_device_inline uint ccl_group_id(uint d)
  {
    return (d == 0) ? blockIdx.x :
           (d == 1) ? blockIdx.y :
           (d == 2) ? blockIdx.z :
                      0;
  }
  /* Number of blocks in the grid along dimension d (0 = x, 1 = y, 2 = z).
   *
   * An out-of-range dimension reports a count of 1 instead of 0. This follows
   * the OpenCL get_num_groups() convention and keeps the value neutral when
   * it is used as a factor in a size computation. */
  ccl_device_inline uint ccl_num_groups(uint d)
  {
    switch (d) {
      case 0:
        return gridDim.x;
      case 1:
        return gridDim.y;
      case 2:
        return gridDim.z;
      default:
        /* A dimension that does not exist holds a single group. */
        return 1;
    }
  }
  /* Textures */

  /* Regular data uses arrays: a fetch is a plain subscript on the name, and
   * the array accessor passes the name through unchanged. */
  #define kernel_tex_array(tex) (tex)
  #define kernel_tex_fetch(tex, i) tex[(i)]

  /* Kernel data is reached through the name __data. */
  #define kernel_data __data
  /* Use fast math functions.
   *
   * The standard single precision names are redirected to the CUDA fast
   * intrinsics, which trade accuracy for speed. Arguments are cast to float
   * before the call. */
  #define sinf(x) __sinf((float)(x))
  #define cosf(x) __cosf((float)(x))
  #define tanf(x) __tanf((float)(x))
  #define expf(x) __expf((float)(x))
  #define logf(x) __logf((float)(x))
  #define powf(x, y) __powf((float)(x), (float)(y))

  #endif /* __KERNEL_COMPAT_CUDA_H__ */