kernel_compat_cpu.h 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. /*
  2. * Copyright 2011-2013 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef __KERNEL_COMPAT_CPU_H__
  17. #define __KERNEL_COMPAT_CPU_H__
  18. #define __KERNEL_CPU__
  19. /* Release kernel has too much false-positive maybe-uninitialized warnings,
  20. * which makes it possible to miss actual warnings.
  21. */
  22. #if (defined(__GNUC__) && !defined(__clang__)) && defined(NDEBUG)
  23. # pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
  24. # pragma GCC diagnostic ignored "-Wuninitialized"
  25. #endif
  26. /* Selective nodes compilation. */
  27. #ifndef __NODES_MAX_GROUP__
  28. # define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX
  29. #endif
  30. #ifndef __NODES_FEATURES__
  31. # define __NODES_FEATURES__ NODE_FEATURE_ALL
  32. #endif
  33. #include "util/util_math.h"
  34. #include "util/util_simd.h"
  35. #include "util/util_half.h"
  36. #include "util/util_types.h"
  37. #include "util/util_texture.h"
  38. #define ccl_addr_space
  39. #define ccl_local_id(d) 0
  40. #define ccl_global_id(d) (kg->global_id[d])
  41. #define ccl_local_size(d) 1
  42. #define ccl_global_size(d) (kg->global_size[d])
  43. #define ccl_group_id(d) ccl_global_id(d)
  44. #define ccl_num_groups(d) ccl_global_size(d)
  45. /* On x86_64, versions of glibc < 2.16 have an issue where expf is
  46. * much slower than the double version. This was fixed in glibc 2.16.
  47. */
  48. #if !defined(__KERNEL_GPU__) && defined(__x86_64__) && defined(__x86_64__) && \
  49. defined(__GNU_LIBRARY__) && defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \
  50. (__GLIBC__ <= 2 && __GLIBC_MINOR__ < 16)
  51. # define expf(x) ((float)exp((double)(x)))
  52. #endif
  53. CCL_NAMESPACE_BEGIN
  54. /* Assertions inside the kernel only work for the CPU device, so we wrap it in
  55. * a macro which is empty for other devices */
  56. #define kernel_assert(cond) assert(cond)
  57. /* Texture types to be compatible with CUDA textures. These are really just
  58. * simple arrays and after inlining fetch hopefully revert to being a simple
  59. * pointer lookup. */
  60. template<typename T> struct texture {
  61. ccl_always_inline const T &fetch(int index)
  62. {
  63. kernel_assert(index >= 0 && index < width);
  64. return data[index];
  65. }
  66. #if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
  67. /* Reads 256 bytes but indexes in blocks of 128 bytes to maintain
  68. * compatibility with existing indices and data structures.
  69. */
  70. ccl_always_inline avxf fetch_avxf(const int index)
  71. {
  72. kernel_assert(index >= 0 && (index + 1) < width);
  73. ssef *ssef_data = (ssef *)data;
  74. ssef *ssef_node_data = &ssef_data[index];
  75. return _mm256_loadu_ps((float *)ssef_node_data);
  76. }
  77. #endif
  78. #ifdef __KERNEL_SSE2__
  79. ccl_always_inline ssef fetch_ssef(int index)
  80. {
  81. kernel_assert(index >= 0 && index < width);
  82. return ((ssef *)data)[index];
  83. }
  84. ccl_always_inline ssei fetch_ssei(int index)
  85. {
  86. kernel_assert(index >= 0 && index < width);
  87. return ((ssei *)data)[index];
  88. }
  89. #endif
  90. T *data;
  91. int width;
  92. };
  93. /* Macros to handle different memory storage on different devices */
  94. #define kernel_tex_fetch(tex, index) (kg->tex.fetch(index))
  95. #define kernel_tex_fetch_avxf(tex, index) (kg->tex.fetch_avxf(index))
  96. #define kernel_tex_fetch_ssef(tex, index) (kg->tex.fetch_ssef(index))
  97. #define kernel_tex_fetch_ssei(tex, index) (kg->tex.fetch_ssei(index))
  98. #define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size))
  99. #define kernel_tex_array(tex) (kg->tex.data)
  100. #define kernel_data (kg->__data)
  101. #ifdef __KERNEL_SSE2__
  102. typedef vector3<sseb> sse3b;
  103. typedef vector3<ssef> sse3f;
  104. typedef vector3<ssei> sse3i;
  105. ccl_device_inline void print_sse3b(const char *label, sse3b &a)
  106. {
  107. print_sseb(label, a.x);
  108. print_sseb(label, a.y);
  109. print_sseb(label, a.z);
  110. }
  111. ccl_device_inline void print_sse3f(const char *label, sse3f &a)
  112. {
  113. print_ssef(label, a.x);
  114. print_ssef(label, a.y);
  115. print_ssef(label, a.z);
  116. }
  117. ccl_device_inline void print_sse3i(const char *label, sse3i &a)
  118. {
  119. print_ssei(label, a.x);
  120. print_ssei(label, a.y);
  121. print_ssei(label, a.z);
  122. }
  123. # if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
  124. typedef vector3<avxf> avx3f;
  125. # endif
  126. #endif
  127. CCL_NAMESPACE_END
  128. #endif /* __KERNEL_COMPAT_CPU_H__ */