kernel_cuda_image.h 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. /*
  2. * Copyright 2017 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */
  /* Cubic B-spline basis weight w0(a) = (1/6) * (-a^3 + 3a^2 - 3a + 1),
   * evaluated in nested form. */
  ccl_device float cubic_w0(float a)
  {
    const float one_sixth = 1.0f / 6.0f;
    /* Build the polynomial from the innermost factor outwards. */
    const float inner = -a + 3.0f;
    const float middle = a * inner - 3.0f;
    const float poly = a * middle + 1.0f;
    return one_sixth * poly;
  }
  /* Cubic B-spline basis weight w1(a) = (1/6) * (3a^3 - 6a^2 + 4). */
  ccl_device float cubic_w1(float a)
  {
    const float one_sixth = 1.0f / 6.0f;
    const float a_squared = a * a;
    const float linear_term = 3.0f * a - 6.0f;
    /* a^2 * (3a - 6) expands to 3a^3 - 6a^2. */
    const float poly = a_squared * linear_term + 4.0f;
    return one_sixth * poly;
  }
  /* Cubic B-spline basis weight w2(a) = (1/6) * (-3a^3 + 3a^2 + 3a + 1),
   * evaluated in nested form. */
  ccl_device float cubic_w2(float a)
  {
    const float one_sixth = 1.0f / 6.0f;
    /* Build the polynomial from the innermost factor outwards. */
    const float inner = -3.0f * a + 3.0f;
    const float middle = a * inner + 3.0f;
    const float poly = a * middle + 1.0f;
    return one_sixth * poly;
  }
  /* Cubic B-spline basis weight w3(a) = a^3 / 6. */
  ccl_device float cubic_w3(float a)
  {
    const float one_sixth = 1.0f / 6.0f;
    const float a_cubed = a * a * a;
    return one_sixth * a_cubed;
  }
  33. /* g0 and g1 are the two amplitude functions. */
  /* First amplitude function: the sum of the basis weights w0 and w1. */
  ccl_device float cubic_g0(float a)
  {
    const float weight0 = cubic_w0(a);
    const float weight1 = cubic_w1(a);
    return weight0 + weight1;
  }
  /* Second amplitude function: the sum of the basis weights w2 and w3. */
  ccl_device float cubic_g1(float a)
  {
    const float weight2 = cubic_w2(a);
    const float weight3 = cubic_w3(a);
    return weight2 + weight3;
  }
  42. /* h0 and h1 are the two offset functions */
  /* First offset function: -1 + w1 / (w0 + w1), plus a 0.5 shift. */
  ccl_device float cubic_h0(float a)
  {
    const float weight0 = cubic_w0(a);
    const float weight1 = cubic_w1(a);
    /* Fraction of the (w0 + w1) pair that belongs to w1. */
    const float ratio = weight1 / (weight0 + weight1);
    /* The trailing +0.5 compensates for the CUDA linear filtering convention. */
    return (-1.0f + ratio) + 0.5f;
  }
  /* Second offset function: 1 + w3 / (w2 + w3), plus the same 0.5 shift that
   * cubic_h0 applies. */
  ccl_device float cubic_h1(float a)
  {
    const float weight2 = cubic_w2(a);
    const float weight3 = cubic_w3(a);
    /* Fraction of the (w2 + w3) pair that belongs to w3. */
    const float ratio = weight3 / (weight2 + weight3);
    return (1.0f + ratio) + 0.5f;
  }
  /* Fast bicubic texture lookup built from 4 tex2D fetches, adapted from the
   * CUDA samples.
   *
   * info supplies the image width and height, tex is the texture object to
   * sample, and (x, y) is the lookup position, which is multiplied by the image
   * size before use. Returns the weighted combination of the four fetches. */
  template<typename T>
  ccl_device T
  kernel_tex_image_interp_bicubic(const TextureInfo &info, CUtexObject tex, float x, float y)
  {
    /* Scale by the image size and shift by 0.5. */
    x = (x * info.width) - 0.5f;
    y = (y * info.height) - 0.5f;

    /* Split each coordinate into a whole part and a fractional part. */
    const float base_x = floor(x);
    const float base_y = floor(y);
    const float frac_x = x - base_x;
    const float frac_y = y - base_y;

    /* Amplitudes for the two fetch columns and the two fetch rows. */
    const float amp_x0 = cubic_g0(frac_x);
    const float amp_x1 = cubic_g1(frac_x);
    const float amp_y0 = cubic_g0(frac_y);
    const float amp_y1 = cubic_g1(frac_y);

    /* Fetch positions, divided by the image size again before sampling. */
    const float u0 = (base_x + cubic_h0(frac_x)) / info.width;
    const float u1 = (base_x + cubic_h1(frac_x)) / info.width;
    const float v0 = (base_y + cubic_h0(frac_y)) / info.height;
    const float v1 = (base_y + cubic_h1(frac_y)) / info.height;

    /* Blend along x within each row, then blend the two rows along y. */
    const T row0 = amp_x0 * tex2D<T>(tex, u0, v0) + amp_x1 * tex2D<T>(tex, u1, v0);
    const T row1 = amp_x0 * tex2D<T>(tex, u0, v1) + amp_x1 * tex2D<T>(tex, u1, v1);
    return amp_y0 * row0 + amp_y1 * row1;
  }
  /* Fast tricubic texture lookup built from 8 tex3D fetches.
   *
   * info supplies the image width, height and depth, tex is the texture object
   * to sample, and (x, y, z) is the lookup position, which is multiplied by the
   * image size before use. Returns the weighted combination of the eight
   * fetches. */
  template<typename T>
  ccl_device T kernel_tex_image_interp_bicubic_3d(
      const TextureInfo &info, CUtexObject tex, float x, float y, float z)
  {
    /* Scale by the image size and shift by 0.5. */
    x = (x * info.width) - 0.5f;
    y = (y * info.height) - 0.5f;
    z = (z * info.depth) - 0.5f;

    /* Split each coordinate into a whole part and a fractional part. */
    const float base_x = floor(x);
    const float base_y = floor(y);
    const float base_z = floor(z);
    const float frac_x = x - base_x;
    const float frac_y = y - base_y;
    const float frac_z = z - base_z;

    /* Amplitudes for the two fetch positions along each axis. */
    const float amp_x0 = cubic_g0(frac_x);
    const float amp_x1 = cubic_g1(frac_x);
    const float amp_y0 = cubic_g0(frac_y);
    const float amp_y1 = cubic_g1(frac_y);
    const float amp_z0 = cubic_g0(frac_z);
    const float amp_z1 = cubic_g1(frac_z);

    /* Fetch positions, divided by the image size again before sampling. */
    const float u0 = (base_x + cubic_h0(frac_x)) / info.width;
    const float u1 = (base_x + cubic_h1(frac_x)) / info.width;
    const float v0 = (base_y + cubic_h0(frac_y)) / info.height;
    const float v1 = (base_y + cubic_h1(frac_y)) / info.height;
    const float w0 = (base_z + cubic_h0(frac_z)) / info.depth;
    const float w1 = (base_z + cubic_h1(frac_z)) / info.depth;

    /* Blend along x for each (y, z) pair. */
    const T row_y0_z0 = amp_x0 * tex3D<T>(tex, u0, v0, w0) + amp_x1 * tex3D<T>(tex, u1, v0, w0);
    const T row_y1_z0 = amp_x0 * tex3D<T>(tex, u0, v1, w0) + amp_x1 * tex3D<T>(tex, u1, v1, w0);
    const T row_y0_z1 = amp_x0 * tex3D<T>(tex, u0, v0, w1) + amp_x1 * tex3D<T>(tex, u1, v0, w1);
    const T row_y1_z1 = amp_x0 * tex3D<T>(tex, u0, v1, w1) + amp_x1 * tex3D<T>(tex, u1, v1, w1);

    /* Blend along y within each z slice, then blend the two slices along z. */
    const T slice_z0 = amp_y0 * row_y0_z0 + amp_y1 * row_y1_z0;
    const T slice_z1 = amp_y0 * row_y0_z1 + amp_y1 * row_y1_z1;
    return amp_z0 * slice_z0 + amp_z1 * slice_z1;
  }
  /* Sample the 2D image texture with index id at (x, y) and return a float4.
   *
   * The texture info entry for id provides the texture object and the
   * interpolation mode; INTERPOLATION_CUBIC selects the bicubic lookup, any
   * other mode uses a plain tex2D fetch. kg is not referenced in this body. */
  ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
  {
    const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
    CUtexObject tex = (CUtexObject)info.data;

    const int texture_type = kernel_tex_type(id);
    /* float4, byte4, half4 and ushort4 are fetched as float4. */
    const bool is_four_channel = (texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
                                  texture_type == IMAGE_DATA_TYPE_BYTE4 ||
                                  texture_type == IMAGE_DATA_TYPE_HALF4 ||
                                  texture_type == IMAGE_DATA_TYPE_USHORT4);
    const bool use_cubic = (info.interpolation == INTERPOLATION_CUBIC);

    if (is_four_channel) {
      return use_cubic ? kernel_tex_image_interp_bicubic<float4>(info, tex, x, y) :
                         tex2D<float4>(tex, x, y);
    }

    /* Every other type is fetched as a single float, which is copied into the
     * first three components; the fourth component is set to 1.0f. */
    const float value = use_cubic ? kernel_tex_image_interp_bicubic<float>(info, tex, x, y) :
                                    tex2D<float>(tex, x, y);
    return make_float4(value, value, value, 1.0f);
  }
  /* Sample the 3D image texture with index id at (x, y, z) and return a float4.
   *
   * interp overrides the interpolation mode stored in the texture info unless
   * it is INTERPOLATION_NONE, in which case the stored mode is used.
   * INTERPOLATION_CUBIC selects the tricubic lookup, any other mode uses a
   * plain tex3D fetch. kg is not referenced in this body. */
  ccl_device float4 kernel_tex_image_interp_3d(
      KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
  {
    const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
    CUtexObject tex = (CUtexObject)info.data;

    /* Start from the caller's mode and fall back to the texture's own one. */
    uint interpolation = interp;
    if (interp == INTERPOLATION_NONE) {
      interpolation = info.interpolation;
    }
    const bool use_cubic = (interpolation == INTERPOLATION_CUBIC);

    const int texture_type = kernel_tex_type(id);
    /* float4, byte4, half4 and ushort4 are fetched as float4. */
    const bool is_four_channel = (texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
                                  texture_type == IMAGE_DATA_TYPE_BYTE4 ||
                                  texture_type == IMAGE_DATA_TYPE_HALF4 ||
                                  texture_type == IMAGE_DATA_TYPE_USHORT4);

    if (is_four_channel) {
      return use_cubic ? kernel_tex_image_interp_bicubic_3d<float4>(info, tex, x, y, z) :
                         tex3D<float4>(tex, x, y, z);
    }

    /* Every other type is fetched as a single float, which is copied into the
     * first three components; the fourth component is set to 1.0f. */
    const float value = use_cubic ? kernel_tex_image_interp_bicubic_3d<float>(info, tex, x, y, z) :
                                    tex3D<float>(tex, x, y, z);
    return make_float4(value, value, value, 1.0f);
  }