obvh_nodes.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. /*
  2. * Copyright 2011-2014, Blender Foundation.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. *
  16. * Aligned nodes intersection AVX code is adapted from Embree.
  17. */
  /* An address paired with a distance; the obvh_stack_sort() helpers order
   * these items by their dist field. */
  struct OBVHStackItem {
    /* Presumably a BVH node address -- confirm against the traversal code. */
    int addr;
    /* Key compared by obvh_stack_sort(). */
    float dist;
  };
  22. ccl_device_inline void obvh_near_far_idx_calc(const float3 &idir,
  23. int *ccl_restrict near_x,
  24. int *ccl_restrict near_y,
  25. int *ccl_restrict near_z,
  26. int *ccl_restrict far_x,
  27. int *ccl_restrict far_y,
  28. int *ccl_restrict far_z)
  29. {
  30. #ifdef __KERNEL_SSE__
  31. *near_x = 0;
  32. *far_x = 1;
  33. *near_y = 2;
  34. *far_y = 3;
  35. *near_z = 4;
  36. *far_z = 5;
  37. const size_t mask = movemask(ssef(idir.m128));
  38. const int mask_x = mask & 1;
  39. const int mask_y = (mask & 2) >> 1;
  40. const int mask_z = (mask & 4) >> 2;
  41. *near_x += mask_x;
  42. *far_x -= mask_x;
  43. *near_y += mask_y;
  44. *far_y -= mask_y;
  45. *near_z += mask_z;
  46. *far_z -= mask_z;
  47. #else
  48. if (idir.x >= 0.0f) {
  49. *near_x = 0;
  50. *far_x = 1;
  51. }
  52. else {
  53. *near_x = 1;
  54. *far_x = 0;
  55. }
  56. if (idir.y >= 0.0f) {
  57. *near_y = 2;
  58. *far_y = 3;
  59. }
  60. else {
  61. *near_y = 3;
  62. *far_y = 2;
  63. }
  64. if (idir.z >= 0.0f) {
  65. *near_z = 4;
  66. *far_z = 5;
  67. }
  68. else {
  69. *near_z = 5;
  70. *far_z = 4;
  71. }
  72. #endif
  73. }
  74. ccl_device_inline void obvh_item_swap(OBVHStackItem *ccl_restrict a, OBVHStackItem *ccl_restrict b)
  75. {
  76. OBVHStackItem tmp = *a;
  77. *a = *b;
  78. *b = tmp;
  79. }
  80. ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
  81. OBVHStackItem *ccl_restrict s2,
  82. OBVHStackItem *ccl_restrict s3)
  83. {
  84. if (s2->dist < s1->dist) {
  85. obvh_item_swap(s2, s1);
  86. }
  87. if (s3->dist < s2->dist) {
  88. obvh_item_swap(s3, s2);
  89. }
  90. if (s2->dist < s1->dist) {
  91. obvh_item_swap(s2, s1);
  92. }
  93. }
  94. ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
  95. OBVHStackItem *ccl_restrict s2,
  96. OBVHStackItem *ccl_restrict s3,
  97. OBVHStackItem *ccl_restrict s4)
  98. {
  99. if (s2->dist < s1->dist) {
  100. obvh_item_swap(s2, s1);
  101. }
  102. if (s4->dist < s3->dist) {
  103. obvh_item_swap(s4, s3);
  104. }
  105. if (s3->dist < s1->dist) {
  106. obvh_item_swap(s3, s1);
  107. }
  108. if (s4->dist < s2->dist) {
  109. obvh_item_swap(s4, s2);
  110. }
  111. if (s3->dist < s2->dist) {
  112. obvh_item_swap(s3, s2);
  113. }
  114. }
  115. ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
  116. OBVHStackItem *ccl_restrict s2,
  117. OBVHStackItem *ccl_restrict s3,
  118. OBVHStackItem *ccl_restrict s4,
  119. OBVHStackItem *ccl_restrict s5)
  120. {
  121. obvh_stack_sort(s1, s2, s3, s4);
  122. if (s5->dist < s4->dist) {
  123. obvh_item_swap(s4, s5);
  124. if (s4->dist < s3->dist) {
  125. obvh_item_swap(s3, s4);
  126. if (s3->dist < s2->dist) {
  127. obvh_item_swap(s2, s3);
  128. if (s2->dist < s1->dist) {
  129. obvh_item_swap(s1, s2);
  130. }
  131. }
  132. }
  133. }
  134. }
  135. ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
  136. OBVHStackItem *ccl_restrict s2,
  137. OBVHStackItem *ccl_restrict s3,
  138. OBVHStackItem *ccl_restrict s4,
  139. OBVHStackItem *ccl_restrict s5,
  140. OBVHStackItem *ccl_restrict s6)
  141. {
  142. obvh_stack_sort(s1, s2, s3, s4, s5);
  143. if (s6->dist < s5->dist) {
  144. obvh_item_swap(s5, s6);
  145. if (s5->dist < s4->dist) {
  146. obvh_item_swap(s4, s5);
  147. if (s4->dist < s3->dist) {
  148. obvh_item_swap(s3, s4);
  149. if (s3->dist < s2->dist) {
  150. obvh_item_swap(s2, s3);
  151. if (s2->dist < s1->dist) {
  152. obvh_item_swap(s1, s2);
  153. }
  154. }
  155. }
  156. }
  157. }
  158. }
  159. ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
  160. OBVHStackItem *ccl_restrict s2,
  161. OBVHStackItem *ccl_restrict s3,
  162. OBVHStackItem *ccl_restrict s4,
  163. OBVHStackItem *ccl_restrict s5,
  164. OBVHStackItem *ccl_restrict s6,
  165. OBVHStackItem *ccl_restrict s7)
  166. {
  167. obvh_stack_sort(s1, s2, s3, s4, s5, s6);
  168. if (s7->dist < s6->dist) {
  169. obvh_item_swap(s6, s7);
  170. if (s6->dist < s5->dist) {
  171. obvh_item_swap(s5, s6);
  172. if (s5->dist < s4->dist) {
  173. obvh_item_swap(s4, s5);
  174. if (s4->dist < s3->dist) {
  175. obvh_item_swap(s3, s4);
  176. if (s3->dist < s2->dist) {
  177. obvh_item_swap(s2, s3);
  178. if (s2->dist < s1->dist) {
  179. obvh_item_swap(s1, s2);
  180. }
  181. }
  182. }
  183. }
  184. }
  185. }
  186. }
  187. ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
  188. OBVHStackItem *ccl_restrict s2,
  189. OBVHStackItem *ccl_restrict s3,
  190. OBVHStackItem *ccl_restrict s4,
  191. OBVHStackItem *ccl_restrict s5,
  192. OBVHStackItem *ccl_restrict s6,
  193. OBVHStackItem *ccl_restrict s7,
  194. OBVHStackItem *ccl_restrict s8)
  195. {
  196. obvh_stack_sort(s1, s2, s3, s4, s5, s6, s7);
  197. if (s8->dist < s7->dist) {
  198. obvh_item_swap(s7, s8);
  199. if (s7->dist < s6->dist) {
  200. obvh_item_swap(s6, s7);
  201. if (s6->dist < s5->dist) {
  202. obvh_item_swap(s5, s6);
  203. if (s5->dist < s4->dist) {
  204. obvh_item_swap(s4, s5);
  205. if (s4->dist < s3->dist) {
  206. obvh_item_swap(s3, s4);
  207. if (s3->dist < s2->dist) {
  208. obvh_item_swap(s2, s3);
  209. if (s2->dist < s1->dist) {
  210. obvh_item_swap(s1, s2);
  211. }
  212. }
  213. }
  214. }
  215. }
  216. }
  217. }
  218. }
  219. /* Axis-aligned nodes intersection */
  220. ccl_device_inline int obvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
  221. const avxf &isect_near,
  222. const avxf &isect_far,
  223. #ifdef __KERNEL_AVX2__
  224. const avx3f &org_idir,
  225. #else
  226. const avx3f &org,
  227. #endif
  228. const avx3f &idir,
  229. const int near_x,
  230. const int near_y,
  231. const int near_z,
  232. const int far_x,
  233. const int far_y,
  234. const int far_z,
  235. const int node_addr,
  236. avxf *ccl_restrict dist)
  237. {
  238. const int offset = node_addr + 2;
  239. #ifdef __KERNEL_AVX2__
  240. const avxf tnear_x = msub(
  241. kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, org_idir.x);
  242. const avxf tnear_y = msub(
  243. kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, org_idir.y);
  244. const avxf tnear_z = msub(
  245. kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, org_idir.z);
  246. const avxf tfar_x = msub(
  247. kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, org_idir.x);
  248. const avxf tfar_y = msub(
  249. kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, org_idir.y);
  250. const avxf tfar_z = msub(
  251. kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, org_idir.z);
  252. const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
  253. const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
  254. const avxb vmask = tnear <= tfar;
  255. int mask = (int)movemask(vmask);
  256. *dist = tnear;
  257. return mask;
  258. #else
  259. return 0;
  260. #endif
  261. }
  262. /* Unaligned nodes intersection */
  263. ccl_device_inline int obvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg,
  264. const avxf &isect_near,
  265. const avxf &isect_far,
  266. #ifdef __KERNEL_AVX2__
  267. const avx3f &org_idir,
  268. #endif
  269. const avx3f &org,
  270. const avx3f &dir,
  271. const avx3f &idir,
  272. const int near_x,
  273. const int near_y,
  274. const int near_z,
  275. const int far_x,
  276. const int far_y,
  277. const int far_z,
  278. const int node_addr,
  279. avxf *ccl_restrict dist)
  280. {
  281. const int offset = node_addr;
  282. const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 2);
  283. const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 4);
  284. const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 6);
  285. const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 8);
  286. const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 10);
  287. const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 12);
  288. const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 14);
  289. const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 16);
  290. const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 18);
  291. const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 20);
  292. const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 22);
  293. const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 24);
  294. const avxf aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
  295. aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
  296. aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
  297. const avxf aligned_P_x = org.x * tfm_x_x + org.y * tfm_x_y + org.z * tfm_x_z + tfm_t_x,
  298. aligned_P_y = org.x * tfm_y_x + org.y * tfm_y_y + org.z * tfm_y_z + tfm_t_y,
  299. aligned_P_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z;
  300. const avxf neg_one(-1.0f);
  301. const avxf nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
  302. nrdir_z = neg_one / aligned_dir_z;
  303. const avxf tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
  304. tlower_z = aligned_P_z * nrdir_z;
  305. const avxf tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
  306. tupper_z = tlower_z - nrdir_z;
  307. const avxf tnear_x = min(tlower_x, tupper_x);
  308. const avxf tnear_y = min(tlower_y, tupper_y);
  309. const avxf tnear_z = min(tlower_z, tupper_z);
  310. const avxf tfar_x = max(tlower_x, tupper_x);
  311. const avxf tfar_y = max(tlower_y, tupper_y);
  312. const avxf tfar_z = max(tlower_z, tupper_z);
  313. const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
  314. const avxf tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
  315. const avxb vmask = tnear <= tfar;
  316. *dist = tnear;
  317. return movemask(vmask);
  318. }
  319. /* Intersectors wrappers.
  320. *
  321. * They'll check node type and call appropriate intersection code.
  322. */
  323. ccl_device_inline int obvh_node_intersect(KernelGlobals *ccl_restrict kg,
  324. const avxf &isect_near,
  325. const avxf &isect_far,
  326. #ifdef __KERNEL_AVX2__
  327. const avx3f &org_idir,
  328. #endif
  329. const avx3f &org,
  330. const avx3f &dir,
  331. const avx3f &idir,
  332. const int near_x,
  333. const int near_y,
  334. const int near_z,
  335. const int far_x,
  336. const int far_y,
  337. const int far_z,
  338. const int node_addr,
  339. avxf *ccl_restrict dist)
  340. {
  341. const int offset = node_addr;
  342. const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
  343. if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
  344. return obvh_unaligned_node_intersect(kg,
  345. isect_near,
  346. isect_far,
  347. #ifdef __KERNEL_AVX2__
  348. org_idir,
  349. #endif
  350. org,
  351. dir,
  352. idir,
  353. near_x,
  354. near_y,
  355. near_z,
  356. far_x,
  357. far_y,
  358. far_z,
  359. node_addr,
  360. dist);
  361. }
  362. else {
  363. return obvh_aligned_node_intersect(kg,
  364. isect_near,
  365. isect_far,
  366. #ifdef __KERNEL_AVX2__
  367. org_idir,
  368. #else
  369. org,
  370. #endif
  371. idir,
  372. near_x,
  373. near_y,
  374. near_z,
  375. far_x,
  376. far_y,
  377. far_z,
  378. node_addr,
  379. dist);
  380. }
  381. }