node_intersector_packet.h 41 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "node_intersector.h"
  5. namespace embree
  6. {
  7. namespace isa
  8. {
  9. //////////////////////////////////////////////////////////////////////////////////////
  10. // Ray packet structure used in hybrid traversal
  11. //////////////////////////////////////////////////////////////////////////////////////
  12. template<int K, bool robust>
  13. struct TravRayK;
  14. /* Fast variant */
  15. template<int K>
  16. struct TravRayK<K, false>
  17. {
  18. __forceinline TravRayK() {}
  19. __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
  20. {
  21. init(ray_org, ray_dir, N);
  22. }
  23. __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
  24. {
  25. init(ray_org, ray_dir, N);
  26. tnear = ray_tnear;
  27. tfar = ray_tfar;
  28. }
  29. __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
  30. {
  31. org = ray_org;
  32. dir = ray_dir;
  33. rdir = rcp_safe(ray_dir);
  34. #if defined(__aarch64__)
  35. neg_org_rdir = -(org * rdir);
  36. #elif defined(__AVX2__)
  37. org_rdir = org * rdir;
  38. #endif
  39. if (N)
  40. {
  41. const int size = sizeof(float)*N;
  42. nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
  43. nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
  44. nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
  45. }
  46. }
  47. Vec3vf<K> org;
  48. Vec3vf<K> dir;
  49. Vec3vf<K> rdir;
  50. #if defined(__aarch64__)
  51. Vec3vf<K> neg_org_rdir;
  52. #elif defined(__AVX2__)
  53. Vec3vf<K> org_rdir;
  54. #endif
  55. Vec3vi<K> nearXYZ;
  56. vfloat<K> tnear;
  57. vfloat<K> tfar;
  58. };
  59. template<int K>
  60. using TravRayKFast = TravRayK<K, false>;
  61. /* Robust variant */
  62. template<int K>
  63. struct TravRayK<K, true>
  64. {
  65. __forceinline TravRayK() {}
  66. __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
  67. {
  68. init(ray_org, ray_dir, N);
  69. }
  70. __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
  71. {
  72. init(ray_org, ray_dir, N);
  73. tnear = ray_tnear;
  74. tfar = ray_tfar;
  75. }
  76. __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
  77. {
  78. org = ray_org;
  79. dir = ray_dir;
  80. rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir));
  81. if (N)
  82. {
  83. const int size = sizeof(float)*N;
  84. nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
  85. nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
  86. nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
  87. }
  88. }
  89. Vec3vf<K> org;
  90. Vec3vf<K> dir;
  91. Vec3vf<K> rdir;
  92. Vec3vi<K> nearXYZ;
  93. vfloat<K> tnear;
  94. vfloat<K> tfar;
  95. };
  96. template<int K>
  97. using TravRayKRobust = TravRayK<K, true>;
  98. //////////////////////////////////////////////////////////////////////////////////////
  99. // Fast AABBNode intersection
  100. //////////////////////////////////////////////////////////////////////////////////////
  101. template<int N, int K>
  102. __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNode* node, size_t i,
  103. const TravRayKFast<K>& ray, vfloat<K>& dist)
  104. {
  105. #if defined(__aarch64__)
  106. const vfloat<K> lclipMinX = madd(node->lower_x[i], ray.rdir.x, ray.neg_org_rdir.x);
  107. const vfloat<K> lclipMinY = madd(node->lower_y[i], ray.rdir.y, ray.neg_org_rdir.y);
  108. const vfloat<K> lclipMinZ = madd(node->lower_z[i], ray.rdir.z, ray.neg_org_rdir.z);
  109. const vfloat<K> lclipMaxX = madd(node->upper_x[i], ray.rdir.x, ray.neg_org_rdir.x);
  110. const vfloat<K> lclipMaxY = madd(node->upper_y[i], ray.rdir.y, ray.neg_org_rdir.y);
  111. const vfloat<K> lclipMaxZ = madd(node->upper_z[i], ray.rdir.z, ray.neg_org_rdir.z);
  112. #elif defined(__AVX2__)
  113. const vfloat<K> lclipMinX = msub(node->lower_x[i], ray.rdir.x, ray.org_rdir.x);
  114. const vfloat<K> lclipMinY = msub(node->lower_y[i], ray.rdir.y, ray.org_rdir.y);
  115. const vfloat<K> lclipMinZ = msub(node->lower_z[i], ray.rdir.z, ray.org_rdir.z);
  116. const vfloat<K> lclipMaxX = msub(node->upper_x[i], ray.rdir.x, ray.org_rdir.x);
  117. const vfloat<K> lclipMaxY = msub(node->upper_y[i], ray.rdir.y, ray.org_rdir.y);
  118. const vfloat<K> lclipMaxZ = msub(node->upper_z[i], ray.rdir.z, ray.org_rdir.z);
  119. #else
  120. const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
  121. const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
  122. const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
  123. const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
  124. const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
  125. const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
  126. #endif
  127. #if defined(__AVX512F__) // SKX
  128. if (K == 16)
  129. {
  130. /* use mixed float/int min/max */
  131. const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
  132. const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
  133. const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
  134. dist = lnearP;
  135. return lhit;
  136. }
  137. else
  138. #endif
  139. {
  140. const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
  141. const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
  142. #if defined(__AVX512F__) // SKX
  143. const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
  144. #else
  145. const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
  146. #endif
  147. dist = lnearP;
  148. return lhit;
  149. }
  150. }
  151. //////////////////////////////////////////////////////////////////////////////////////
  152. // Robust AABBNode intersection
  153. //////////////////////////////////////////////////////////////////////////////////////
  154. template<int N, int K>
  155. __forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNode* node, size_t i,
  156. const TravRayKRobust<K>& ray, vfloat<K>& dist)
  157. {
  158. // FIXME: use per instruction rounding for AVX512
  159. const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
  160. const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
  161. const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
  162. const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
  163. const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
  164. const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
  165. const float round_up = 1.0f+3.0f*float(ulp);
  166. const float round_down = 1.0f-3.0f*float(ulp);
  167. const vfloat<K> lnearP = round_down*max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
  168. const vfloat<K> lfarP = round_up *min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
  169. const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
  170. dist = lnearP;
  171. return lhit;
  172. }
  173. //////////////////////////////////////////////////////////////////////////////////////
  174. // Fast AABBNodeMB intersection
  175. //////////////////////////////////////////////////////////////////////////////////////
  176. template<int N, int K>
  177. __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
  178. const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
  179. {
  180. const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
  181. const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
  182. const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
  183. const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
  184. const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
  185. const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
  186. #if defined(__aarch64__)
  187. const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x);
  188. const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y);
  189. const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z);
  190. const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x);
  191. const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y);
  192. const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z);
  193. #elif defined(__AVX2__)
  194. const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
  195. const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
  196. const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
  197. const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
  198. const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
  199. const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
  200. #else
  201. const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
  202. const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
  203. const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
  204. const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
  205. const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
  206. const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
  207. #endif
  208. #if defined(__AVX512F__) // SKX
  209. if (K == 16)
  210. {
  211. /* use mixed float/int min/max */
  212. const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
  213. const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
  214. const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
  215. dist = lnearP;
  216. return lhit;
  217. }
  218. else
  219. #endif
  220. {
  221. const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
  222. const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
  223. #if defined(__AVX512F__) // SKX
  224. const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
  225. #else
  226. const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
  227. #endif
  228. dist = lnearP;
  229. return lhit;
  230. }
  231. }
  232. //////////////////////////////////////////////////////////////////////////////////////
  233. // Robust AABBNodeMB intersection
  234. //////////////////////////////////////////////////////////////////////////////////////
  235. template<int N, int K>
  236. __forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
  237. const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
  238. {
  239. const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
  240. const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
  241. const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
  242. const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
  243. const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
  244. const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
  245. const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
  246. const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
  247. const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
  248. const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
  249. const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
  250. const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
  251. const float round_up = 1.0f+3.0f*float(ulp);
  252. const float round_down = 1.0f-3.0f*float(ulp);
  253. #if defined(__AVX512F__) // SKX
  254. if (K == 16)
  255. {
  256. const vfloat<K> lnearP = round_down*maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
  257. const vfloat<K> lfarP = round_up *mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
  258. const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
  259. dist = lnearP;
  260. return lhit;
  261. }
  262. else
  263. #endif
  264. {
  265. const vfloat<K> lnearP = round_down*maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
  266. const vfloat<K> lfarP = round_up *mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
  267. const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
  268. dist = lnearP;
  269. return lhit;
  270. }
  271. }
  272. //////////////////////////////////////////////////////////////////////////////////////
  273. // Fast AABBNodeMB4D intersection
  274. //////////////////////////////////////////////////////////////////////////////////////
  275. template<int N, int K>
  276. __forceinline vbool<K> intersectNodeKMB4D(const typename BVHN<N>::NodeRef ref, const size_t i,
  277. const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
  278. {
  279. const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
  280. const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
  281. const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
  282. const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
  283. const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
  284. const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
  285. const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
  286. #if defined(__aarch64__)
  287. const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x);
  288. const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y);
  289. const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z);
  290. const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x);
  291. const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y);
  292. const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z);
  293. #elif defined(__AVX2__)
  294. const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
  295. const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
  296. const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
  297. const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
  298. const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
  299. const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
  300. #else
  301. const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
  302. const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
  303. const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
  304. const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
  305. const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
  306. const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
  307. #endif
  308. const vfloat<K> lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
  309. const vfloat<K> lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
  310. vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
  311. if (unlikely(ref.isAABBNodeMB4D())) {
  312. const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
  313. lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i]));
  314. }
  315. dist = lnearP;
  316. return lhit;
  317. }
  318. //////////////////////////////////////////////////////////////////////////////////////
  319. // Robust AABBNodeMB4D intersection
  320. //////////////////////////////////////////////////////////////////////////////////////
  321. template<int N, int K>
  322. __forceinline vbool<K> intersectNodeKMB4DRobust(const typename BVHN<N>::NodeRef ref, const size_t i,
  323. const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
  324. {
  325. const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
  326. const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
  327. const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
  328. const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
  329. const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
  330. const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
  331. const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
  332. const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
  333. const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
  334. const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
  335. const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
  336. const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
  337. const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
  338. const float round_up = 1.0f+3.0f*float(ulp);
  339. const float round_down = 1.0f-3.0f*float(ulp);
  340. const vfloat<K> lnearP = round_down*maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
  341. const vfloat<K> lfarP = round_up *mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
  342. vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
  343. if (unlikely(ref.isAABBNodeMB4D())) {
  344. const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
  345. lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i]));
  346. }
  347. dist = lnearP;
  348. return lhit;
  349. }
  350. //////////////////////////////////////////////////////////////////////////////////////
  351. // Fast OBBNode intersection
  352. //////////////////////////////////////////////////////////////////////////////////////
  353. template<int N, int K, bool robust>
  354. __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNode* node, const size_t i,
  355. const TravRayK<K,robust>& ray, vfloat<K>& dist)
  356. {
  357. const AffineSpace3vf<K> naabb(Vec3f(node->naabb.l.vx.x[i], node->naabb.l.vx.y[i], node->naabb.l.vx.z[i]),
  358. Vec3f(node->naabb.l.vy.x[i], node->naabb.l.vy.y[i], node->naabb.l.vy.z[i]),
  359. Vec3f(node->naabb.l.vz.x[i], node->naabb.l.vz.y[i], node->naabb.l.vz.z[i]),
  360. Vec3f(node->naabb.p .x[i], node->naabb.p .y[i], node->naabb.p .z[i]));
  361. const Vec3vf<K> dir = xfmVector(naabb, ray.dir);
  362. const Vec3vf<K> nrdir = Vec3vf<K>(vfloat<K>(-1.0f)) * rcp_safe(dir); // FIXME: negate instead of mul with -1?
  363. const Vec3vf<K> org = xfmPoint(naabb, ray.org);
  364. const vfloat<K> lclipMinX = org.x * nrdir.x; // (Vec3fa(zero) - org) * rdir;
  365. const vfloat<K> lclipMinY = org.y * nrdir.y;
  366. const vfloat<K> lclipMinZ = org.z * nrdir.z;
  367. const vfloat<K> lclipMaxX = lclipMinX - nrdir.x; // (Vec3fa(one) - org) * rdir;
  368. const vfloat<K> lclipMaxY = lclipMinY - nrdir.y;
  369. const vfloat<K> lclipMaxZ = lclipMinZ - nrdir.z;
  370. vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
  371. vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
  372. if (robust) {
  373. lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp));
  374. lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp));
  375. }
  376. const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
  377. dist = lnearP;
  378. return lhit;
  379. }
  380. //////////////////////////////////////////////////////////////////////////////////////
  381. // Fast OBBNodeMB intersection
  382. //////////////////////////////////////////////////////////////////////////////////////
  383. template<int N, int K, bool robust>
  384. __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNodeMB* node, const size_t i,
  385. const TravRayK<K,robust>& ray, const vfloat<K>& time, vfloat<K>& dist)
  386. {
  387. const AffineSpace3vf<K> xfm(Vec3f(node->space0.l.vx.x[i], node->space0.l.vx.y[i], node->space0.l.vx.z[i]),
  388. Vec3f(node->space0.l.vy.x[i], node->space0.l.vy.y[i], node->space0.l.vy.z[i]),
  389. Vec3f(node->space0.l.vz.x[i], node->space0.l.vz.y[i], node->space0.l.vz.z[i]),
  390. Vec3f(node->space0.p .x[i], node->space0.p .y[i], node->space0.p .z[i]));
  391. const Vec3vf<K> b0_lower = zero;
  392. const Vec3vf<K> b0_upper = one;
  393. const Vec3vf<K> b1_lower(node->b1.lower.x[i], node->b1.lower.y[i], node->b1.lower.z[i]);
  394. const Vec3vf<K> b1_upper(node->b1.upper.x[i], node->b1.upper.y[i], node->b1.upper.z[i]);
  395. const Vec3vf<K> lower = lerp(b0_lower, b1_lower, time);
  396. const Vec3vf<K> upper = lerp(b0_upper, b1_upper, time);
  397. const Vec3vf<K> dir = xfmVector(xfm, ray.dir);
  398. const Vec3vf<K> rdir = rcp_safe(dir);
  399. const Vec3vf<K> org = xfmPoint(xfm, ray.org);
  400. const vfloat<K> lclipMinX = (lower.x - org.x) * rdir.x;
  401. const vfloat<K> lclipMinY = (lower.y - org.y) * rdir.y;
  402. const vfloat<K> lclipMinZ = (lower.z - org.z) * rdir.z;
  403. const vfloat<K> lclipMaxX = (upper.x - org.x) * rdir.x;
  404. const vfloat<K> lclipMaxY = (upper.y - org.y) * rdir.y;
  405. const vfloat<K> lclipMaxZ = (upper.z - org.z) * rdir.z;
  406. vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
  407. vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
  408. if (robust) {
  409. lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp));
  410. lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp));
  411. }
  412. const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
  413. dist = lnearP;
  414. return lhit;
  415. }
  416. //////////////////////////////////////////////////////////////////////////////////////
  417. // QuantizedBaseNode intersection
  418. //////////////////////////////////////////////////////////////////////////////////////
  419. template<int N, int K>
  420. __forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
  421. const TravRayK<K,false>& ray, vfloat<K>& dist)
  422. {
  423. assert(movemask(node->validMask()) & ((size_t)1 << i));
  424. const vfloat<N> lower_x = node->dequantizeLowerX();
  425. const vfloat<N> upper_x = node->dequantizeUpperX();
  426. const vfloat<N> lower_y = node->dequantizeLowerY();
  427. const vfloat<N> upper_y = node->dequantizeUpperY();
  428. const vfloat<N> lower_z = node->dequantizeLowerZ();
  429. const vfloat<N> upper_z = node->dequantizeUpperZ();
  430. #if defined(__aarch64__)
  431. const vfloat<K> lclipMinX = madd(lower_x[i], ray.rdir.x, ray.neg_org_rdir.x);
  432. const vfloat<K> lclipMinY = madd(lower_y[i], ray.rdir.y, ray.neg_org_rdir.y);
  433. const vfloat<K> lclipMinZ = madd(lower_z[i], ray.rdir.z, ray.neg_org_rdir.z);
  434. const vfloat<K> lclipMaxX = madd(upper_x[i], ray.rdir.x, ray.neg_org_rdir.x);
  435. const vfloat<K> lclipMaxY = madd(upper_y[i], ray.rdir.y, ray.neg_org_rdir.y);
  436. const vfloat<K> lclipMaxZ = madd(upper_z[i], ray.rdir.z, ray.neg_org_rdir.z);
  437. #elif defined(__AVX2__)
  438. const vfloat<K> lclipMinX = msub(lower_x[i], ray.rdir.x, ray.org_rdir.x);
  439. const vfloat<K> lclipMinY = msub(lower_y[i], ray.rdir.y, ray.org_rdir.y);
  440. const vfloat<K> lclipMinZ = msub(lower_z[i], ray.rdir.z, ray.org_rdir.z);
  441. const vfloat<K> lclipMaxX = msub(upper_x[i], ray.rdir.x, ray.org_rdir.x);
  442. const vfloat<K> lclipMaxY = msub(upper_y[i], ray.rdir.y, ray.org_rdir.y);
  443. const vfloat<K> lclipMaxZ = msub(upper_z[i], ray.rdir.z, ray.org_rdir.z);
  444. #else
  445. const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x;
  446. const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y;
  447. const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z;
  448. const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x;
  449. const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y;
  450. const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z;
  451. #endif
  452. #if defined(__AVX512F__) // SKX
  453. if (K == 16)
  454. {
  455. /* use mixed float/int min/max */
  456. const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
  457. const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
  458. const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
  459. dist = lnearP;
  460. return lhit;
  461. }
  462. else
  463. #endif
  464. {
  465. const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
  466. const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
  467. #if defined(__AVX512F__) // SKX
  468. const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
  469. #else
  470. const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
  471. #endif
  472. dist = lnearP;
  473. return lhit;
  474. }
  475. }
  476. template<int N, int K>
  477. __forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
  478. const TravRayK<K,true>& ray, vfloat<K>& dist)
  479. {
  480. assert(movemask(node->validMask()) & ((size_t)1 << i));
  481. const vfloat<N> lower_x = node->dequantizeLowerX();
  482. const vfloat<N> upper_x = node->dequantizeUpperX();
  483. const vfloat<N> lower_y = node->dequantizeLowerY();
  484. const vfloat<N> upper_y = node->dequantizeUpperY();
  485. const vfloat<N> lower_z = node->dequantizeLowerZ();
  486. const vfloat<N> upper_z = node->dequantizeUpperZ();
  487. const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x;
  488. const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y;
  489. const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z;
  490. const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x;
  491. const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y;
  492. const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z;
  493. const float round_up = 1.0f+3.0f*float(ulp);
  494. const float round_down = 1.0f-3.0f*float(ulp);
  495. const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
  496. const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
  497. const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
  498. dist = lnearP;
  499. return lhit;
  500. }
  501. template<int N, int K>
  502. __forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
  503. const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
  504. {
  505. assert(movemask(node->validMask()) & ((size_t)1 << i));
  506. const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time);
  507. const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time);
  508. const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time);
  509. const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time);
  510. const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time);
  511. const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time);
  512. #if defined(__aarch64__)
  513. const vfloat<K> lclipMinX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
  514. const vfloat<K> lclipMinY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
  515. const vfloat<K> lclipMinZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
  516. const vfloat<K> lclipMaxX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
  517. const vfloat<K> lclipMaxY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
  518. const vfloat<K> lclipMaxZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
  519. #elif defined(__AVX2__)
  520. const vfloat<K> lclipMinX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
  521. const vfloat<K> lclipMinY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
  522. const vfloat<K> lclipMinZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
  523. const vfloat<K> lclipMaxX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
  524. const vfloat<K> lclipMaxY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
  525. const vfloat<K> lclipMaxZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
  526. #else
  527. const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x;
  528. const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y;
  529. const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z;
  530. const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x;
  531. const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y;
  532. const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z;
  533. #endif
  534. const vfloat<K> lnearP = max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
  535. const vfloat<K> lfarP = min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
  536. const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
  537. dist = lnearP;
  538. return lhit;
  539. }
  540. template<int N, int K>
  541. __forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
  542. const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
  543. {
  544. assert(movemask(node->validMask()) & ((size_t)1 << i));
  545. const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time);
  546. const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time);
  547. const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time);
  548. const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time);
  549. const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time);
  550. const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time);
  551. const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x;
  552. const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y;
  553. const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z;
  554. const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x;
  555. const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y;
  556. const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z;
  557. const float round_up = 1.0f+3.0f*float(ulp);
  558. const float round_down = 1.0f-3.0f*float(ulp);
  559. const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
  560. const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
  561. const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
  562. dist = lnearP;
  563. return lhit;
  564. }
  565. //////////////////////////////////////////////////////////////////////////////////////
  566. // Node intersectors used in hybrid traversal
  567. //////////////////////////////////////////////////////////////////////////////////////
  568. /*! Intersects N nodes with K rays */
  569. template<int N, int K, int types, bool robust>
  570. struct BVHNNodeIntersectorK;
  571. template<int N, int K>
  572. struct BVHNNodeIntersectorK<N, K, BVH_AN1, false>
  573. {
  574. /* vmask is both an input and an output parameter! Its initial value should be the parent node
  575. hit mask, which is used for correctly computing the current hit mask. The parent hit mask
  576. is actually required only for motion blur node intersections (because different rays may
  577. have different times), so for regular nodes vmask is simply overwritten. */
  578. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  579. const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  580. {
  581. vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
  582. return true;
  583. }
  584. };
  585. template<int N, int K>
  586. struct BVHNNodeIntersectorK<N, K, BVH_AN1, true>
  587. {
  588. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  589. const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  590. {
  591. vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
  592. return true;
  593. }
  594. };
  595. template<int N, int K>
  596. struct BVHNNodeIntersectorK<N, K, BVH_AN2, false>
  597. {
  598. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  599. const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  600. {
  601. vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
  602. return true;
  603. }
  604. };
  605. template<int N, int K>
  606. struct BVHNNodeIntersectorK<N, K, BVH_AN2, true>
  607. {
  608. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  609. const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  610. {
  611. vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
  612. return true;
  613. }
  614. };
  615. template<int N, int K>
  616. struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, false>
  617. {
  618. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  619. const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  620. {
  621. if (likely(node.isAABBNode())) vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
  622. else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
  623. return true;
  624. }
  625. };
  626. template<int N, int K>
  627. struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, true>
  628. {
  629. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  630. const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  631. {
  632. if (likely(node.isAABBNode())) vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
  633. else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
  634. return true;
  635. }
  636. };
  637. template<int N, int K>
  638. struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, false>
  639. {
  640. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  641. const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  642. {
  643. if (likely(node.isAABBNodeMB())) vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
  644. else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
  645. return true;
  646. }
  647. };
  648. template<int N, int K>
  649. struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, true>
  650. {
  651. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  652. const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  653. {
  654. if (likely(node.isAABBNodeMB())) vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
  655. else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
  656. return true;
  657. }
  658. };
  659. template<int N, int K>
  660. struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, false>
  661. {
  662. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  663. const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  664. {
  665. vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
  666. return true;
  667. }
  668. };
  669. template<int N, int K>
  670. struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, true>
  671. {
  672. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  673. const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  674. {
  675. vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
  676. return true;
  677. }
  678. };
  679. template<int N, int K>
  680. struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, false>
  681. {
  682. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  683. const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  684. {
  685. if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
  686. vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
  687. } else /*if (unlikely(node.isOBBNodeMB()))*/ {
  688. assert(node.isOBBNodeMB());
  689. vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
  690. }
  691. return true;
  692. }
  693. };
  694. template<int N, int K>
  695. struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, true>
  696. {
  697. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  698. const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  699. {
  700. if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
  701. vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
  702. } else /*if (unlikely(node.isOBBNodeMB()))*/ {
  703. assert(node.isOBBNodeMB());
  704. vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
  705. }
  706. return true;
  707. }
  708. };
  709. /*! Intersects N nodes with K rays */
  710. template<int N, int K, bool robust>
  711. struct BVHNQuantizedBaseNodeIntersectorK;
  712. template<int N, int K>
  713. struct BVHNQuantizedBaseNodeIntersectorK<N, K, false>
  714. {
  715. static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
  716. const TravRayK<K,false>& ray, vfloat<K>& dist)
  717. {
  718. return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
  719. }
  720. static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
  721. const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
  722. {
  723. return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
  724. }
  725. };
  726. template<int N, int K>
  727. struct BVHNQuantizedBaseNodeIntersectorK<N, K, true>
  728. {
  729. static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
  730. const TravRayK<K,true>& ray, vfloat<K>& dist)
  731. {
  732. return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
  733. }
  734. static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
  735. const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
  736. {
  737. return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
  738. }
  739. };
  740. }
  741. }