123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411 |
- /*
- * Copyright 2011-2014, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * Aligned nodes intersection AVX code is adopted from Embree.
- */
- struct OBVHStackItem { /* Element type reordered by obvh_item_swap() and the obvh_stack_sort() overloads. */
- int addr; /* Payload carried along with the entry; presumably a BVH node address -- TODO confirm against the traversal code. */
- float dist; /* Sort key: the obvh_stack_sort() overloads compare and order entries by this value. */
- };
- /* Choose the near and far slot index of every axis from the sign of `idir`.
-  *
-  * X owns slots 0/1, Y owns 2/3 and Z owns 4/5. For a component that counts as
-  * non-negative the lower slot goes to near_* and the upper slot to far_*;
-  * otherwise the two are exchanged.
-  *
-  * The SSE path decides from bits 0..2 of movemask() (presumably the per-lane
-  * sign bits -- confirm against the ssef helpers), the scalar path from a
-  * `>= 0.0f` comparison. */
- ccl_device_inline void obvh_near_far_idx_calc(const float3 &idir,
-                                               int *ccl_restrict near_x,
-                                               int *ccl_restrict near_y,
-                                               int *ccl_restrict near_z,
-                                               int *ccl_restrict far_x,
-                                               int *ccl_restrict far_y,
-                                               int *ccl_restrict far_z)
- {
- #ifdef __KERNEL_SSE__
-   /* One flag per axis: bit 0 is X, bit 1 is Y, bit 2 is Z. */
-   const size_t sign_bits = movemask(ssef(idir.m128));
-   const int flip_x = sign_bits & 1;
-   const int flip_y = (sign_bits >> 1) & 1;
-   const int flip_z = (sign_bits >> 2) & 1;
-   /* A set flag raises near by one and lowers far by one, swapping the pair. */
-   *near_x = 0 + flip_x;
-   *far_x = 1 - flip_x;
-   *near_y = 2 + flip_y;
-   *far_y = 3 - flip_y;
-   *near_z = 4 + flip_z;
-   *far_z = 5 - flip_z;
- #else
-   const bool forward_x = idir.x >= 0.0f;
-   const bool forward_y = idir.y >= 0.0f;
-   const bool forward_z = idir.z >= 0.0f;
-   *near_x = forward_x ? 0 : 1;
-   *far_x = forward_x ? 1 : 0;
-   *near_y = forward_y ? 2 : 3;
-   *far_y = forward_y ? 3 : 2;
-   *near_z = forward_z ? 4 : 5;
-   *far_z = forward_z ? 5 : 4;
- #endif
- }
- /* Exchange the contents of the two stack items `a` and `b`. */
- ccl_device_inline void obvh_item_swap(OBVHStackItem *ccl_restrict a, OBVHStackItem *ccl_restrict b)
- {
-   const OBVHStackItem saved_b = *b;
-   *b = *a;
-   *a = saved_b;
- }
- /* Sort three stack items in place so that `dist` does not decrease from s1 to s3.
-  *
-  * Three compare-exchange steps: (s1, s2), (s2, s3), (s1, s2). */
- ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
-                                        OBVHStackItem *ccl_restrict s2,
-                                        OBVHStackItem *ccl_restrict s3)
- {
-   if (s1->dist > s2->dist) {
-     obvh_item_swap(s1, s2);
-   }
-   if (s2->dist > s3->dist) {
-     obvh_item_swap(s2, s3);
-   }
-   /* The second step may have pulled a smaller item into s2; re-check the first pair. */
-   if (s1->dist > s2->dist) {
-     obvh_item_swap(s1, s2);
-   }
- }
- /* Sort four stack items in place so that `dist` does not decrease from s1 to s4.
-  *
-  * Five compare-exchange steps: (s1, s2), (s3, s4), (s1, s3), (s2, s4), (s2, s3). */
- ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
-                                        OBVHStackItem *ccl_restrict s2,
-                                        OBVHStackItem *ccl_restrict s3,
-                                        OBVHStackItem *ccl_restrict s4)
- {
-   /* Order each half. */
-   if (s1->dist > s2->dist) {
-     obvh_item_swap(s1, s2);
-   }
-   if (s3->dist > s4->dist) {
-     obvh_item_swap(s3, s4);
-   }
-   /* Smallest of the two heads goes to s1, largest of the two tails to s4. */
-   if (s1->dist > s3->dist) {
-     obvh_item_swap(s1, s3);
-   }
-   if (s2->dist > s4->dist) {
-     obvh_item_swap(s2, s4);
-   }
-   /* Only the middle pair can still be out of order. */
-   if (s2->dist > s3->dist) {
-     obvh_item_swap(s2, s3);
-   }
- }
- /* Sort five stack items in place so that `dist` does not decrease from s1 to s5.
-  *
-  * s1..s4 are sorted with the four-item overload, then s5 is moved towards s1
-  * one slot at a time for as long as it is smaller than its predecessor. */
- ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
-                                        OBVHStackItem *ccl_restrict s2,
-                                        OBVHStackItem *ccl_restrict s3,
-                                        OBVHStackItem *ccl_restrict s4,
-                                        OBVHStackItem *ccl_restrict s5)
- {
-   obvh_stack_sort(s1, s2, s3, s4);
-   /* Each step stops as soon as the moving item is no longer smaller. */
-   if (!(s5->dist < s4->dist)) {
-     return;
-   }
-   obvh_item_swap(s4, s5);
-   if (!(s4->dist < s3->dist)) {
-     return;
-   }
-   obvh_item_swap(s3, s4);
-   if (!(s3->dist < s2->dist)) {
-     return;
-   }
-   obvh_item_swap(s2, s3);
-   if (!(s2->dist < s1->dist)) {
-     return;
-   }
-   obvh_item_swap(s1, s2);
- }
- /* Sort six stack items in place so that `dist` does not decrease from s1 to s6.
-  *
-  * s1..s5 are sorted with the five-item overload, then s6 is moved towards s1
-  * one slot at a time for as long as it is smaller than its predecessor. */
- ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
-                                        OBVHStackItem *ccl_restrict s2,
-                                        OBVHStackItem *ccl_restrict s3,
-                                        OBVHStackItem *ccl_restrict s4,
-                                        OBVHStackItem *ccl_restrict s5,
-                                        OBVHStackItem *ccl_restrict s6)
- {
-   obvh_stack_sort(s1, s2, s3, s4, s5);
-   /* Each step stops as soon as the moving item is no longer smaller. */
-   if (!(s6->dist < s5->dist)) {
-     return;
-   }
-   obvh_item_swap(s5, s6);
-   if (!(s5->dist < s4->dist)) {
-     return;
-   }
-   obvh_item_swap(s4, s5);
-   if (!(s4->dist < s3->dist)) {
-     return;
-   }
-   obvh_item_swap(s3, s4);
-   if (!(s3->dist < s2->dist)) {
-     return;
-   }
-   obvh_item_swap(s2, s3);
-   if (!(s2->dist < s1->dist)) {
-     return;
-   }
-   obvh_item_swap(s1, s2);
- }
- /* Sort seven stack items in place so that `dist` does not decrease from s1 to s7.
-  *
-  * s1..s6 are sorted with the six-item overload, then s7 is moved towards s1
-  * one slot at a time for as long as it is smaller than its predecessor. */
- ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
-                                        OBVHStackItem *ccl_restrict s2,
-                                        OBVHStackItem *ccl_restrict s3,
-                                        OBVHStackItem *ccl_restrict s4,
-                                        OBVHStackItem *ccl_restrict s5,
-                                        OBVHStackItem *ccl_restrict s6,
-                                        OBVHStackItem *ccl_restrict s7)
- {
-   obvh_stack_sort(s1, s2, s3, s4, s5, s6);
-   /* Each step stops as soon as the moving item is no longer smaller. */
-   if (!(s7->dist < s6->dist)) {
-     return;
-   }
-   obvh_item_swap(s6, s7);
-   if (!(s6->dist < s5->dist)) {
-     return;
-   }
-   obvh_item_swap(s5, s6);
-   if (!(s5->dist < s4->dist)) {
-     return;
-   }
-   obvh_item_swap(s4, s5);
-   if (!(s4->dist < s3->dist)) {
-     return;
-   }
-   obvh_item_swap(s3, s4);
-   if (!(s3->dist < s2->dist)) {
-     return;
-   }
-   obvh_item_swap(s2, s3);
-   if (!(s2->dist < s1->dist)) {
-     return;
-   }
-   obvh_item_swap(s1, s2);
- }
- /* Sort eight stack items in place so that `dist` does not decrease from s1 to s8.
-  *
-  * s1..s7 are sorted with the seven-item overload, then s8 is moved towards s1
-  * one slot at a time for as long as it is smaller than its predecessor. */
- ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
-                                        OBVHStackItem *ccl_restrict s2,
-                                        OBVHStackItem *ccl_restrict s3,
-                                        OBVHStackItem *ccl_restrict s4,
-                                        OBVHStackItem *ccl_restrict s5,
-                                        OBVHStackItem *ccl_restrict s6,
-                                        OBVHStackItem *ccl_restrict s7,
-                                        OBVHStackItem *ccl_restrict s8)
- {
-   obvh_stack_sort(s1, s2, s3, s4, s5, s6, s7);
-   /* Each step stops as soon as the moving item is no longer smaller. */
-   if (!(s8->dist < s7->dist)) {
-     return;
-   }
-   obvh_item_swap(s7, s8);
-   if (!(s7->dist < s6->dist)) {
-     return;
-   }
-   obvh_item_swap(s6, s7);
-   if (!(s6->dist < s5->dist)) {
-     return;
-   }
-   obvh_item_swap(s5, s6);
-   if (!(s5->dist < s4->dist)) {
-     return;
-   }
-   obvh_item_swap(s4, s5);
-   if (!(s4->dist < s3->dist)) {
-     return;
-   }
-   obvh_item_swap(s3, s4);
-   if (!(s3->dist < s2->dist)) {
-     return;
-   }
-   obvh_item_swap(s2, s3);
-   if (!(s2->dist < s1->dist)) {
-     return;
-   }
-   obvh_item_swap(s1, s2);
- }
- /* Axis-aligned nodes intersection.
-  *
-  * With __KERNEL_AVX2__: fetches six avxf values from __bvh_nodes at
-  * `node_addr + 2 + index * 2`, where index is one of near_x/y/z and
-  * far_x/y/z, and turns each into a distance with msub(value, idir, org_idir)
-  * (presumably value * idir - org_idir -- confirm against the avxf helpers).
-  * The entry distance is the lane-wise maximum of the three near distances and
-  * isect_near, the exit distance the lane-wise minimum of the three far
-  * distances and isect_far. The entry distance is stored in *dist and the
-  * movemask() of the lanes where entry <= exit is returned.
-  *
-  * Without __KERNEL_AVX2__: returns 0 and leaves *dist untouched. */
- ccl_device_inline int obvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
-                                                   const avxf &isect_near,
-                                                   const avxf &isect_far,
- #ifdef __KERNEL_AVX2__
-                                                   const avx3f &org_idir,
- #else
-                                                   const avx3f &org,
- #endif
-                                                   const avx3f &idir,
-                                                   const int near_x,
-                                                   const int near_y,
-                                                   const int near_z,
-                                                   const int far_x,
-                                                   const int far_y,
-                                                   const int far_z,
-                                                   const int node_addr,
-                                                   avxf *ccl_restrict dist)
- {
- #ifndef __KERNEL_AVX2__
-   return 0;
- #else
-   const int bounds_base = node_addr + 2;
-   /* Plane values selected by the precomputed near/far slot indices. */
-   const avxf plane_near_x = kernel_tex_fetch_avxf(__bvh_nodes, bounds_base + near_x * 2);
-   const avxf plane_near_y = kernel_tex_fetch_avxf(__bvh_nodes, bounds_base + near_y * 2);
-   const avxf plane_near_z = kernel_tex_fetch_avxf(__bvh_nodes, bounds_base + near_z * 2);
-   const avxf plane_far_x = kernel_tex_fetch_avxf(__bvh_nodes, bounds_base + far_x * 2);
-   const avxf plane_far_y = kernel_tex_fetch_avxf(__bvh_nodes, bounds_base + far_y * 2);
-   const avxf plane_far_z = kernel_tex_fetch_avxf(__bvh_nodes, bounds_base + far_z * 2);
-   /* Per-axis entry and exit distances. */
-   const avxf t_in_x = msub(plane_near_x, idir.x, org_idir.x);
-   const avxf t_in_y = msub(plane_near_y, idir.y, org_idir.y);
-   const avxf t_in_z = msub(plane_near_z, idir.z, org_idir.z);
-   const avxf t_out_x = msub(plane_far_x, idir.x, org_idir.x);
-   const avxf t_out_y = msub(plane_far_y, idir.y, org_idir.y);
-   const avxf t_out_z = msub(plane_far_z, idir.z, org_idir.z);
-   const avxf t_entry = max4(t_in_x, t_in_y, t_in_z, isect_near);
-   const avxf t_exit = min4(t_out_x, t_out_y, t_out_z, isect_far);
-   const avxb hit_lanes = t_entry <= t_exit;
-   *dist = t_entry;
-   return (int)movemask(hit_lanes);
- #endif
- }
- /* Unaligned nodes intersection.
-  *
-  * Fetches twelve avxf values from __bvh_nodes at node_addr + 2, + 4, ... + 24:
-  * three coefficients for each of the x, y and z output rows, followed by a
-  * translation per row. `dir` is multiplied by the coefficients, `org` is
-  * multiplied by them and then translated. The transformed ray is tested
-  * against the slabs between 0 and 1 on every axis, clipped by isect_near and
-  * isect_far.
-  *
-  * The entry distance is stored in *dist and the movemask() of the lanes where
-  * entry <= exit is returned.
-  *
-  * org_idir, idir and the near/far indices are not read here. */
- ccl_device_inline int obvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg,
-                                                     const avxf &isect_near,
-                                                     const avxf &isect_far,
- #ifdef __KERNEL_AVX2__
-                                                     const avx3f &org_idir,
- #endif
-                                                     const avx3f &org,
-                                                     const avx3f &dir,
-                                                     const avx3f &idir,
-                                                     const int near_x,
-                                                     const int near_y,
-                                                     const int near_z,
-                                                     const int far_x,
-                                                     const int far_y,
-                                                     const int far_z,
-                                                     const int node_addr,
-                                                     avxf *ccl_restrict dist)
- {
-   const int base = node_addr;
-   /* Row coefficients. */
-   const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, base + 2);
-   const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, base + 4);
-   const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, base + 6);
-   const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, base + 8);
-   const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, base + 10);
-   const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, base + 12);
-   const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, base + 14);
-   const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, base + 16);
-   const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, base + 18);
-   /* Translation. */
-   const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, base + 20);
-   const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, base + 22);
-   const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, base + 24);
-   /* Ray direction in the node's space (no translation). */
-   const avxf local_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z;
-   const avxf local_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z;
-   const avxf local_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
-   /* Ray origin in the node's space. */
-   const avxf local_org_x = org.x * tfm_x_x + org.y * tfm_x_y + org.z * tfm_x_z + tfm_t_x;
-   const avxf local_org_y = org.x * tfm_y_x + org.y * tfm_y_y + org.z * tfm_y_z + tfm_t_y;
-   const avxf local_org_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z;
-   /* Negated reciprocal direction. */
-   const avxf neg_one(-1.0f);
-   const avxf neg_inv_dir_x = neg_one / local_dir_x;
-   const avxf neg_inv_dir_y = neg_one / local_dir_y;
-   const avxf neg_inv_dir_z = neg_one / local_dir_z;
-   /* Distance at which each transformed coordinate reaches 0 ... */
-   const avxf t_zero_x = local_org_x * neg_inv_dir_x;
-   const avxf t_zero_y = local_org_y * neg_inv_dir_y;
-   const avxf t_zero_z = local_org_z * neg_inv_dir_z;
-   /* ... and at which it reaches 1. */
-   const avxf t_one_x = t_zero_x - neg_inv_dir_x;
-   const avxf t_one_y = t_zero_y - neg_inv_dir_y;
-   const avxf t_one_z = t_zero_z - neg_inv_dir_z;
-   /* Which of the two comes first depends on the direction sign, so order them. */
-   const avxf t_in_x = min(t_zero_x, t_one_x);
-   const avxf t_in_y = min(t_zero_y, t_one_y);
-   const avxf t_in_z = min(t_zero_z, t_one_z);
-   const avxf t_out_x = max(t_zero_x, t_one_x);
-   const avxf t_out_y = max(t_zero_y, t_one_y);
-   const avxf t_out_z = max(t_zero_z, t_one_z);
-   const avxf t_entry = max4(isect_near, t_in_x, t_in_y, t_in_z);
-   const avxf t_exit = min4(isect_far, t_out_x, t_out_y, t_out_z);
-   const avxb hit_lanes = t_entry <= t_exit;
-   *dist = t_entry;
-   return movemask(hit_lanes);
- }
- /* Intersector wrapper.
-  *
-  * Reads the float4 stored at `node_addr` in __bvh_nodes and tests the bits of
-  * its x component against PATH_RAY_NODE_UNALIGNED. When the flag is set the
-  * call is forwarded to obvh_unaligned_node_intersect(), otherwise to
-  * obvh_aligned_node_intersect(). The callee's return value is passed through
-  * unchanged, and `dist` is handed to the callee as is. */
- ccl_device_inline int obvh_node_intersect(KernelGlobals *ccl_restrict kg,
-                                           const avxf &isect_near,
-                                           const avxf &isect_far,
- #ifdef __KERNEL_AVX2__
-                                           const avx3f &org_idir,
- #endif
-                                           const avx3f &org,
-                                           const avx3f &dir,
-                                           const avx3f &idir,
-                                           const int near_x,
-                                           const int near_y,
-                                           const int near_z,
-                                           const int far_x,
-                                           const int far_y,
-                                           const int far_z,
-                                           const int node_addr,
-                                           avxf *ccl_restrict dist)
- {
-   const float4 node_header = kernel_tex_fetch(__bvh_nodes, node_addr);
-   const bool is_unaligned = (__float_as_uint(node_header.x) & PATH_RAY_NODE_UNALIGNED) != 0;
-   if (!is_unaligned) {
-     /* The aligned test takes org_idir on AVX2 builds and org otherwise. */
-     return obvh_aligned_node_intersect(kg,
-                                        isect_near,
-                                        isect_far,
- #ifdef __KERNEL_AVX2__
-                                        org_idir,
- #else
-                                        org,
- #endif
-                                        idir,
-                                        near_x,
-                                        near_y,
-                                        near_z,
-                                        far_x,
-                                        far_y,
-                                        far_z,
-                                        node_addr,
-                                        dist);
-   }
-   return obvh_unaligned_node_intersect(kg,
-                                        isect_near,
-                                        isect_far,
- #ifdef __KERNEL_AVX2__
-                                        org_idir,
- #endif
-                                        org,
-                                        dir,
-                                        idir,
-                                        near_x,
-                                        near_y,
-                                        near_z,
-                                        far_x,
-                                        far_y,
-                                        far_z,
-                                        node_addr,
-                                        dist);
- }
|