123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515 |
- /*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #ifndef __UTIL_TRANSFORM_H__
- #define __UTIL_TRANSFORM_H__
- #ifndef __KERNEL_GPU__
- # include <string.h>
- #endif
- #include "util/util_math.h"
- #include "util/util_types.h"
- CCL_NAMESPACE_BEGIN
- /* Affine transformation, stored as 4x3 matrix. */
- typedef struct Transform {
- float4 x, y, z;
- #ifndef __KERNEL_GPU__
- float4 operator[](int i) const
- {
- return *(&x + i);
- }
- float4 &operator[](int i)
- {
- return *(&x + i);
- }
- #endif
- } Transform;
- /* Transform decomposed in rotation/translation/scale. we use the same data
- * structure as Transform, and tightly pack decomposition into it. first the
- * rotation (4), then translation (3), then 3x3 scale matrix (9). */
- typedef struct DecomposedTransform {
- float4 x, y, z, w;
- } DecomposedTransform;
- /* Functions */
- ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
- {
- /* TODO(sergey): Disabled for now, causes crashes in certain cases. */
- #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
- ssef x, y, z, w, aa;
- aa = a.m128;
- x = _mm_loadu_ps(&t->x.x);
- y = _mm_loadu_ps(&t->y.x);
- z = _mm_loadu_ps(&t->z.x);
- w = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f);
- _MM_TRANSPOSE4_PS(x, y, z, w);
- ssef tmp = shuffle<0>(aa) * x;
- tmp = madd(shuffle<1>(aa), y, tmp);
- tmp = madd(shuffle<2>(aa), z, tmp);
- tmp += w;
- return float3(tmp.m128);
- #else
- float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z + t->x.w,
- a.x * t->y.x + a.y * t->y.y + a.z * t->y.z + t->y.w,
- a.x * t->z.x + a.y * t->z.y + a.z * t->z.z + t->z.w);
- return c;
- #endif
- }
- ccl_device_inline float3 transform_direction(const Transform *t, const float3 a)
- {
- #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
- ssef x, y, z, w, aa;
- aa = a.m128;
- x = _mm_loadu_ps(&t->x.x);
- y = _mm_loadu_ps(&t->y.x);
- z = _mm_loadu_ps(&t->z.x);
- w = _mm_setzero_ps();
- _MM_TRANSPOSE4_PS(x, y, z, w);
- ssef tmp = shuffle<0>(aa) * x;
- tmp = madd(shuffle<1>(aa), y, tmp);
- tmp = madd(shuffle<2>(aa), z, tmp);
- return float3(tmp.m128);
- #else
- float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z,
- a.x * t->y.x + a.y * t->y.y + a.z * t->y.z,
- a.x * t->z.x + a.y * t->z.y + a.z * t->z.z);
- return c;
- #endif
- }
- ccl_device_inline float3 transform_direction_transposed(const Transform *t, const float3 a)
- {
- float3 x = make_float3(t->x.x, t->y.x, t->z.x);
- float3 y = make_float3(t->x.y, t->y.y, t->z.y);
- float3 z = make_float3(t->x.z, t->y.z, t->z.z);
- return make_float3(dot(x, a), dot(y, a), dot(z, a));
- }
- ccl_device_inline Transform make_transform(float a,
- float b,
- float c,
- float d,
- float e,
- float f,
- float g,
- float h,
- float i,
- float j,
- float k,
- float l)
- {
- Transform t;
- t.x.x = a;
- t.x.y = b;
- t.x.z = c;
- t.x.w = d;
- t.y.x = e;
- t.y.y = f;
- t.y.z = g;
- t.y.w = h;
- t.z.x = i;
- t.z.y = j;
- t.z.z = k;
- t.z.w = l;
- return t;
- }
- /* Constructs a coordinate frame from a normalized normal. */
- ccl_device_inline Transform make_transform_frame(float3 N)
- {
- const float3 dx0 = cross(make_float3(1.0f, 0.0f, 0.0f), N);
- const float3 dx1 = cross(make_float3(0.0f, 1.0f, 0.0f), N);
- const float3 dx = normalize((dot(dx0, dx0) > dot(dx1, dx1)) ? dx0 : dx1);
- const float3 dy = normalize(cross(N, dx));
- return make_transform(dx.x, dx.y, dx.z, 0.0f, dy.x, dy.y, dy.z, 0.0f, N.x, N.y, N.z, 0.0f);
- }
- #ifndef __KERNEL_GPU__
- ccl_device_inline Transform operator*(const Transform a, const Transform b)
- {
- float4 c_x = make_float4(b.x.x, b.y.x, b.z.x, 0.0f);
- float4 c_y = make_float4(b.x.y, b.y.y, b.z.y, 0.0f);
- float4 c_z = make_float4(b.x.z, b.y.z, b.z.z, 0.0f);
- float4 c_w = make_float4(b.x.w, b.y.w, b.z.w, 1.0f);
- Transform t;
- t.x = make_float4(dot(a.x, c_x), dot(a.x, c_y), dot(a.x, c_z), dot(a.x, c_w));
- t.y = make_float4(dot(a.y, c_x), dot(a.y, c_y), dot(a.y, c_z), dot(a.y, c_w));
- t.z = make_float4(dot(a.z, c_x), dot(a.z, c_y), dot(a.z, c_z), dot(a.z, c_w));
- return t;
- }
- ccl_device_inline void print_transform(const char *label, const Transform &t)
- {
- print_float4(label, t.x);
- print_float4(label, t.y);
- print_float4(label, t.z);
- printf("\n");
- }
- ccl_device_inline Transform transform_translate(float3 t)
- {
- return make_transform(1, 0, 0, t.x, 0, 1, 0, t.y, 0, 0, 1, t.z);
- }
- ccl_device_inline Transform transform_translate(float x, float y, float z)
- {
- return transform_translate(make_float3(x, y, z));
- }
- ccl_device_inline Transform transform_scale(float3 s)
- {
- return make_transform(s.x, 0, 0, 0, 0, s.y, 0, 0, 0, 0, s.z, 0);
- }
- ccl_device_inline Transform transform_scale(float x, float y, float z)
- {
- return transform_scale(make_float3(x, y, z));
- }
- ccl_device_inline Transform transform_rotate(float angle, float3 axis)
- {
- float s = sinf(angle);
- float c = cosf(angle);
- float t = 1.0f - c;
- axis = normalize(axis);
- return make_transform(axis.x * axis.x * t + c,
- axis.x * axis.y * t - s * axis.z,
- axis.x * axis.z * t + s * axis.y,
- 0.0f,
- axis.y * axis.x * t + s * axis.z,
- axis.y * axis.y * t + c,
- axis.y * axis.z * t - s * axis.x,
- 0.0f,
- axis.z * axis.x * t - s * axis.y,
- axis.z * axis.y * t + s * axis.x,
- axis.z * axis.z * t + c,
- 0.0f);
- }
- /* Euler is assumed to be in XYZ order. */
- ccl_device_inline Transform transform_euler(float3 euler)
- {
- return transform_rotate(euler.z, make_float3(0.0f, 0.0f, 1.0f)) *
- transform_rotate(euler.y, make_float3(0.0f, 1.0f, 0.0f)) *
- transform_rotate(euler.x, make_float3(1.0f, 0.0f, 0.0f));
- }
- ccl_device_inline Transform transform_identity()
- {
- return transform_scale(1.0f, 1.0f, 1.0f);
- }
- ccl_device_inline bool operator==(const Transform &A, const Transform &B)
- {
- return memcmp(&A, &B, sizeof(Transform)) == 0;
- }
- ccl_device_inline bool operator!=(const Transform &A, const Transform &B)
- {
- return !(A == B);
- }
- ccl_device_inline float3 transform_get_column(const Transform *t, int column)
- {
- return make_float3(t->x[column], t->y[column], t->z[column]);
- }
- ccl_device_inline void transform_set_column(Transform *t, int column, float3 value)
- {
- t->x[column] = value.x;
- t->y[column] = value.y;
- t->z[column] = value.z;
- }
/* Declarations only; the definitions are not part of this header.
 * NOTE(review): going by the names these return the inverse and the
 * transposed inverse of `a` -- confirm against the implementation. */
- Transform transform_inverse(const Transform &a);
- Transform transform_transposed_inverse(const Transform &a);
- ccl_device_inline bool transform_uniform_scale(const Transform &tfm, float &scale)
- {
- /* the epsilon here is quite arbitrary, but this function is only used for
- * surface area and bump, where we expect it to not be so sensitive */
- float eps = 1e-6f;
- float sx = len_squared(float4_to_float3(tfm.x));
- float sy = len_squared(float4_to_float3(tfm.y));
- float sz = len_squared(float4_to_float3(tfm.z));
- float stx = len_squared(transform_get_column(&tfm, 0));
- float sty = len_squared(transform_get_column(&tfm, 1));
- float stz = len_squared(transform_get_column(&tfm, 2));
- if (fabsf(sx - sy) < eps && fabsf(sx - sz) < eps && fabsf(sx - stx) < eps &&
- fabsf(sx - sty) < eps && fabsf(sx - stz) < eps) {
- scale = sx;
- return true;
- }
- return false;
- }
- ccl_device_inline bool transform_negative_scale(const Transform &tfm)
- {
- float3 c0 = transform_get_column(&tfm, 0);
- float3 c1 = transform_get_column(&tfm, 1);
- float3 c2 = transform_get_column(&tfm, 2);
- return (dot(cross(c0, c1), c2) < 0.0f);
- }
- ccl_device_inline Transform transform_clear_scale(const Transform &tfm)
- {
- Transform ntfm = tfm;
- transform_set_column(&ntfm, 0, normalize(transform_get_column(&ntfm, 0)));
- transform_set_column(&ntfm, 1, normalize(transform_get_column(&ntfm, 1)));
- transform_set_column(&ntfm, 2, normalize(transform_get_column(&ntfm, 2)));
- return ntfm;
- }
- ccl_device_inline Transform transform_empty()
- {
- return make_transform(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
- }
- #endif
- /* Motion Transform */
- ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t)
- {
- /* use simpe nlerp instead of slerp. it's faster and almost the same */
- return normalize((1.0f - t) * q1 + t * q2);
- #if 0
- /* note: this does not ensure rotation around shortest angle, q1 and q2
- * are assumed to be matched already in transform_motion_decompose */
- float costheta = dot(q1, q2);
- /* possible optimization: it might be possible to precompute theta/qperp */
- if (costheta > 0.9995f) {
- /* linear interpolation in degenerate case */
- return normalize((1.0f - t) * q1 + t * q2);
- }
- else {
- /* slerp */
- float theta = acosf(clamp(costheta, -1.0f, 1.0f));
- float4 qperp = normalize(q2 - q1 * costheta);
- float thetap = theta * t;
- return q1 * cosf(thetap) + qperp * sinf(thetap);
- }
- #endif
- }
- ccl_device_inline Transform transform_quick_inverse(Transform M)
- {
- /* possible optimization: can we avoid doing this altogether and construct
- * the inverse matrix directly from negated translation, transposed rotation,
- * scale can be inverted but what about shearing? */
- Transform R;
- float det = M.x.x * (M.z.z * M.y.y - M.z.y * M.y.z) - M.y.x * (M.z.z * M.x.y - M.z.y * M.x.z) +
- M.z.x * (M.y.z * M.x.y - M.y.y * M.x.z);
- if (det == 0.0f) {
- M.x.x += 1e-8f;
- M.y.y += 1e-8f;
- M.z.z += 1e-8f;
- det = M.x.x * (M.z.z * M.y.y - M.z.y * M.y.z) - M.y.x * (M.z.z * M.x.y - M.z.y * M.x.z) +
- M.z.x * (M.y.z * M.x.y - M.y.y * M.x.z);
- }
- det = (det != 0.0f) ? 1.0f / det : 0.0f;
- float3 Rx = det * make_float3(M.z.z * M.y.y - M.z.y * M.y.z,
- M.z.y * M.x.z - M.z.z * M.x.y,
- M.y.z * M.x.y - M.y.y * M.x.z);
- float3 Ry = det * make_float3(M.z.x * M.y.z - M.z.z * M.y.x,
- M.z.z * M.x.x - M.z.x * M.x.z,
- M.y.x * M.x.z - M.y.z * M.x.x);
- float3 Rz = det * make_float3(M.z.y * M.y.x - M.z.x * M.y.y,
- M.z.x * M.x.y - M.z.y * M.x.x,
- M.y.y * M.x.x - M.y.x * M.x.y);
- float3 T = -make_float3(M.x.w, M.y.w, M.z.w);
- R.x = make_float4(Rx.x, Rx.y, Rx.z, dot(Rx, T));
- R.y = make_float4(Ry.x, Ry.y, Ry.z, dot(Ry, T));
- R.z = make_float4(Rz.x, Rz.y, Rz.z, dot(Rz, T));
- return R;
- }
- ccl_device_inline void transform_compose(Transform *tfm, const DecomposedTransform *decomp)
- {
- /* rotation */
- float q0, q1, q2, q3, qda, qdb, qdc, qaa, qab, qac, qbb, qbc, qcc;
- q0 = M_SQRT2_F * decomp->x.w;
- q1 = M_SQRT2_F * decomp->x.x;
- q2 = M_SQRT2_F * decomp->x.y;
- q3 = M_SQRT2_F * decomp->x.z;
- qda = q0 * q1;
- qdb = q0 * q2;
- qdc = q0 * q3;
- qaa = q1 * q1;
- qab = q1 * q2;
- qac = q1 * q3;
- qbb = q2 * q2;
- qbc = q2 * q3;
- qcc = q3 * q3;
- float3 rotation_x = make_float3(1.0f - qbb - qcc, -qdc + qab, qdb + qac);
- float3 rotation_y = make_float3(qdc + qab, 1.0f - qaa - qcc, -qda + qbc);
- float3 rotation_z = make_float3(-qdb + qac, qda + qbc, 1.0f - qaa - qbb);
- /* scale */
- float3 scale_x = make_float3(decomp->y.w, decomp->z.z, decomp->w.y);
- float3 scale_y = make_float3(decomp->z.x, decomp->z.w, decomp->w.z);
- float3 scale_z = make_float3(decomp->z.y, decomp->w.x, decomp->w.w);
- /* compose with translation */
- tfm->x = make_float4(
- dot(rotation_x, scale_x), dot(rotation_x, scale_y), dot(rotation_x, scale_z), decomp->y.x);
- tfm->y = make_float4(
- dot(rotation_y, scale_x), dot(rotation_y, scale_y), dot(rotation_y, scale_z), decomp->y.y);
- tfm->z = make_float4(
- dot(rotation_z, scale_x), dot(rotation_z, scale_y), dot(rotation_z, scale_z), decomp->y.z);
- }
- /* Interpolate from array of decomposed transforms. */
- ccl_device void transform_motion_array_interpolate(Transform *tfm,
- const ccl_global DecomposedTransform *motion,
- uint numsteps,
- float time)
- {
- /* Figure out which steps we need to interpolate. */
- int maxstep = numsteps - 1;
- int step = min((int)(time * maxstep), maxstep - 1);
- float t = time * maxstep - step;
- const ccl_global DecomposedTransform *a = motion + step;
- const ccl_global DecomposedTransform *b = motion + step + 1;
- /* Interpolate rotation, translation and scale. */
- DecomposedTransform decomp;
- decomp.x = quat_interpolate(a->x, b->x, t);
- decomp.y = (1.0f - t) * a->y + t * b->y;
- decomp.z = (1.0f - t) * a->z + t * b->z;
- decomp.w = (1.0f - t) * a->w + t * b->w;
- /* Compose rotation, translation, scale into matrix. */
- transform_compose(tfm, &decomp);
- }
- #ifndef __KERNEL_GPU__
- # ifdef WITH_EMBREE
- ccl_device void transform_motion_array_interpolate_straight(
- Transform *tfm, const ccl_global DecomposedTransform *motion, uint numsteps, float time)
- {
- /* Figure out which steps we need to interpolate. */
- int maxstep = numsteps - 1;
- int step = min((int)(time * maxstep), maxstep - 1);
- float t = time * maxstep - step;
- const ccl_global DecomposedTransform *a = motion + step;
- const ccl_global DecomposedTransform *b = motion + step + 1;
- Transform step1, step2;
- transform_compose(&step1, a);
- transform_compose(&step2, b);
- /* matrix lerp */
- tfm->x = (1.0f - t) * step1.x + t * step2.x;
- tfm->y = (1.0f - t) * step1.y + t * step2.y;
- tfm->z = (1.0f - t) * step1.z + t * step2.z;
- }
- # endif
/* Forward declaration: within this header BoundBox2D only appears as a
 * reference parameter of transform_from_viewplane(), so the full class
 * definition is not needed here. */
- class BoundBox2D;
- ccl_device_inline bool operator==(const DecomposedTransform &A, const DecomposedTransform &B)
- {
- return memcmp(&A, &B, sizeof(DecomposedTransform)) == 0;
- }
/* Host-side declarations; the definitions are not part of this header.
 * NOTE(review): semantics below are inferred from names and signatures only --
 * confirm against the implementation. */

/* Presumably extracts the rotation of `tfm` as a quaternion. */
- float4 transform_to_quat(const Transform &tfm);
/* Presumably decomposes `size` matrices from `motion` into `decomp`. */
- void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, size_t size);
/* Presumably builds a transform from the given 2D viewplane bounds. */
- Transform transform_from_viewplane(BoundBox2D &viewplane);
- #endif
/* TODO(sergey): This is only for until we've got OpenCL 2.0
 * on all devices we consider supported. It'll be replaced with
 * generic address space.
 *
 * Until then, OpenCL builds get a `<name>_addrspace` wrapper for each of the
 * three functions below. A wrapper takes the transform through a
 * `ccl_addr_space` pointer, copies it into a local variable and forwards to
 * `<name>`. The `*_auto` names resolve to these wrappers on OpenCL and to the
 * plain functions everywhere else.
 */
#ifdef __KERNEL_OPENCL__
#  define OPENCL_TRANSFORM_ADDRSPACE_GLUE(prefix, suffix) prefix##suffix
#  define OPENCL_TRANSFORM_ADDRSPACE_DECLARE(fn) \
    ccl_device_inline float3 OPENCL_TRANSFORM_ADDRSPACE_GLUE(fn, _addrspace)( \
        ccl_addr_space const Transform *t, const float3 a) \
    { \
      Transform local_tfm = *t; \
      return fn(&local_tfm, a); \
    }

OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_point)
OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_direction)
OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_direction_transposed)

/* The helper macros are only needed for the three declarations above. */
#  undef OPENCL_TRANSFORM_ADDRSPACE_DECLARE
#  undef OPENCL_TRANSFORM_ADDRSPACE_GLUE

#  define transform_point_auto transform_point_addrspace
#  define transform_direction_auto transform_direction_addrspace
#  define transform_direction_transposed_auto transform_direction_transposed_addrspace
#else
#  define transform_point_auto transform_point
#  define transform_direction_auto transform_direction
#  define transform_direction_transposed_auto transform_direction_transposed
#endif
- CCL_NAMESPACE_END
- #endif /* __UTIL_TRANSFORM_H__ */
|