util_color.h 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. /*
  2. * Copyright 2011-2013 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef __UTIL_COLOR_H__
  17. #define __UTIL_COLOR_H__
  18. #include "util/util_math.h"
  19. #include "util/util_types.h"
  20. #ifdef __KERNEL_SSE2__
  21. # include "util/util_simd.h"
  22. #endif
  23. CCL_NAMESPACE_BEGIN
  24. ccl_device uchar float_to_byte(float val)
  25. {
  26. return ((val <= 0.0f) ? 0 :
  27. ((val > (1.0f - 0.5f / 255.0f)) ? 255 : (uchar)((255.0f * val) + 0.5f)));
  28. }
  29. ccl_device uchar4 color_float_to_byte(float3 c)
  30. {
  31. uchar r, g, b;
  32. r = float_to_byte(c.x);
  33. g = float_to_byte(c.y);
  34. b = float_to_byte(c.z);
  35. return make_uchar4(r, g, b, 0);
  36. }
  37. ccl_device_inline float3 color_byte_to_float(uchar4 c)
  38. {
  39. return make_float3(c.x * (1.0f / 255.0f), c.y * (1.0f / 255.0f), c.z * (1.0f / 255.0f));
  40. }
  41. ccl_device float color_srgb_to_linear(float c)
  42. {
  43. if (c < 0.04045f)
  44. return (c < 0.0f) ? 0.0f : c * (1.0f / 12.92f);
  45. else
  46. return powf((c + 0.055f) * (1.0f / 1.055f), 2.4f);
  47. }
  48. ccl_device float color_linear_to_srgb(float c)
  49. {
  50. if (c < 0.0031308f)
  51. return (c < 0.0f) ? 0.0f : c * 12.92f;
  52. else
  53. return 1.055f * powf(c, 1.0f / 2.4f) - 0.055f;
  54. }
  55. ccl_device float3 rgb_to_hsv(float3 rgb)
  56. {
  57. float cmax, cmin, h, s, v, cdelta;
  58. float3 c;
  59. cmax = fmaxf(rgb.x, fmaxf(rgb.y, rgb.z));
  60. cmin = min(rgb.x, min(rgb.y, rgb.z));
  61. cdelta = cmax - cmin;
  62. v = cmax;
  63. if (cmax != 0.0f) {
  64. s = cdelta / cmax;
  65. }
  66. else {
  67. s = 0.0f;
  68. h = 0.0f;
  69. }
  70. if (s != 0.0f) {
  71. float3 cmax3 = make_float3(cmax, cmax, cmax);
  72. c = (cmax3 - rgb) / cdelta;
  73. if (rgb.x == cmax)
  74. h = c.z - c.y;
  75. else if (rgb.y == cmax)
  76. h = 2.0f + c.x - c.z;
  77. else
  78. h = 4.0f + c.y - c.x;
  79. h /= 6.0f;
  80. if (h < 0.0f)
  81. h += 1.0f;
  82. }
  83. else {
  84. h = 0.0f;
  85. }
  86. return make_float3(h, s, v);
  87. }
  88. ccl_device float3 hsv_to_rgb(float3 hsv)
  89. {
  90. float i, f, p, q, t, h, s, v;
  91. float3 rgb;
  92. h = hsv.x;
  93. s = hsv.y;
  94. v = hsv.z;
  95. if (s != 0.0f) {
  96. if (h == 1.0f)
  97. h = 0.0f;
  98. h *= 6.0f;
  99. i = floorf(h);
  100. f = h - i;
  101. rgb = make_float3(f, f, f);
  102. p = v * (1.0f - s);
  103. q = v * (1.0f - (s * f));
  104. t = v * (1.0f - (s * (1.0f - f)));
  105. if (i == 0.0f)
  106. rgb = make_float3(v, t, p);
  107. else if (i == 1.0f)
  108. rgb = make_float3(q, v, p);
  109. else if (i == 2.0f)
  110. rgb = make_float3(p, v, t);
  111. else if (i == 3.0f)
  112. rgb = make_float3(p, q, v);
  113. else if (i == 4.0f)
  114. rgb = make_float3(t, p, v);
  115. else
  116. rgb = make_float3(v, p, q);
  117. }
  118. else {
  119. rgb = make_float3(v, v, v);
  120. }
  121. return rgb;
  122. }
  123. ccl_device float3 xyY_to_xyz(float x, float y, float Y)
  124. {
  125. float X, Z;
  126. if (y != 0.0f)
  127. X = (x / y) * Y;
  128. else
  129. X = 0.0f;
  130. if (y != 0.0f && Y != 0.0f)
  131. Z = (1.0f - x - y) / y * Y;
  132. else
  133. Z = 0.0f;
  134. return make_float3(X, Y, Z);
  135. }
  136. #ifdef __KERNEL_SSE2__
  137. /*
  138. * Calculate initial guess for arg^exp based on float representation
  139. * This method gives a constant bias,
  140. * which can be easily compensated by multiplication with bias_coeff.
  141. * Gives better results for exponents near 1 (e. g. 4/5).
  142. * exp = exponent, encoded as uint32_t
  143. * e2coeff = 2^(127/exponent - 127) * bias_coeff^(1/exponent), encoded as uint32_t
  144. */
  145. template<unsigned exp, unsigned e2coeff> ccl_device_inline ssef fastpow(const ssef &arg)
  146. {
  147. ssef ret;
  148. ret = arg * cast(ssei(e2coeff));
  149. ret = ssef(cast(ret));
  150. ret = ret * cast(ssei(exp));
  151. ret = cast(ssei(ret));
  152. return ret;
  153. }
  154. /* Improve x ^ 1.0f/5.0f solution with Newton-Raphson method */
  155. ccl_device_inline ssef improve_5throot_solution(const ssef &old_result, const ssef &x)
  156. {
  157. ssef approx2 = old_result * old_result;
  158. ssef approx4 = approx2 * approx2;
  159. ssef t = x / approx4;
  160. ssef summ = madd(ssef(4.0f), old_result, t);
  161. return summ * ssef(1.0f / 5.0f);
  162. }
  163. /* Calculate powf(x, 2.4). Working domain: 1e-10 < x < 1e+10 */
  164. ccl_device_inline ssef fastpow24(const ssef &arg)
  165. {
  166. /* max, avg and |avg| errors were calculated in gcc without FMA instructions
  167. * The final precision should be better than powf in glibc */
  168. /* Calculate x^4/5, coefficient 0.994 was constructed manually to minimize avg error */
  169. /* 0x3F4CCCCD = 4/5 */
  170. /* 0x4F55A7FB = 2^(127/(4/5) - 127) * 0.994^(1/(4/5)) */
  171. ssef x = fastpow<0x3F4CCCCD, 0x4F55A7FB>(arg); // error max = 0.17 avg = 0.0018 |avg| = 0.05
  172. ssef arg2 = arg * arg;
  173. ssef arg4 = arg2 * arg2;
  174. x = improve_5throot_solution(x,
  175. arg4); /* error max = 0.018 avg = 0.0031 |avg| = 0.0031 */
  176. x = improve_5throot_solution(x,
  177. arg4); /* error max = 0.00021 avg = 1.6e-05 |avg| = 1.6e-05 */
  178. x = improve_5throot_solution(x,
  179. arg4); /* error max = 6.1e-07 avg = 5.2e-08 |avg| = 1.1e-07 */
  180. return x * (x * x);
  181. }
  182. ccl_device ssef color_srgb_to_linear(const ssef &c)
  183. {
  184. sseb cmp = c < ssef(0.04045f);
  185. ssef lt = max(c * ssef(1.0f / 12.92f), ssef(0.0f));
  186. ssef gtebase = (c + ssef(0.055f)) * ssef(1.0f / 1.055f); /* fma */
  187. ssef gte = fastpow24(gtebase);
  188. return select(cmp, lt, gte);
  189. }
  190. #endif /* __KERNEL_SSE2__ */
  191. ccl_device float3 color_srgb_to_linear_v3(float3 c)
  192. {
  193. return make_float3(
  194. color_srgb_to_linear(c.x), color_srgb_to_linear(c.y), color_srgb_to_linear(c.z));
  195. }
  196. ccl_device float3 color_linear_to_srgb_v3(float3 c)
  197. {
  198. return make_float3(
  199. color_linear_to_srgb(c.x), color_linear_to_srgb(c.y), color_linear_to_srgb(c.z));
  200. }
  201. ccl_device float4 color_linear_to_srgb_v4(float4 c)
  202. {
  203. return make_float4(
  204. color_linear_to_srgb(c.x), color_linear_to_srgb(c.y), color_linear_to_srgb(c.z), c.w);
  205. }
  206. ccl_device float4 color_srgb_to_linear_v4(float4 c)
  207. {
  208. #ifdef __KERNEL_SSE2__
  209. ssef r_ssef;
  210. float4 &r = (float4 &)r_ssef;
  211. r = c;
  212. r_ssef = color_srgb_to_linear(r_ssef);
  213. r.w = c.w;
  214. return r;
  215. #else
  216. return make_float4(
  217. color_srgb_to_linear(c.x), color_srgb_to_linear(c.y), color_srgb_to_linear(c.z), c.w);
  218. #endif
  219. }
  220. ccl_device float3 color_highlight_compress(float3 color, float3 *variance)
  221. {
  222. color += make_float3(1.0f, 1.0f, 1.0f);
  223. if (variance) {
  224. *variance *= sqr3(make_float3(1.0f, 1.0f, 1.0f) / color);
  225. }
  226. return log3(color);
  227. }
  228. ccl_device float3 color_highlight_uncompress(float3 color)
  229. {
  230. return exp3(color) - make_float3(1.0f, 1.0f, 1.0f);
  231. }
  232. CCL_NAMESPACE_END
  233. #endif /* __UTIL_COLOR_H__ */