// spv.float16.frag
  #version 450 core

  // Enables the half-precision float types (float16_t, f16vecN, f16matNxM)
  // and the 64-bit integer types (i64vecN, u64vecN) used further down.
  #extension GL_AMD_gpu_shader_half_float : enable
  #extension GL_ARB_gpu_shader_int64 : enable

  // Shader entry point; its body is empty.
  void main() {}
  // Half float literals
  // Declares float16 constants using both the lowercase "hf" and uppercase
  // "HF" literal suffixes, then uses them in a component write and a
  // compound add on a local vector.
  void literal()
  {
      const float16_t scalarConst = 0.000001hf;
      const f16vec2   vectorConst = f16vec2(-0.25HF, 0.03HF);

      f16vec2 acc;
      acc.x = scalarConst;
      acc += vectorConst;
  }
  // Block memory layout
  // Structure of float16 members that is embedded (singly and as an array)
  // in the interface blocks below. Offsets are relative to the struct start.
  struct S
  {
      float16_t x;    // rule 1: align = 2, takes offsets 0-1
      f16vec2   y;    // rule 2: align = 4, takes offsets 4-7
      f16vec3   z;    // rule 3: align = 8, takes offsets 8-13
  };
  // Uniform block using column-major matrices and the std140 layout.
  // Each member comment names the layout rule applied, the resulting
  // alignment/stride, and the byte offsets the member occupies.
  layout(column_major, std140) uniform B1
  {
      float16_t a;       // rule 1:  align = 2, takes offsets 0-1
      f16vec2   b;       // rule 2:  align = 4, takes offsets 4-7
      f16vec3   c;       // rule 3:  align = 8, takes offsets 8-13
                         //          (three 2-byte components; 14-15 are
                         //          padding before d)
      float16_t d[2];    // rule 4:  align = 16, array stride = 16,
                         //          takes offsets 16-47
      f16mat2x3 e;       // rule 5:  align = 16, matrix stride = 16,
                         //          takes offsets 48-79
      f16mat2x3 f[2];    // rule 6:  align = 16, matrix stride = 16,
                         //          array stride = 32, f[0] takes
                         //          offsets 80-111, f[1] takes offsets
                         //          112-143
      S         g;       // rule 9:  align = 16, g.x takes offsets
                         //          144-145, g.y takes offsets 148-151,
                         //          g.z takes offsets 152-157 (158-159
                         //          pad the struct to 16 bytes)
      S         h[2];    // rule 10: align = 16, array stride = 16, h[0]
                         //          takes offsets 160-175, h[1] takes
                         //          offsets 176-191
  };
  // Storage buffer block using row-major matrices and the std430 layout.
  // Member comments list the layout rule, alignment/stride and the byte
  // offsets occupied.
  layout(row_major, std430) buffer B2
  {
      float16_t o;       // rule 1:  align = 2, takes offsets 0-1
      f16vec2   p;       // rule 2:  align = 4, takes offsets 4-7
      f16vec3   q;       // rule 3:  align = 8, takes offsets 8-13
      float16_t r[2];    // rule 4:  align = 2, array stride = 2, takes
                         //          offsets 14-17
      f16mat2x3 s;       // rule 7:  align = 4, matrix stride = 4, takes
                         //          offsets 20-31
      f16mat2x3 t[2];    // rule 8:  align = 4, matrix stride = 4, array
                         //          stride = 12, t[0] takes offsets
                         //          32-43, t[1] takes offsets 44-55
      S         u;       // rule 9:  align = 8, u.x takes offsets
                         //          56-57, u.y takes offsets 60-63, u.z
                         //          takes offsets 64-69
      S         v[2];    // rule 10: align = 8, array stride = 16, v[0]
                         //          takes offsets 72-87, v[1] takes
                         //          offsets 88-103
  };
  // Specialization constant
  // One specialization constant per floating-point width.
  layout(constant_id = 100) const float16_t sf16 = 0.125hf;
  layout(constant_id = 101) const float     sf   = 0.25;
  layout(constant_id = 102) const double    sd   = 0.5lf;

  // Constants derived from the specialization constants by conversion:
  // float16 -> float and float16 -> double ...
  const float  f16_to_f = float(sf16);
  // Convert straight to double so the float16 -> double conversion is
  // exercised directly rather than going through float first.
  const double f16_to_d = double(sf16);

  // ... and float -> float16 and double -> float16.
  const float16_t f_to_f16 = float16_t(sf);
  const float16_t d_to_f16 = float16_t(sd);
  // Applies the arithmetic, relational and vector/matrix operators to
  // float16 scalar, vector and matrix operands. Results are only stored in
  // locals.
  void operators()
  {
      float16_t s;
      f16vec2   v;
      f16mat2x2 m;
      bool      cmp;

      // Arithmetic: compound assignment, increment/decrement, negation
      v += v;
      v -= v;
      v *= v;
      v /= v;
      v++;
      v--;
      ++m;
      --m;
      v = -v;
      m = -m;

      // Arithmetic: binary operators on scalar components
      s = v.x + v.y;
      s = v.x - v.y;
      s = v.x * v.y;
      s = v.x / v.y;

      // Relational
      cmp = (v.x != s);
      cmp = (v.y == s);
      cmp = (v.x > s);
      cmp = (v.y < s);
      cmp = (v.x >= s);
      cmp = (v.y <= s);

      // Vector/matrix operations
      v = v * s;
      m = m * s;
      v = m * v;
      v = v * m;
      m = m * m;
  }
  // Converts between f16vec3 and every other 3-component basic vector type
  // (bool, float, double, int, uint, int64, uint64), once in each direction,
  // using constructor syntax.
  void typeCast()
  {
      bvec3   bv;
      vec3    fv;
      dvec3   dv;
      ivec3   iv;
      uvec3   uv;
      i64vec3 i64v;
      u64vec3 u64v;
      f16vec3 f16v;

      f16v = f16vec3(bv);      // bool -> float16
      bv   = bvec3(f16v);      // float16 -> bool

      f16v = f16vec3(fv);      // float -> float16
      fv   = vec3(f16v);       // float16 -> float

      f16v = f16vec3(dv);      // double -> float16
      // The source operand must be the float16 vector; converting dv to
      // itself would not exercise any float16 conversion.
      dv   = dvec3(f16v);      // float16 -> double

      f16v = f16vec3(iv);      // int -> float16
      iv   = ivec3(f16v);      // float16 -> int

      f16v = f16vec3(uv);      // uint -> float16
      uv   = uvec3(f16v);      // float16 -> uint

      f16v = f16vec3(i64v);    // int64 -> float16
      i64v = i64vec3(f16v);    // float16 -> int64

      f16v = f16vec3(u64v);    // uint64 -> float16
      u64v = u64vec3(f16v);    // float16 -> uint64
  }
  // Calls each angle and trigonometry built-in with f16vec4 arguments,
  // including both the one- and two-argument forms of atan.
  void builtinAngleTrigFuncs()
  {
      f16vec4 src, dst;

      // Angle conversion
      dst = radians(src);
      dst = degrees(src);

      // Circular functions and their inverses
      dst = sin(src);
      dst = cos(src);
      dst = tan(src);
      dst = asin(src);
      dst = acos(src);
      dst = atan(src, dst);
      dst = atan(src);

      // Hyperbolic functions and their inverses
      dst = sinh(src);
      dst = cosh(src);
      dst = tanh(src);
      dst = asinh(src);
      dst = acosh(src);
      dst = atanh(src);
  }
  // Calls each exponential built-in (pow, exp/log, exp2/log2, sqrt,
  // inversesqrt) with f16vec2 arguments.
  void builtinExpFuncs()
  {
      f16vec2 src, dst;

      dst = pow(src, dst);
      dst = exp(src);
      dst = log(src);
      dst = exp2(src);
      dst = log2(src);
      dst = sqrt(src);
      dst = inversesqrt(src);
  }
  // Calls the "common" built-ins with float16 arguments, covering the
  // vector/vector and vector/scalar overloads where both are used below.
  void builtinCommonFuncs()
  {
      f16vec3   va, vb, vc;
      float16_t s;
      bool      flag;
      bvec3     mask;
      ivec3     expo;

      // Sign and rounding
      vb = abs(va);
      vb = sign(va);
      vb = floor(va);
      vb = trunc(va);
      vb = round(va);
      vb = roundEven(va);
      vb = ceil(va);
      vb = fract(va);

      // Modulus; modf takes its second argument as an output
      vb = mod(va, vb);
      vb = mod(va, s);
      vc = modf(va, vb);

      // Min / max / clamp
      vc = min(va, vb);
      vc = min(va, s);
      vc = max(va, vb);
      vc = max(va, s);
      vc = clamp(va, s, vb.x);
      vc = clamp(va, vb, f16vec3(s));

      // Blending and thresholds
      vc = mix(va, vb, s);
      vc = mix(va, vb, vc);
      vc = mix(va, vb, mask);
      vc = step(va, vb);
      vc = step(s, vc);
      vc = smoothstep(va, vb, vc);
      vc = smoothstep(s, va.x, vb);

      // Classification
      flag = isnan(s);
      mask = isinf(va);

      // Fused multiply-add and exponent manipulation
      vc = fma(va, vb, vc);
      vb = frexp(va, expo);
      vb = ldexp(va, expo);
  }
  // Passes an f16vec2 through packFloat2x16 into a uint and back out
  // through unpackFloat2x16.
  void builtinPackUnpackFuncs()
  {
      uint    packed;
      f16vec2 halves;

      packed = packFloat2x16(halves);
      halves = unpackFloat2x16(packed);
  }
  // Calls the geometric built-ins with f16vec3 arguments; the scalar-valued
  // ones store into a float16_t, the rest into an f16vec3.
  void builtinGeometryFuncs()
  {
      float16_t s;
      f16vec3   va, vb, vc;

      // Scalar results
      s = length(va);
      s = distance(va, vb);
      s = dot(va, vb);

      // Vector results
      vc = cross(va, vb);
      vb = normalize(va);
      vc = faceforward(va, vb, vc);
      vc = reflect(va, vb);
      vc = refract(va, vb, s);
  }
  // Calls the matrix built-ins with float16 matrices of several shapes.
  void builtinMatrixFuncs()
  {
      f16mat2x3 m23a, m23b, m23c;
      f16mat3x2 m32;
      f16mat3   m3;
      f16mat4   m4dst, m4src;
      f16vec3   col3;
      f16vec2   row2;
      float16_t det;

      m23c  = matrixCompMult(m23a, m23b);
      m23a  = outerProduct(col3, row2);
      m32   = transpose(m23a);
      det   = determinant(m3);
      m4dst = inverse(m4src);
  }
  // Calls each component-wise vector relational built-in on two f16vec3
  // operands, storing the bvec3 result.
  void builtinVecRelFuncs()
  {
      f16vec3 lhs, rhs;
      bvec3   mask;

      mask = lessThan(lhs, rhs);
      mask = lessThanEqual(lhs, rhs);
      mask = greaterThan(lhs, rhs);
      mask = greaterThanEqual(lhs, rhs);
      mask = equal(lhs, rhs);
      mask = notEqual(lhs, rhs);
  }
  // Fragment input read by the derivative and interpolation calls below.
  in f16vec3 if16v;

  // Calls the fragment-processing built-ins (derivatives, fwidth and the
  // interpolateAt* functions) on the float16 input, using scalar, two- and
  // three-component operands.
  void builtinFragProcFuncs()
  {
      f16vec3 f16v;

      // Derivative
      f16v.x  = dFdx(if16v.x);
      f16v.y  = dFdy(if16v.y);
      f16v.xy = dFdxFine(if16v.xy);
      f16v.xy = dFdyFine(if16v.xy);
      f16v    = dFdxCoarse(if16v);
      // The y-direction coarse derivative pairs with the x-direction call
      // above, as the Fine variants do.
      f16v    = dFdyCoarse(if16v);

      f16v.x  = fwidth(if16v.x);
      f16v.xy = fwidthFine(if16v.xy);
      f16v    = fwidthCoarse(if16v);

      // Interpolation
      f16v.x  = interpolateAtCentroid(if16v.x);
      f16v.xy = interpolateAtSample(if16v.xy, 1);
      f16v    = interpolateAtOffset(if16v, f16vec2(0.5hf));
  }