HexFloat.cpp 51 KB


  1. // Copyright (c) 2015-2016 The Khronos Group Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
#include <cfloat>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <iomanip>
#include <limits>
#include <sstream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include <gmock/gmock.h>

#include "SPIRV/hex_float.h"
  22. namespace {
  23. using ::testing::Eq;
  24. using spvutils::BitwiseCast;
  25. using spvutils::Float16;
  26. using spvutils::FloatProxy;
  27. using spvutils::HexFloat;
  28. using spvutils::ParseNormalFloat;
  29. // In this file "encode" means converting a number into a string,
  30. // and "decode" means converting a string into a number.
  31. using HexFloatTest =
  32. ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
  33. using DecodeHexFloatTest =
  34. ::testing::TestWithParam<std::pair<std::string, FloatProxy<float>>>;
  35. using HexDoubleTest =
  36. ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
  37. using DecodeHexDoubleTest =
  38. ::testing::TestWithParam<std::pair<std::string, FloatProxy<double>>>;
  39. // Hex-encodes a float value.
  40. template <typename T>
  41. std::string EncodeViaHexFloat(const T& value) {
  42. std::stringstream ss;
  43. ss << spvutils::HexFloat<T>(value);
  44. return ss.str();
  45. }
  46. // The following two tests can't be DRY because they take different parameter
  47. // types.
  48. TEST_P(HexFloatTest, EncodeCorrectly) {
  49. EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second));
  50. }
  51. TEST_P(HexDoubleTest, EncodeCorrectly) {
  52. EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second));
  53. }
  54. // Decodes a hex-float string.
  55. template <typename T>
  56. FloatProxy<T> Decode(const std::string& str) {
  57. spvutils::HexFloat<FloatProxy<T>> decoded(0.f);
  58. EXPECT_TRUE((std::stringstream(str) >> decoded).eof());
  59. return decoded.value();
  60. }
  61. TEST_P(HexFloatTest, DecodeCorrectly) {
  62. EXPECT_THAT(Decode<float>(GetParam().second), Eq(GetParam().first));
  63. }
  64. TEST_P(HexDoubleTest, DecodeCorrectly) {
  65. EXPECT_THAT(Decode<double>(GetParam().second), Eq(GetParam().first));
  66. }
  67. INSTANTIATE_TEST_CASE_P(
  68. Float32Tests, HexFloatTest,
  69. ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
  70. {0.f, "0x0p+0"},
  71. {1.f, "0x1p+0"},
  72. {2.f, "0x1p+1"},
  73. {3.f, "0x1.8p+1"},
  74. {0.5f, "0x1p-1"},
  75. {0.25f, "0x1p-2"},
  76. {0.75f, "0x1.8p-1"},
  77. {-0.f, "-0x0p+0"},
  78. {-1.f, "-0x1p+0"},
  79. {-0.5f, "-0x1p-1"},
  80. {-0.25f, "-0x1p-2"},
  81. {-0.75f, "-0x1.8p-1"},
  82. // Larger numbers
  83. {512.f, "0x1p+9"},
  84. {-512.f, "-0x1p+9"},
  85. {1024.f, "0x1p+10"},
  86. {-1024.f, "-0x1p+10"},
  87. {1024.f + 8.f, "0x1.02p+10"},
  88. {-1024.f - 8.f, "-0x1.02p+10"},
  89. // Small numbers
  90. {1.0f / 512.f, "0x1p-9"},
  91. {1.0f / -512.f, "-0x1p-9"},
  92. {1.0f / 1024.f, "0x1p-10"},
  93. {1.0f / -1024.f, "-0x1p-10"},
  94. {1.0f / 1024.f + 1.0f / 8.f, "0x1.02p-3"},
  95. {1.0f / -1024.f - 1.0f / 8.f, "-0x1.02p-3"},
  96. // lowest non-denorm
  97. {float(ldexp(1.0f, -126)), "0x1p-126"},
  98. {float(ldexp(-1.0f, -126)), "-0x1p-126"},
  99. // Denormalized values
  100. {float(ldexp(1.0f, -127)), "0x1p-127"},
  101. {float(ldexp(1.0f, -127) / 2.0f), "0x1p-128"},
  102. {float(ldexp(1.0f, -127) / 4.0f), "0x1p-129"},
  103. {float(ldexp(1.0f, -127) / 8.0f), "0x1p-130"},
  104. {float(ldexp(-1.0f, -127)), "-0x1p-127"},
  105. {float(ldexp(-1.0f, -127) / 2.0f), "-0x1p-128"},
  106. {float(ldexp(-1.0f, -127) / 4.0f), "-0x1p-129"},
  107. {float(ldexp(-1.0f, -127) / 8.0f), "-0x1p-130"},
  108. {float(ldexp(1.0, -127) + (ldexp(1.0, -127) / 2.0f)), "0x1.8p-127"},
  109. {float(ldexp(1.0, -127) / 2.0 + (ldexp(1.0, -127) / 4.0f)),
  110. "0x1.8p-128"},
  111. })),);
  112. INSTANTIATE_TEST_CASE_P(
  113. Float32NanTests, HexFloatTest,
  114. ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
  115. // Various NAN and INF cases
  116. {uint32_t(0xFF800000), "-0x1p+128"}, // -inf
  117. {uint32_t(0x7F800000), "0x1p+128"}, // inf
  118. {uint32_t(0xFFC00000), "-0x1.8p+128"}, // -nan
  119. {uint32_t(0xFF800100), "-0x1.0002p+128"}, // -nan
  120. {uint32_t(0xFF800c00), "-0x1.0018p+128"}, // -nan
  121. {uint32_t(0xFF80F000), "-0x1.01ep+128"}, // -nan
  122. {uint32_t(0xFFFFFFFF), "-0x1.fffffep+128"}, // -nan
  123. {uint32_t(0x7FC00000), "0x1.8p+128"}, // +nan
  124. {uint32_t(0x7F800100), "0x1.0002p+128"}, // +nan
  125. {uint32_t(0x7f800c00), "0x1.0018p+128"}, // +nan
  126. {uint32_t(0x7F80F000), "0x1.01ep+128"}, // +nan
  127. {uint32_t(0x7FFFFFFF), "0x1.fffffep+128"}, // +nan
  128. })),);
  129. INSTANTIATE_TEST_CASE_P(
  130. Float64Tests, HexDoubleTest,
  131. ::testing::ValuesIn(
  132. std::vector<std::pair<FloatProxy<double>, std::string>>({
  133. {0., "0x0p+0"},
  134. {1., "0x1p+0"},
  135. {2., "0x1p+1"},
  136. {3., "0x1.8p+1"},
  137. {0.5, "0x1p-1"},
  138. {0.25, "0x1p-2"},
  139. {0.75, "0x1.8p-1"},
  140. {-0., "-0x0p+0"},
  141. {-1., "-0x1p+0"},
  142. {-0.5, "-0x1p-1"},
  143. {-0.25, "-0x1p-2"},
  144. {-0.75, "-0x1.8p-1"},
  145. // Larger numbers
  146. {512., "0x1p+9"},
  147. {-512., "-0x1p+9"},
  148. {1024., "0x1p+10"},
  149. {-1024., "-0x1p+10"},
  150. {1024. + 8., "0x1.02p+10"},
  151. {-1024. - 8., "-0x1.02p+10"},
  152. // Large outside the range of normal floats
  153. {ldexp(1.0, 128), "0x1p+128"},
  154. {ldexp(1.0, 129), "0x1p+129"},
  155. {ldexp(-1.0, 128), "-0x1p+128"},
  156. {ldexp(-1.0, 129), "-0x1p+129"},
  157. {ldexp(1.0, 128) + ldexp(1.0, 90), "0x1.0000000004p+128"},
  158. {ldexp(1.0, 129) + ldexp(1.0, 120), "0x1.008p+129"},
  159. {ldexp(-1.0, 128) + ldexp(1.0, 90), "-0x1.fffffffff8p+127"},
  160. {ldexp(-1.0, 129) + ldexp(1.0, 120), "-0x1.ffp+128"},
  161. // Small numbers
  162. {1.0 / 512., "0x1p-9"},
  163. {1.0 / -512., "-0x1p-9"},
  164. {1.0 / 1024., "0x1p-10"},
  165. {1.0 / -1024., "-0x1p-10"},
  166. {1.0 / 1024. + 1.0 / 8., "0x1.02p-3"},
  167. {1.0 / -1024. - 1.0 / 8., "-0x1.02p-3"},
  168. // Small outside the range of normal floats
  169. {ldexp(1.0, -128), "0x1p-128"},
  170. {ldexp(1.0, -129), "0x1p-129"},
  171. {ldexp(-1.0, -128), "-0x1p-128"},
  172. {ldexp(-1.0, -129), "-0x1p-129"},
  173. {ldexp(1.0, -128) + ldexp(1.0, -90), "0x1.0000000004p-90"},
  174. {ldexp(1.0, -129) + ldexp(1.0, -120), "0x1.008p-120"},
  175. {ldexp(-1.0, -128) + ldexp(1.0, -90), "0x1.fffffffff8p-91"},
  176. {ldexp(-1.0, -129) + ldexp(1.0, -120), "0x1.ffp-121"},
  177. // lowest non-denorm
  178. {ldexp(1.0, -1022), "0x1p-1022"},
  179. {ldexp(-1.0, -1022), "-0x1p-1022"},
  180. // Denormalized values
  181. {ldexp(1.0, -1023), "0x1p-1023"},
  182. {ldexp(1.0, -1023) / 2.0, "0x1p-1024"},
  183. {ldexp(1.0, -1023) / 4.0, "0x1p-1025"},
  184. {ldexp(1.0, -1023) / 8.0, "0x1p-1026"},
  185. {ldexp(-1.0, -1024), "-0x1p-1024"},
  186. {ldexp(-1.0, -1024) / 2.0, "-0x1p-1025"},
  187. {ldexp(-1.0, -1024) / 4.0, "-0x1p-1026"},
  188. {ldexp(-1.0, -1024) / 8.0, "-0x1p-1027"},
  189. {ldexp(1.0, -1023) + (ldexp(1.0, -1023) / 2.0), "0x1.8p-1023"},
  190. {ldexp(1.0, -1023) / 2.0 + (ldexp(1.0, -1023) / 4.0),
  191. "0x1.8p-1024"},
  192. })),);
  193. INSTANTIATE_TEST_CASE_P(
  194. Float64NanTests, HexDoubleTest,
  195. ::testing::ValuesIn(std::vector<
  196. std::pair<FloatProxy<double>, std::string>>({
  197. // Various NAN and INF cases
  198. {uint64_t(0xFFF0000000000000LL), "-0x1p+1024"}, //-inf
  199. {uint64_t(0x7FF0000000000000LL), "0x1p+1024"}, //+inf
  200. {uint64_t(0xFFF8000000000000LL), "-0x1.8p+1024"}, // -nan
  201. {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"}, // -nan
  202. {uint64_t(0xFFF0000000000001LL), "-0x1.0000000000001p+1024"}, // -nan
  203. {uint64_t(0xFFF0000300000000LL), "-0x1.00003p+1024"}, // -nan
  204. {uint64_t(0xFFFFFFFFFFFFFFFFLL), "-0x1.fffffffffffffp+1024"}, // -nan
  205. {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"}, // +nan
  206. {uint64_t(0x7FF0F00000000000LL), "0x1.0fp+1024"}, // +nan
  207. {uint64_t(0x7FF0000000000001LL), "0x1.0000000000001p+1024"}, // -nan
  208. {uint64_t(0x7FF0000300000000LL), "0x1.00003p+1024"}, // -nan
  209. {uint64_t(0x7FFFFFFFFFFFFFFFLL), "0x1.fffffffffffffp+1024"}, // -nan
  210. })),);
  211. TEST(HexFloatStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
  212. std::stringstream s;
  213. s << std::setw(4) << std::oct << std::setfill('x') << 8 << " "
  214. << FloatProxy<float>(uint32_t(0xFF800100)) << " " << std::setw(4) << 9;
  215. EXPECT_THAT(s.str(), Eq(std::string("xx10 -0x1.0002p+128 xx11")));
  216. }
  217. TEST(HexDoubleStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
  218. std::stringstream s;
  219. s << std::setw(4) << std::oct << std::setfill('x') << 8 << " "
  220. << FloatProxy<double>(uint64_t(0x7FF0F00000000000LL)) << " " << std::setw(4)
  221. << 9;
  222. EXPECT_THAT(s.str(), Eq(std::string("xx10 0x1.0fp+1024 xx11")));
  223. }
  224. TEST_P(DecodeHexFloatTest, DecodeCorrectly) {
  225. EXPECT_THAT(Decode<float>(GetParam().first), Eq(GetParam().second));
  226. }
  227. TEST_P(DecodeHexDoubleTest, DecodeCorrectly) {
  228. EXPECT_THAT(Decode<double>(GetParam().first), Eq(GetParam().second));
  229. }
  230. INSTANTIATE_TEST_CASE_P(
  231. Float32DecodeTests, DecodeHexFloatTest,
  232. ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
  233. {"0x0p+000", 0.f},
  234. {"0x0p0", 0.f},
  235. {"0x0p-0", 0.f},
  236. // flush to zero cases
  237. {"0x1p-500", 0.f}, // Exponent underflows.
  238. {"-0x1p-500", -0.f},
  239. {"0x0.00000000001p-126", 0.f}, // Fraction causes underflow.
  240. {"-0x0.0000000001p-127", -0.f},
  241. {"-0x0.01p-142", -0.f}, // Fraction causes additional underflow.
  242. {"0x0.01p-142", 0.f},
  243. // Some floats that do not encode the same way as they decode.
  244. {"0x2p+0", 2.f},
  245. {"0xFFp+0", 255.f},
  246. {"0x0.8p+0", 0.5f},
  247. {"0x0.4p+0", 0.25f},
  248. })),);
  249. INSTANTIATE_TEST_CASE_P(
  250. Float32DecodeInfTests, DecodeHexFloatTest,
  251. ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
  252. // inf cases
  253. {"-0x1p+128", uint32_t(0xFF800000)}, // -inf
  254. {"0x32p+127", uint32_t(0x7F800000)}, // inf
  255. {"0x32p+500", uint32_t(0x7F800000)}, // inf
  256. {"-0x32p+127", uint32_t(0xFF800000)}, // -inf
  257. })),);
  258. INSTANTIATE_TEST_CASE_P(
  259. Float64DecodeTests, DecodeHexDoubleTest,
  260. ::testing::ValuesIn(
  261. std::vector<std::pair<std::string, FloatProxy<double>>>({
  262. {"0x0p+000", 0.},
  263. {"0x0p0", 0.},
  264. {"0x0p-0", 0.},
  265. // flush to zero cases
  266. {"0x1p-5000", 0.}, // Exponent underflows.
  267. {"-0x1p-5000", -0.},
  268. {"0x0.0000000000000001p-1023", 0.}, // Fraction causes underflow.
  269. {"-0x0.000000000000001p-1024", -0.},
  270. {"-0x0.01p-1090", -0.f}, // Fraction causes additional underflow.
  271. {"0x0.01p-1090", 0.},
  272. // Some floats that do not encode the same way as they decode.
  273. {"0x2p+0", 2.},
  274. {"0xFFp+0", 255.},
  275. {"0x0.8p+0", 0.5},
  276. {"0x0.4p+0", 0.25},
  277. })),);
  278. INSTANTIATE_TEST_CASE_P(
  279. Float64DecodeInfTests, DecodeHexDoubleTest,
  280. ::testing::ValuesIn(
  281. std::vector<std::pair<std::string, FloatProxy<double>>>({
  282. // inf cases
  283. {"-0x1p+1024", uint64_t(0xFFF0000000000000)}, // -inf
  284. {"0x32p+1023", uint64_t(0x7FF0000000000000)}, // inf
  285. {"0x32p+5000", uint64_t(0x7FF0000000000000)}, // inf
  286. {"-0x32p+1023", uint64_t(0xFFF0000000000000)}, // -inf
  287. })),);
  288. TEST(FloatProxy, ValidConversion) {
  289. EXPECT_THAT(FloatProxy<float>(1.f).getAsFloat(), Eq(1.0f));
  290. EXPECT_THAT(FloatProxy<float>(32.f).getAsFloat(), Eq(32.0f));
  291. EXPECT_THAT(FloatProxy<float>(-1.f).getAsFloat(), Eq(-1.0f));
  292. EXPECT_THAT(FloatProxy<float>(0.f).getAsFloat(), Eq(0.0f));
  293. EXPECT_THAT(FloatProxy<float>(-0.f).getAsFloat(), Eq(-0.0f));
  294. EXPECT_THAT(FloatProxy<float>(1.2e32f).getAsFloat(), Eq(1.2e32f));
  295. EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0xFF800000)).getAsFloat()));
  296. EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0x7F800000)).getAsFloat()));
  297. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFC00000)).getAsFloat()));
  298. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800100)).getAsFloat()));
  299. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800c00)).getAsFloat()));
  300. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF80F000)).getAsFloat()));
  301. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFFFFFFF)).getAsFloat()));
  302. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FC00000)).getAsFloat()));
  303. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F800100)).getAsFloat()));
  304. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7f800c00)).getAsFloat()));
  305. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F80F000)).getAsFloat()));
  306. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FFFFFFF)).getAsFloat()));
  307. EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800000)).data(), Eq(0xFF800000u));
  308. EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800000)).data(), Eq(0x7F800000u));
  309. EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFC00000)).data(), Eq(0xFFC00000u));
  310. EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800100)).data(), Eq(0xFF800100u));
  311. EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800c00)).data(), Eq(0xFF800c00u));
  312. EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF80F000)).data(), Eq(0xFF80F000u));
  313. EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFFFFFFF)).data(), Eq(0xFFFFFFFFu));
  314. EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FC00000)).data(), Eq(0x7FC00000u));
  315. EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800100)).data(), Eq(0x7F800100u));
  316. EXPECT_THAT(FloatProxy<float>(uint32_t(0x7f800c00)).data(), Eq(0x7f800c00u));
  317. EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F80F000)).data(), Eq(0x7F80F000u));
  318. EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FFFFFFF)).data(), Eq(0x7FFFFFFFu));
  319. }
  320. TEST(FloatProxy, Nan) {
  321. EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFC00000)).isNan());
  322. EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800100)).isNan());
  323. EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800c00)).isNan());
  324. EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF80F000)).isNan());
  325. EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFFFFFFF)).isNan());
  326. EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FC00000)).isNan());
  327. EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F800100)).isNan());
  328. EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7f800c00)).isNan());
  329. EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F80F000)).isNan());
  330. EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FFFFFFF)).isNan());
  331. }
  332. TEST(FloatProxy, Negation) {
  333. EXPECT_THAT((-FloatProxy<float>(1.f)).getAsFloat(), Eq(-1.0f));
  334. EXPECT_THAT((-FloatProxy<float>(0.f)).getAsFloat(), Eq(-0.0f));
  335. EXPECT_THAT((-FloatProxy<float>(-1.f)).getAsFloat(), Eq(1.0f));
  336. EXPECT_THAT((-FloatProxy<float>(-0.f)).getAsFloat(), Eq(0.0f));
  337. EXPECT_THAT((-FloatProxy<float>(32.f)).getAsFloat(), Eq(-32.0f));
  338. EXPECT_THAT((-FloatProxy<float>(-32.f)).getAsFloat(), Eq(32.0f));
  339. EXPECT_THAT((-FloatProxy<float>(1.2e32f)).getAsFloat(), Eq(-1.2e32f));
  340. EXPECT_THAT((-FloatProxy<float>(-1.2e32f)).getAsFloat(), Eq(1.2e32f));
  341. EXPECT_THAT(
  342. (-FloatProxy<float>(std::numeric_limits<float>::infinity())).getAsFloat(),
  343. Eq(-std::numeric_limits<float>::infinity()));
  344. EXPECT_THAT((-FloatProxy<float>(-std::numeric_limits<float>::infinity()))
  345. .getAsFloat(),
  346. Eq(std::numeric_limits<float>::infinity()));
  347. }
  348. // Test conversion of FloatProxy values to strings.
  349. //
  350. // In previous cases, we always wrapped the FloatProxy value in a HexFloat
  351. // before conversion to a string. In the following cases, the FloatProxy
  352. // decides for itself whether to print as a regular number or as a hex float.
  353. using FloatProxyFloatTest =
  354. ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
  355. using FloatProxyDoubleTest =
  356. ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
  357. // Converts a float value to a string via a FloatProxy.
  358. template <typename T>
  359. std::string EncodeViaFloatProxy(const T& value) {
  360. std::stringstream ss;
  361. ss << value;
  362. return ss.str();
  363. }
  364. // Converts a floating point string so that the exponent prefix
  365. // is 'e', and the exponent value does not have leading zeros.
  366. // The Microsoft runtime library likes to write things like "2.5E+010".
  367. // Convert that to "2.5e+10".
  368. // We don't care what happens to strings that are not floating point
  369. // strings.
  370. std::string NormalizeExponentInFloatString(std::string in) {
  371. std::string result;
  372. // Reserve one spot for the terminating null, even when the sscanf fails.
  373. std::vector<char> prefix(in.size() + 1);
  374. char e;
  375. char plus_or_minus;
  376. int exponent; // in base 10
  377. if ((4 == std::sscanf(in.c_str(), "%[-+.0123456789]%c%c%d", prefix.data(), &e,
  378. &plus_or_minus, &exponent)) &&
  379. (e == 'e' || e == 'E') &&
  380. (plus_or_minus == '-' || plus_or_minus == '+')) {
  381. // It looks like a floating point value with exponent.
  382. std::stringstream out;
  383. out << prefix.data() << 'e' << plus_or_minus << exponent;
  384. result = out.str();
  385. } else {
  386. result = in;
  387. }
  388. return result;
  389. }
  390. TEST(NormalizeFloat, Sample) {
  391. EXPECT_THAT(NormalizeExponentInFloatString(""), Eq(""));
  392. EXPECT_THAT(NormalizeExponentInFloatString("1e-12"), Eq("1e-12"));
  393. EXPECT_THAT(NormalizeExponentInFloatString("1E+14"), Eq("1e+14"));
  394. EXPECT_THAT(NormalizeExponentInFloatString("1e-0012"), Eq("1e-12"));
  395. EXPECT_THAT(NormalizeExponentInFloatString("1.263E+014"), Eq("1.263e+14"));
  396. }
  397. // The following two tests can't be DRY because they take different parameter
  398. // types.
  399. TEST_P(FloatProxyFloatTest, EncodeCorrectly) {
  400. EXPECT_THAT(
  401. NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)),
  402. Eq(GetParam().second));
  403. }
  404. TEST_P(FloatProxyDoubleTest, EncodeCorrectly) {
  405. EXPECT_THAT(
  406. NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)),
  407. Eq(GetParam().second));
  408. }
  409. INSTANTIATE_TEST_CASE_P(
  410. Float32Tests, FloatProxyFloatTest,
  411. ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
  412. // Zero
  413. {0.f, "0"},
  414. // Normal numbers
  415. {1.f, "1"},
  416. {-0.25f, "-0.25"},
  417. {1000.0f, "1000"},
  418. // Still normal numbers, but with large magnitude exponents.
  419. {float(ldexp(1.f, 126)), "8.50706e+37"},
  420. {float(ldexp(-1.f, -126)), "-1.17549e-38"},
  421. // denormalized values are printed as hex floats.
  422. {float(ldexp(1.0f, -127)), "0x1p-127"},
  423. {float(ldexp(1.5f, -128)), "0x1.8p-128"},
  424. {float(ldexp(1.25, -129)), "0x1.4p-129"},
  425. {float(ldexp(1.125, -130)), "0x1.2p-130"},
  426. {float(ldexp(-1.0f, -127)), "-0x1p-127"},
  427. {float(ldexp(-1.0f, -128)), "-0x1p-128"},
  428. {float(ldexp(-1.0f, -129)), "-0x1p-129"},
  429. {float(ldexp(-1.5f, -130)), "-0x1.8p-130"},
  430. // NaNs
  431. {FloatProxy<float>(uint32_t(0xFFC00000)), "-0x1.8p+128"},
  432. {FloatProxy<float>(uint32_t(0xFF800100)), "-0x1.0002p+128"},
  433. {std::numeric_limits<float>::infinity(), "0x1p+128"},
  434. {-std::numeric_limits<float>::infinity(), "-0x1p+128"},
  435. })),);
  436. INSTANTIATE_TEST_CASE_P(
  437. Float64Tests, FloatProxyDoubleTest,
  438. ::testing::ValuesIn(
  439. std::vector<std::pair<FloatProxy<double>, std::string>>({
  440. {0., "0"},
  441. {1., "1"},
  442. {-0.25, "-0.25"},
  443. {1000.0, "1000"},
  444. // Large outside the range of normal floats
  445. {ldexp(1.0, 128), "3.40282366920938e+38"},
  446. {ldexp(1.5, 129), "1.02084710076282e+39"},
  447. {ldexp(-1.0, 128), "-3.40282366920938e+38"},
  448. {ldexp(-1.5, 129), "-1.02084710076282e+39"},
  449. // Small outside the range of normal floats
  450. {ldexp(1.5, -129), "2.20405190779179e-39"},
  451. {ldexp(-1.5, -129), "-2.20405190779179e-39"},
  452. // lowest non-denorm
  453. {ldexp(1.0, -1022), "2.2250738585072e-308"},
  454. {ldexp(-1.0, -1022), "-2.2250738585072e-308"},
  455. // Denormalized values
  456. {ldexp(1.125, -1023), "0x1.2p-1023"},
  457. {ldexp(-1.375, -1024), "-0x1.6p-1024"},
  458. // NaNs
  459. {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"},
  460. {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"},
  461. // Infinity
  462. {std::numeric_limits<double>::infinity(), "0x1p+1024"},
  463. {-std::numeric_limits<double>::infinity(), "-0x1p+1024"},
  464. })),);
  465. // double is used so that unbiased_exponent can be used with the output
  466. // of ldexp directly.
  467. int32_t unbiased_exponent(double f) {
  468. return spvutils::HexFloat<spvutils::FloatProxy<float>>(
  469. static_cast<float>(f)).getUnbiasedNormalizedExponent();
  470. }
  471. int16_t unbiased_half_exponent(uint16_t f) {
  472. return spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>(f)
  473. .getUnbiasedNormalizedExponent();
  474. }
  475. TEST(HexFloatOperationTest, UnbiasedExponent) {
  476. // Float cases
  477. EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, 0)));
  478. EXPECT_EQ(-32, unbiased_exponent(ldexp(1.0f, -32)));
  479. EXPECT_EQ(42, unbiased_exponent(ldexp(1.0f, 42)));
  480. EXPECT_EQ(125, unbiased_exponent(ldexp(1.0f, 125)));
  481. // Saturates to 128
  482. EXPECT_EQ(128, unbiased_exponent(ldexp(1.0f, 256)));
  483. EXPECT_EQ(-100, unbiased_exponent(ldexp(1.0f, -100)));
  484. EXPECT_EQ(-127, unbiased_exponent(ldexp(1.0f, -127))); // First denorm
  485. EXPECT_EQ(-128, unbiased_exponent(ldexp(1.0f, -128)));
  486. EXPECT_EQ(-129, unbiased_exponent(ldexp(1.0f, -129)));
  487. EXPECT_EQ(-140, unbiased_exponent(ldexp(1.0f, -140)));
  488. // Smallest representable number
  489. EXPECT_EQ(-126 - 23, unbiased_exponent(ldexp(1.0f, -126 - 23)));
  490. // Should get rounded to 0 first.
  491. EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, -127 - 23)));
  492. // Float16 cases
  493. // The exponent is represented in the bits 0x7C00
  494. // The offset is -15
  495. EXPECT_EQ(0, unbiased_half_exponent(0x3C00));
  496. EXPECT_EQ(3, unbiased_half_exponent(0x4800));
  497. EXPECT_EQ(-1, unbiased_half_exponent(0x3800));
  498. EXPECT_EQ(-14, unbiased_half_exponent(0x0400));
  499. EXPECT_EQ(16, unbiased_half_exponent(0x7C00));
  500. EXPECT_EQ(10, unbiased_half_exponent(0x6400));
  501. // Smallest representable number
  502. EXPECT_EQ(-24, unbiased_half_exponent(0x0001));
  503. }
  504. // Creates a float that is the sum of 1/(2 ^ fractions[i]) for i in factions
  505. float float_fractions(const std::vector<uint32_t>& fractions) {
  506. float f = 0;
  507. for(int32_t i: fractions) {
  508. f += std::ldexp(1.0f, -i);
  509. }
  510. return f;
  511. }
  512. // Returns the normalized significand of a HexFloat<FloatProxy<float>>
  513. // that was created by calling float_fractions with the input fractions,
  514. // raised to the power of exp.
  515. uint32_t normalized_significand(const std::vector<uint32_t>& fractions, uint32_t exp) {
  516. return spvutils::HexFloat<spvutils::FloatProxy<float>>(
  517. static_cast<float>(ldexp(float_fractions(fractions), exp)))
  518. .getNormalizedSignificand();
  519. }
  520. // Sets the bits from MSB to LSB of the significand part of a float.
  521. // For example 0 would set the bit 23 (counting from LSB to MSB),
  522. // and 1 would set the 22nd bit.
  523. uint32_t bits_set(const std::vector<uint32_t>& bits) {
  524. const uint32_t top_bit = 1u << 22u;
  525. uint32_t val= 0;
  526. for(uint32_t i: bits) {
  527. val |= top_bit >> i;
  528. }
  529. return val;
  530. }
  531. // The same as bits_set but for a Float16 value instead of 32-bit floating
  532. // point.
  533. uint16_t half_bits_set(const std::vector<uint32_t>& bits) {
  534. const uint32_t top_bit = 1u << 9u;
  535. uint32_t val= 0;
  536. for(uint32_t i: bits) {
  537. val |= top_bit >> i;
  538. }
  539. return static_cast<uint16_t>(val);
  540. }
  541. TEST(HexFloatOperationTest, NormalizedSignificand) {
  542. // For normalized numbers (the following) it should be a simple matter
  543. // of getting rid of the top implicit bit
  544. EXPECT_EQ(bits_set({}), normalized_significand({0}, 0));
  545. EXPECT_EQ(bits_set({0}), normalized_significand({0, 1}, 0));
  546. EXPECT_EQ(bits_set({0, 1}), normalized_significand({0, 1, 2}, 0));
  547. EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 0));
  548. EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 32));
  549. EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 126));
  550. // For denormalized numbers we expect the normalized significand to
  551. // shift as if it were normalized. This means, in practice that the
  552. // top_most set bit will be cut off. Looks very similar to above (on purpose)
  553. EXPECT_EQ(bits_set({}), normalized_significand({0}, -127));
  554. EXPECT_EQ(bits_set({3}), normalized_significand({0, 4}, -128));
  555. EXPECT_EQ(bits_set({3}), normalized_significand({0, 4}, -127));
  556. EXPECT_EQ(bits_set({}), normalized_significand({22}, -127));
  557. EXPECT_EQ(bits_set({0}), normalized_significand({21, 22}, -127));
  558. }
  559. // Returns the 32-bit floating point value created by
  560. // calling setFromSignUnbiasedExponentAndNormalizedSignificand
  561. // on a HexFloat<FloatProxy<float>>
  562. float set_from_sign(bool negative, int32_t unbiased_exponent,
  563. uint32_t significand, bool round_denorm_up) {
  564. spvutils::HexFloat<spvutils::FloatProxy<float>> f(0.f);
  565. f.setFromSignUnbiasedExponentAndNormalizedSignificand(
  566. negative, unbiased_exponent, significand, round_denorm_up);
  567. return f.value().getAsFloat();
  568. }
  569. TEST(HexFloatOperationTests,
  570. SetFromSignUnbiasedExponentAndNormalizedSignificand) {
  571. EXPECT_EQ(1.f, set_from_sign(false, 0, 0, false));
  572. // Tests insertion of various denormalized numbers with and without round up.
  573. EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)), set_from_sign(false, -149, 0, false));
  574. EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)), set_from_sign(false, -149, 0, true));
  575. EXPECT_EQ(0.f, set_from_sign(false, -150, 1, false));
  576. EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)), set_from_sign(false, -150, 1, true));
  577. EXPECT_EQ(ldexp(1.0f, -127), set_from_sign(false, -127, 0, false));
  578. EXPECT_EQ(ldexp(1.0f, -128), set_from_sign(false, -128, 0, false));
  579. EXPECT_EQ(float_fractions({0, 1, 2, 5}),
  580. set_from_sign(false, 0, bits_set({0, 1, 4}), false));
  581. EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -32),
  582. set_from_sign(false, -32, bits_set({0, 1, 4}), false));
  583. EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -128),
  584. set_from_sign(false, -128, bits_set({0, 1, 4}), false));
  585. // The negative cases from above.
  586. EXPECT_EQ(-1.f, set_from_sign(true, 0, 0, false));
  587. EXPECT_EQ(-ldexp(1.0, -127), set_from_sign(true, -127, 0, false));
  588. EXPECT_EQ(-ldexp(1.0, -128), set_from_sign(true, -128, 0, false));
  589. EXPECT_EQ(-float_fractions({0, 1, 2, 5}),
  590. set_from_sign(true, 0, bits_set({0, 1, 4}), false));
  591. EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -32),
  592. set_from_sign(true, -32, bits_set({0, 1, 4}), false));
  593. EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -128),
  594. set_from_sign(true, -128, bits_set({0, 1, 4}), false));
  595. }
  596. TEST(HexFloatOperationTests, NonRounding) {
  597. // Rounding from 32-bit hex-float to 32-bit hex-float should be trivial,
  598. // except in the denorm case which is a bit more complex.
  599. using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
  600. bool carry_bit = false;
  601. spvutils::round_direction rounding[] = {
  602. spvutils::kRoundToZero,
  603. spvutils::kRoundToNearestEven,
  604. spvutils::kRoundToPositiveInfinity,
  605. spvutils::kRoundToNegativeInfinity};
  606. // Everything fits, so this should be straight-forward
  607. for (spvutils::round_direction round : rounding) {
  608. EXPECT_EQ(bits_set({}), HF(0.f).getRoundedNormalizedSignificand<HF>(
  609. round, &carry_bit));
  610. EXPECT_FALSE(carry_bit);
  611. EXPECT_EQ(bits_set({0}),
  612. HF(float_fractions({0, 1}))
  613. .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
  614. EXPECT_FALSE(carry_bit);
  615. EXPECT_EQ(bits_set({1, 3}),
  616. HF(float_fractions({0, 2, 4}))
  617. .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
  618. EXPECT_FALSE(carry_bit);
  619. EXPECT_EQ(
  620. bits_set({0, 1, 4}),
  621. HF(static_cast<float>(-ldexp(float_fractions({0, 1, 2, 5}), -128)))
  622. .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
  623. EXPECT_FALSE(carry_bit);
  624. EXPECT_EQ(
  625. bits_set({0, 1, 4, 22}),
  626. HF(static_cast<float>(float_fractions({0, 1, 2, 5, 23})))
  627. .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
  628. EXPECT_FALSE(carry_bit);
  629. }
  630. }
  631. struct RoundSignificandCase {
  632. float source_float;
  633. std::pair<int16_t, bool> expected_results;
  634. spvutils::round_direction round;
  635. };
  636. using HexFloatRoundTest =
  637. ::testing::TestWithParam<RoundSignificandCase>;
  638. TEST_P(HexFloatRoundTest, RoundDownToFP16) {
  639. using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
  640. using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
  641. HF input_value(GetParam().source_float);
  642. bool carry_bit = false;
  643. EXPECT_EQ(GetParam().expected_results.first,
  644. input_value.getRoundedNormalizedSignificand<HF16>(
  645. GetParam().round, &carry_bit));
  646. EXPECT_EQ(carry_bit, GetParam().expected_results.second);
  647. }
  648. // clang-format off
  649. INSTANTIATE_TEST_CASE_P(F32ToF16, HexFloatRoundTest,
  650. ::testing::ValuesIn(std::vector<RoundSignificandCase>(
  651. {
  652. {float_fractions({0}), std::make_pair(half_bits_set({}), false), spvutils::kRoundToZero},
  653. {float_fractions({0}), std::make_pair(half_bits_set({}), false), spvutils::kRoundToNearestEven},
  654. {float_fractions({0}), std::make_pair(half_bits_set({}), false), spvutils::kRoundToPositiveInfinity},
  655. {float_fractions({0}), std::make_pair(half_bits_set({}), false), spvutils::kRoundToNegativeInfinity},
  656. {float_fractions({0, 1}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero},
  657. {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero},
  658. {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToPositiveInfinity},
  659. {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNegativeInfinity},
  660. {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNearestEven},
  661. {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToZero},
  662. {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), spvutils::kRoundToPositiveInfinity},
  663. {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNegativeInfinity},
  664. {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), spvutils::kRoundToNearestEven},
  665. {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero},
  666. {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToPositiveInfinity},
  667. {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNegativeInfinity},
  668. {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNearestEven},
  669. {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero},
  670. {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToPositiveInfinity},
  671. {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNegativeInfinity},
  672. {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNearestEven},
  673. {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero},
  674. {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToPositiveInfinity},
  675. {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNegativeInfinity},
  676. {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNearestEven},
  677. // Carries
  678. {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), spvutils::kRoundToZero},
  679. {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), spvutils::kRoundToPositiveInfinity},
  680. {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), spvutils::kRoundToNegativeInfinity},
  681. {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), spvutils::kRoundToNearestEven},
  682. // Cases where original number was denorm. Note: this should have no effect
  683. // the number is pre-normalized.
  684. {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -128)), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero},
  685. {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -129)), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToPositiveInfinity},
  686. {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -131)), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNegativeInfinity},
  687. {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -130)), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNearestEven},
  688. })),);
  689. // clang-format on
  690. struct UpCastSignificandCase {
  691. uint16_t source_half;
  692. uint32_t expected_result;
  693. };
  694. using HexFloatRoundUpSignificandTest =
  695. ::testing::TestWithParam<UpCastSignificandCase>;
  696. TEST_P(HexFloatRoundUpSignificandTest, Widening) {
  697. using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
  698. using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
  699. bool carry_bit = false;
  700. spvutils::round_direction rounding[] = {
  701. spvutils::kRoundToZero,
  702. spvutils::kRoundToNearestEven,
  703. spvutils::kRoundToPositiveInfinity,
  704. spvutils::kRoundToNegativeInfinity};
  705. // Everything fits, so everything should just be bit-shifts.
  706. for (spvutils::round_direction round : rounding) {
  707. carry_bit = false;
  708. HF16 input_value(GetParam().source_half);
  709. EXPECT_EQ(
  710. GetParam().expected_result,
  711. input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit))
  712. << std::hex << "0x"
  713. << input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit)
  714. << " 0x" << GetParam().expected_result;
  715. EXPECT_FALSE(carry_bit);
  716. }
  717. }
  718. INSTANTIATE_TEST_CASE_P(F16toF32, HexFloatRoundUpSignificandTest,
  719. // 0xFC00 of the source 16-bit hex value cover the sign and the exponent.
  720. // They are ignored for this test.
  721. ::testing::ValuesIn(std::vector<UpCastSignificandCase>(
  722. {
  723. {0x3F00, 0x600000},
  724. {0x0F00, 0x600000},
  725. {0x0F01, 0x602000},
  726. {0x0FFF, 0x7FE000},
  727. })),);
  728. struct DownCastTest {
  729. float source_float;
  730. uint16_t expected_half;
  731. std::vector<spvutils::round_direction> directions;
  732. };
  733. std::string get_round_text(spvutils::round_direction direction) {
  734. #define CASE(round_direction) \
  735. case round_direction: \
  736. return #round_direction
  737. switch (direction) {
  738. CASE(spvutils::kRoundToZero);
  739. CASE(spvutils::kRoundToPositiveInfinity);
  740. CASE(spvutils::kRoundToNegativeInfinity);
  741. CASE(spvutils::kRoundToNearestEven);
  742. }
  743. #undef CASE
  744. return "";
  745. }
  746. using HexFloatFP32To16Tests = ::testing::TestWithParam<DownCastTest>;
  747. TEST_P(HexFloatFP32To16Tests, NarrowingCasts) {
  748. using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
  749. using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
  750. HF f(GetParam().source_float);
  751. for (auto round : GetParam().directions) {
  752. HF16 half(0);
  753. f.castTo(half, round);
  754. EXPECT_EQ(GetParam().expected_half, half.value().getAsFloat().get_value())
  755. << get_round_text(round) << " " << std::hex
  756. << spvutils::BitwiseCast<uint32_t>(GetParam().source_float)
  757. << " cast to: " << half.value().getAsFloat().get_value();
  758. }
  759. }
  760. const uint16_t positive_infinity = 0x7C00;
  761. const uint16_t negative_infinity = 0xFC00;
  762. INSTANTIATE_TEST_CASE_P(F32ToF16, HexFloatFP32To16Tests,
  763. ::testing::ValuesIn(std::vector<DownCastTest>(
  764. {
  765. // Exactly representable as half.
  766. {0.f, 0x0, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
  767. {-0.f, 0x8000, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
  768. {1.0f, 0x3C00, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
  769. {-1.0f, 0xBC00, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
  770. {float_fractions({0, 1, 10}) , 0x3E01, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
  771. {-float_fractions({0, 1, 10}) , 0xBE01, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
  772. {static_cast<float>(ldexp(float_fractions({0, 1, 10}), 3)), 0x4A01, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
  773. {static_cast<float>(-ldexp(float_fractions({0, 1, 10}), 3)), 0xCA01, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
  774. // Underflow
  775. {static_cast<float>(ldexp(1.0f, -25)), 0x0, {spvutils::kRoundToZero, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
  776. {static_cast<float>(ldexp(1.0f, -25)), 0x1, {spvutils::kRoundToPositiveInfinity}},
  777. {static_cast<float>(-ldexp(1.0f, -25)), 0x8000, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNearestEven}},
  778. {static_cast<float>(-ldexp(1.0f, -25)), 0x8001, {spvutils::kRoundToNegativeInfinity}},
  779. {static_cast<float>(ldexp(1.0f, -24)), 0x1, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
  780. // Overflow
  781. {static_cast<float>(ldexp(1.0f, 16)), positive_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
  782. {static_cast<float>(ldexp(1.0f, 18)), positive_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
  783. {static_cast<float>(ldexp(1.3f, 16)), positive_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
  784. {static_cast<float>(-ldexp(1.0f, 16)), negative_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
  785. {static_cast<float>(-ldexp(1.0f, 18)), negative_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
  786. {static_cast<float>(-ldexp(1.3f, 16)), negative_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
  787. // Transfer of Infinities
  788. {std::numeric_limits<float>::infinity(), positive_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
  789. {-std::numeric_limits<float>::infinity(), negative_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
  790. // Nans are below because we cannot test for equality.
  791. })),);
  792. struct UpCastCase{
  793. uint16_t source_half;
  794. float expected_float;
  795. };
  796. using HexFloatFP16To32Tests = ::testing::TestWithParam<UpCastCase>;
  797. TEST_P(HexFloatFP16To32Tests, WideningCasts) {
  798. using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
  799. using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
  800. HF16 f(GetParam().source_half);
  801. spvutils::round_direction rounding[] = {
  802. spvutils::kRoundToZero,
  803. spvutils::kRoundToNearestEven,
  804. spvutils::kRoundToPositiveInfinity,
  805. spvutils::kRoundToNegativeInfinity};
  806. // Everything fits, so everything should just be bit-shifts.
  807. for (spvutils::round_direction round : rounding) {
  808. HF flt(0.f);
  809. f.castTo(flt, round);
  810. EXPECT_EQ(GetParam().expected_float, flt.value().getAsFloat())
  811. << get_round_text(round) << " " << std::hex
  812. << spvutils::BitwiseCast<uint16_t>(GetParam().source_half)
  813. << " cast to: " << flt.value().getAsFloat();
  814. }
  815. }
  816. INSTANTIATE_TEST_CASE_P(F16ToF32, HexFloatFP16To32Tests,
  817. ::testing::ValuesIn(std::vector<UpCastCase>(
  818. {
  819. {0x0000, 0.f},
  820. {0x8000, -0.f},
  821. {0x3C00, 1.0f},
  822. {0xBC00, -1.0f},
  823. {0x3F00, float_fractions({0, 1, 2})},
  824. {0xBF00, -float_fractions({0, 1, 2})},
  825. {0x3F01, float_fractions({0, 1, 2, 10})},
  826. {0xBF01, -float_fractions({0, 1, 2, 10})},
  827. // denorm
  828. {0x0001, static_cast<float>(ldexp(1.0, -24))},
  829. {0x0002, static_cast<float>(ldexp(1.0, -23))},
  830. {0x8001, static_cast<float>(-ldexp(1.0, -24))},
  831. {0x8011, static_cast<float>(-ldexp(1.0, -20) + -ldexp(1.0, -24))},
  832. // inf
  833. {0x7C00, std::numeric_limits<float>::infinity()},
  834. {0xFC00, -std::numeric_limits<float>::infinity()},
  835. })),);
  836. TEST(HexFloatOperationTests, NanTests) {
  837. using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
  838. using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
  839. spvutils::round_direction rounding[] = {
  840. spvutils::kRoundToZero,
  841. spvutils::kRoundToNearestEven,
  842. spvutils::kRoundToPositiveInfinity,
  843. spvutils::kRoundToNegativeInfinity};
  844. // Everything fits, so everything should just be bit-shifts.
  845. for (spvutils::round_direction round : rounding) {
  846. HF16 f16(0);
  847. HF f(0.f);
  848. HF(std::numeric_limits<float>::quiet_NaN()).castTo(f16, round);
  849. EXPECT_TRUE(f16.value().isNan());
  850. HF(std::numeric_limits<float>::signaling_NaN()).castTo(f16, round);
  851. EXPECT_TRUE(f16.value().isNan());
  852. HF16(0x7C01).castTo(f, round);
  853. EXPECT_TRUE(f.value().isNan());
  854. HF16(0x7C11).castTo(f, round);
  855. EXPECT_TRUE(f.value().isNan());
  856. HF16(0xFC01).castTo(f, round);
  857. EXPECT_TRUE(f.value().isNan());
  858. HF16(0x7C10).castTo(f, round);
  859. EXPECT_TRUE(f.value().isNan());
  860. HF16(0xFF00).castTo(f, round);
  861. EXPECT_TRUE(f.value().isNan());
  862. }
  863. }
  864. // A test case for parsing good and bad HexFloat<FloatProxy<T>> literals.
  865. template <typename T>
  866. struct FloatParseCase {
  867. std::string literal;
  868. bool negate_value;
  869. bool expect_success;
  870. HexFloat<FloatProxy<T>> expected_value;
  871. };
  872. using ParseNormalFloatTest = ::testing::TestWithParam<FloatParseCase<float>>;
  873. TEST_P(ParseNormalFloatTest, Samples) {
  874. std::stringstream input(GetParam().literal);
  875. HexFloat<FloatProxy<float>> parsed_value(0.0f);
  876. ParseNormalFloat(input, GetParam().negate_value, parsed_value);
  877. EXPECT_NE(GetParam().expect_success, input.fail())
  878. << " literal: " << GetParam().literal
  879. << " negate: " << GetParam().negate_value;
  880. if (GetParam().expect_success) {
  881. EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
  882. << " literal: " << GetParam().literal
  883. << " negate: " << GetParam().negate_value;
  884. }
  885. }
  886. // Returns a FloatParseCase with expected failure.
  887. template <typename T>
  888. FloatParseCase<T> BadFloatParseCase(std::string literal, bool negate_value,
  889. T expected_value) {
  890. HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
  891. return FloatParseCase<T>{literal, negate_value, false, proxy_expected_value};
  892. }
  893. // Returns a FloatParseCase that should successfully parse to a given value.
  894. template <typename T>
  895. FloatParseCase<T> GoodFloatParseCase(std::string literal, bool negate_value,
  896. T expected_value) {
  897. HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
  898. return FloatParseCase<T>{literal, negate_value, true, proxy_expected_value};
  899. }
  900. INSTANTIATE_TEST_CASE_P(
  901. FloatParse, ParseNormalFloatTest,
  902. ::testing::ValuesIn(std::vector<FloatParseCase<float>>{
  903. // Failing cases due to trivially incorrect syntax.
  904. BadFloatParseCase("abc", false, 0.0f),
  905. BadFloatParseCase("abc", true, 0.0f),
  906. // Valid cases.
  907. GoodFloatParseCase("0", false, 0.0f),
  908. GoodFloatParseCase("0.0", false, 0.0f),
  909. GoodFloatParseCase("-0.0", false, -0.0f),
  910. GoodFloatParseCase("2.0", false, 2.0f),
  911. GoodFloatParseCase("-2.0", false, -2.0f),
  912. GoodFloatParseCase("+2.0", false, 2.0f),
  913. // Cases with negate_value being true.
  914. GoodFloatParseCase("0.0", true, -0.0f),
  915. GoodFloatParseCase("2.0", true, -2.0f),
  916. // When negate_value is true, we should not accept a
  917. // leading minus or plus.
  918. BadFloatParseCase("-0.0", true, 0.0f),
  919. BadFloatParseCase("-2.0", true, 0.0f),
  920. BadFloatParseCase("+0.0", true, 0.0f),
  921. BadFloatParseCase("+2.0", true, 0.0f),
  922. // Overflow is an error for 32-bit float parsing.
  923. BadFloatParseCase("1e40", false, FLT_MAX),
  924. BadFloatParseCase("1e40", true, -FLT_MAX),
  925. BadFloatParseCase("-1e40", false, -FLT_MAX),
  926. // We can't have -1e40 and negate_value == true since
  927. // that represents an original case of "--1e40" which
  928. // is invalid.
  929. }),);
  930. using ParseNormalFloat16Test =
  931. ::testing::TestWithParam<FloatParseCase<Float16>>;
  932. TEST_P(ParseNormalFloat16Test, Samples) {
  933. std::stringstream input(GetParam().literal);
  934. HexFloat<FloatProxy<Float16>> parsed_value(0);
  935. ParseNormalFloat(input, GetParam().negate_value, parsed_value);
  936. EXPECT_NE(GetParam().expect_success, input.fail())
  937. << " literal: " << GetParam().literal
  938. << " negate: " << GetParam().negate_value;
  939. if (GetParam().expect_success) {
  940. EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
  941. << " literal: " << GetParam().literal
  942. << " negate: " << GetParam().negate_value;
  943. }
  944. }
  945. INSTANTIATE_TEST_CASE_P(
  946. Float16Parse, ParseNormalFloat16Test,
  947. ::testing::ValuesIn(std::vector<FloatParseCase<Float16>>{
  948. // Failing cases due to trivially incorrect syntax.
  949. BadFloatParseCase<Float16>("abc", false, uint16_t{0}),
  950. BadFloatParseCase<Float16>("abc", true, uint16_t{0}),
  951. // Valid cases.
  952. GoodFloatParseCase<Float16>("0", false, uint16_t{0}),
  953. GoodFloatParseCase<Float16>("0.0", false, uint16_t{0}),
  954. GoodFloatParseCase<Float16>("-0.0", false, uint16_t{0x8000}),
  955. GoodFloatParseCase<Float16>("2.0", false, uint16_t{0x4000}),
  956. GoodFloatParseCase<Float16>("-2.0", false, uint16_t{0xc000}),
  957. GoodFloatParseCase<Float16>("+2.0", false, uint16_t{0x4000}),
  958. // Cases with negate_value being true.
  959. GoodFloatParseCase<Float16>("0.0", true, uint16_t{0x8000}),
  960. GoodFloatParseCase<Float16>("2.0", true, uint16_t{0xc000}),
  961. // When negate_value is true, we should not accept a leading minus or
  962. // plus.
  963. BadFloatParseCase<Float16>("-0.0", true, uint16_t{0}),
  964. BadFloatParseCase<Float16>("-2.0", true, uint16_t{0}),
  965. BadFloatParseCase<Float16>("+0.0", true, uint16_t{0}),
  966. BadFloatParseCase<Float16>("+2.0", true, uint16_t{0}),
  967. }),);
  // One sample for the stream-extraction overflow tests.
  template <typename T>
  struct OverflowParseCase {
    // The text fed to the input stream.
    std::string input;
    // True when extraction is expected to leave the stream in a good state.
    bool expect_success;
    // The value compared against the result when success is expected.
    T expected_value;
  };
  975. using FloatProxyParseOverflowFloatTest =
  976. ::testing::TestWithParam<OverflowParseCase<float>>;
  977. TEST_P(FloatProxyParseOverflowFloatTest, Sample) {
  978. std::istringstream input(GetParam().input);
  979. HexFloat<FloatProxy<float>> value(0.0f);
  980. input >> value;
  981. EXPECT_NE(GetParam().expect_success, input.fail());
  982. if (GetParam().expect_success) {
  983. EXPECT_THAT(value.value().getAsFloat(), GetParam().expected_value);
  984. }
  985. }
  986. INSTANTIATE_TEST_CASE_P(
  987. FloatOverflow, FloatProxyParseOverflowFloatTest,
  988. ::testing::ValuesIn(std::vector<OverflowParseCase<float>>({
  989. {"0", true, 0.0f},
  990. {"0.0", true, 0.0f},
  991. {"1.0", true, 1.0f},
  992. {"1e38", true, 1e38f},
  993. {"-1e38", true, -1e38f},
  994. {"1e40", false, FLT_MAX},
  995. {"-1e40", false, -FLT_MAX},
  996. {"1e400", false, FLT_MAX},
  997. {"-1e400", false, -FLT_MAX},
  998. })),);
  999. using FloatProxyParseOverflowDoubleTest =
  1000. ::testing::TestWithParam<OverflowParseCase<double>>;
  1001. TEST_P(FloatProxyParseOverflowDoubleTest, Sample) {
  1002. std::istringstream input(GetParam().input);
  1003. HexFloat<FloatProxy<double>> value(0.0);
  1004. input >> value;
  1005. EXPECT_NE(GetParam().expect_success, input.fail());
  1006. if (GetParam().expect_success) {
  1007. EXPECT_THAT(value.value().getAsFloat(), Eq(GetParam().expected_value));
  1008. }
  1009. }
  1010. INSTANTIATE_TEST_CASE_P(
  1011. DoubleOverflow, FloatProxyParseOverflowDoubleTest,
  1012. ::testing::ValuesIn(std::vector<OverflowParseCase<double>>({
  1013. {"0", true, 0.0},
  1014. {"0.0", true, 0.0},
  1015. {"1.0", true, 1.0},
  1016. {"1e38", true, 1e38},
  1017. {"-1e38", true, -1e38},
  1018. {"1e40", true, 1e40},
  1019. {"-1e40", true, -1e40},
  1020. {"1e400", false, DBL_MAX},
  1021. {"-1e400", false, -DBL_MAX},
  1022. })),);
  1023. using FloatProxyParseOverflowFloat16Test =
  1024. ::testing::TestWithParam<OverflowParseCase<uint16_t>>;
  1025. TEST_P(FloatProxyParseOverflowFloat16Test, Sample) {
  1026. std::istringstream input(GetParam().input);
  1027. HexFloat<FloatProxy<Float16>> value(0);
  1028. input >> value;
  1029. EXPECT_NE(GetParam().expect_success, input.fail()) << " literal: "
  1030. << GetParam().input;
  1031. if (GetParam().expect_success) {
  1032. EXPECT_THAT(value.value().data(), Eq(GetParam().expected_value))
  1033. << " literal: " << GetParam().input;
  1034. }
  1035. }
  1036. INSTANTIATE_TEST_CASE_P(
  1037. Float16Overflow, FloatProxyParseOverflowFloat16Test,
  1038. ::testing::ValuesIn(std::vector<OverflowParseCase<uint16_t>>({
  1039. {"0", true, uint16_t{0}},
  1040. {"0.0", true, uint16_t{0}},
  1041. {"1.0", true, uint16_t{0x3c00}},
  1042. // Overflow for 16-bit float is an error, and returns max or
  1043. // lowest value.
  1044. {"1e38", false, uint16_t{0x7bff}},
  1045. {"1e40", false, uint16_t{0x7bff}},
  1046. {"1e400", false, uint16_t{0x7bff}},
  1047. {"-1e38", false, uint16_t{0xfbff}},
  1048. {"-1e40", false, uint16_t{0xfbff}},
  1049. {"-1e400", false, uint16_t{0xfbff}},
  1050. })),);
  1051. TEST(FloatProxy, Max) {
  1052. EXPECT_THAT(FloatProxy<Float16>::max().getAsFloat().get_value(),
  1053. Eq(uint16_t{0x7bff}));
  1054. EXPECT_THAT(FloatProxy<float>::max().getAsFloat(),
  1055. Eq(std::numeric_limits<float>::max()));
  1056. EXPECT_THAT(FloatProxy<double>::max().getAsFloat(),
  1057. Eq(std::numeric_limits<double>::max()));
  1058. }
  1059. TEST(FloatProxy, Lowest) {
  1060. EXPECT_THAT(FloatProxy<Float16>::lowest().getAsFloat().get_value(),
  1061. Eq(uint16_t{0xfbff}));
  1062. EXPECT_THAT(FloatProxy<float>::lowest().getAsFloat(),
  1063. Eq(std::numeric_limits<float>::lowest()));
  1064. EXPECT_THAT(FloatProxy<double>::lowest().getAsFloat(),
  1065. Eq(std::numeric_limits<double>::lowest()));
  1066. }
  1067. // TODO(awoloszyn): Add fp16 tests and HexFloatTraits.
  1068. } // anonymous namespace