rsqrt_test.cpp 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. /* This file is part of the dynarmic project.
  2. * Copyright (c) 2021 MerryMage
  3. * SPDX-License-Identifier: 0BSD
  4. */
  5. #include <catch2/benchmark/catch_benchmark.hpp>
  6. #include <catch2/catch_test_macros.hpp>
  7. #include <fmt/printf.h>
  8. #include <mcl/stdint.hpp>
  9. #include "dynarmic/common/fp/fpcr.h"
  10. #include "dynarmic/common/fp/fpsr.h"
  11. #include "dynarmic/common/fp/op/FPRSqrtEstimate.h"
  12. extern "C" u32 rsqrt_inaccurate(u32);
  13. extern "C" u32 rsqrt_full(u32);
  14. extern "C" u32 rsqrt_full_gpr(u32);
  15. extern "C" u32 rsqrt_full_nb(u32);
  16. extern "C" u32 rsqrt_full_nb2(u32);
  17. extern "C" u32 rsqrt_full_nb_gpr(u32);
  18. extern "C" u32 rsqrt_newton(u32);
  19. extern "C" u32 rsqrt_hack(u32);
  20. using namespace Dynarmic;
  21. extern "C" u32 rsqrt_fallback(u32 value) {
  22. FP::FPCR fpcr;
  23. FP::FPSR fpsr;
  24. return FP::FPRSqrtEstimate(value, fpcr, fpsr);
  25. }
  26. extern "C" u32 _rsqrt_fallback(u32 value) {
  27. return rsqrt_fallback(value);
  28. }
  29. void Test(u32 value) {
  30. FP::FPCR fpcr;
  31. FP::FPSR fpsr;
  32. const u32 expect = FP::FPRSqrtEstimate(value, fpcr, fpsr);
  33. const u32 full = rsqrt_full(value);
  34. const u32 full_gpr = rsqrt_full_gpr(value);
  35. const u32 newton = rsqrt_newton(value);
  36. const u32 hack = rsqrt_hack(value);
  37. if (expect != full || expect != full_gpr || expect != newton || expect != hack) {
  38. fmt::print("{:08x} = {:08x} : {:08x} : {:08x} : {:08x} : {:08x}\n", value, expect, full, full_gpr, newton, hack);
  39. REQUIRE(expect == full);
  40. REQUIRE(expect == full_gpr);
  41. REQUIRE(expect == newton);
  42. REQUIRE(expect == hack);
  43. }
  44. }
  45. TEST_CASE("RSqrt Tests", "[fp][.]") {
  46. Test(0x00000000);
  47. Test(0x80000000);
  48. Test(0x7f8b7201);
  49. Test(0x7f800000);
  50. Test(0x7fc00000);
  51. Test(0xff800000);
  52. Test(0xffc00000);
  53. Test(0xff800001);
  54. for (u64 i = 0; i < 0x1'0000'0000; i++) {
  55. const u32 value = static_cast<u32>(i);
  56. Test(value);
  57. }
  58. }
  59. TEST_CASE("Benchmark RSqrt", "[fp][.]") {
  60. BENCHMARK("Inaccurate") {
  61. u64 total = 0;
  62. for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
  63. const u32 value = static_cast<u32>(i);
  64. total += rsqrt_inaccurate(value);
  65. }
  66. return total;
  67. };
  68. BENCHMARK("Full divss") {
  69. u64 total = 0;
  70. for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
  71. const u32 value = static_cast<u32>(i);
  72. total += rsqrt_full(value);
  73. }
  74. return total;
  75. };
  76. BENCHMARK("Full divss (GPR)") {
  77. u64 total = 0;
  78. for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
  79. const u32 value = static_cast<u32>(i);
  80. total += rsqrt_full_gpr(value);
  81. }
  82. return total;
  83. };
  84. BENCHMARK("Full divss (NB)") {
  85. u64 total = 0;
  86. for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
  87. const u32 value = static_cast<u32>(i);
  88. total += rsqrt_full_nb(value);
  89. }
  90. return total;
  91. };
  92. BENCHMARK("Full divss (NB2)") {
  93. u64 total = 0;
  94. for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
  95. const u32 value = static_cast<u32>(i);
  96. total += rsqrt_full_nb2(value);
  97. }
  98. return total;
  99. };
  100. BENCHMARK("Full divss (NB + GPR)") {
  101. u64 total = 0;
  102. for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
  103. const u32 value = static_cast<u32>(i);
  104. total += rsqrt_full_nb_gpr(value);
  105. }
  106. return total;
  107. };
  108. BENCHMARK("One Newton iteration") {
  109. u64 total = 0;
  110. for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
  111. const u32 value = static_cast<u32>(i);
  112. total += rsqrt_newton(value);
  113. }
  114. return total;
  115. };
  116. BENCHMARK("Ugly Hack") {
  117. u64 total = 0;
  118. for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
  119. const u32 value = static_cast<u32>(i);
  120. total += rsqrt_hack(value);
  121. }
  122. return total;
  123. };
  124. BENCHMARK("Softfloat") {
  125. u64 total = 0;
  126. for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
  127. const u32 value = static_cast<u32>(i);
  128. total += rsqrt_fallback(value);
  129. }
  130. return total;
  131. };
  132. }