simde-bf16.h 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. /* SPDX-License-Identifier: MIT
  2. *
  3. * Permission is hereby granted, free of charge, to any person
  4. * obtaining a copy of this software and associated documentation
  5. * files (the "Software"), to deal in the Software without
  6. * restriction, including without limitation the rights to use, copy,
  7. * modify, merge, publish, distribute, sublicense, and/or sell copies
  8. * of the Software, and to permit persons to whom the Software is
  9. * furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be
  12. * included in all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  15. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  16. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  17. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  18. * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  19. * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  20. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. * SOFTWARE.
  22. *
  23. * Copyright:
  24. * 2023 Yi-Yen Chung <eric681@andestech.com> (Copyright owned by Andes Technology)
  25. */
  26. #include "hedley.h"
  27. #include "simde-common.h"
  28. #include "simde-detect-clang.h"
  29. #if !defined(SIMDE_BFLOAT16_H)
  30. #define SIMDE_BFLOAT16_H
  31. HEDLEY_DIAGNOSTIC_PUSH
  32. SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
  33. SIMDE_BEGIN_DECLS_
  34. /* This implementation is based upon simde-f16.h */
  /* Back-end selection for the bfloat16 storage type.
   *
   * The portable back end is intended to work on pretty much any
   * compiler.  Because compiler support for things such as conversion
   * to/from 32-bit floats cannot be relied upon, always go through the
   * functions and macros provided by this file. */
  #define SIMDE_BFLOAT16_API_PORTABLE 1
  #define SIMDE_BFLOAT16_API_BF16 2

  /* Honour a caller-supplied SIMDE_BFLOAT16_API; otherwise prefer the
   * native type when SIMDE_ARM_NEON_BF16 is defined and fall back to the
   * portable representation. */
  #if !defined(SIMDE_BFLOAT16_API) && defined(SIMDE_ARM_NEON_BF16)
    #define SIMDE_BFLOAT16_API SIMDE_BFLOAT16_API_BF16
  #elif !defined(SIMDE_BFLOAT16_API)
    #define SIMDE_BFLOAT16_API SIMDE_BFLOAT16_API_PORTABLE
  #endif

  #if SIMDE_BFLOAT16_API == SIMDE_BFLOAT16_API_PORTABLE
    /* Raw 16-bit pattern wrapped in a struct. */
    typedef struct { uint16_t value; } simde_bfloat16;
  #elif SIMDE_BFLOAT16_API == SIMDE_BFLOAT16_API_BF16
    #include <arm_bf16.h>
    typedef __bf16 simde_bfloat16;
  #else
    #error No 16-bit floating point API.
  #endif
  57. /* Conversion -- convert between single-precision and brain half-precision
  58. * floats. */
  59. static HEDLEY_ALWAYS_INLINE HEDLEY_CONST
  60. simde_bfloat16
  61. simde_bfloat16_from_float32 (simde_float32 value) {
  62. #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16)
  63. return vcvth_bf16_f32(value);
  64. #else
  65. simde_bfloat16 res;
  66. char* src = HEDLEY_REINTERPRET_CAST(char*, &value);
  67. // rounding to nearest bfloat16
  68. // If the 17th bit of value is 1, set the rounding to 1.
  69. uint8_t rounding = 0;
  70. #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE
  71. if (src[1] & UINT8_C(0x80)) rounding = 1;
  72. src[2] = HEDLEY_STATIC_CAST(char, (HEDLEY_STATIC_CAST(uint8_t, src[2]) + rounding));
  73. simde_memcpy(&res, src+2, sizeof(res));
  74. #else
  75. if (src[2] & UINT8_C(0x80)) rounding = 1;
  76. src[1] = HEDLEY_STATIC_CAST(char, (HEDLEY_STATIC_CAST(uint8_t, src[1]) + rounding));
  77. simde_memcpy(&res, src, sizeof(res));
  78. #endif
  79. return res;
  80. #endif
  81. }
  82. static HEDLEY_ALWAYS_INLINE HEDLEY_CONST
  83. simde_float32
  84. simde_bfloat16_to_float32 (simde_bfloat16 value) {
  85. #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16)
  86. return vcvtah_f32_bf16(value);
  87. #else
  88. simde_float32 res = 0.0;
  89. char* _res = HEDLEY_REINTERPRET_CAST(char*, &res);
  90. #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE
  91. simde_memcpy(_res+2, &value, sizeof(value));
  92. #else
  93. simde_memcpy(_res, &value, sizeof(value));
  94. #endif
  95. return res;
  96. #endif
  97. }
  98. SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16_as_bfloat16, simde_bfloat16, uint16_t)
  99. #define SIMDE_NANBF simde_uint16_as_bfloat16(0xFFC1) // a quiet Not-a-Number
  100. #define SIMDE_INFINITYBF simde_uint16_as_bfloat16(0x7F80)
  101. #define SIMDE_NINFINITYBF simde_uint16_as_bfloat16(0xFF80)
  102. #define SIMDE_BFLOAT16_VALUE(value) simde_bfloat16_from_float32(SIMDE_FLOAT32_C(value))
  103. #if !defined(simde_isinfbf) && defined(simde_math_isinff)
  104. #define simde_isinfbf(a) simde_math_isinff(simde_bfloat16_to_float32(a))
  105. #endif
  106. #if !defined(simde_isnanbf) && defined(simde_math_isnanf)
  107. #define simde_isnanbf(a) simde_math_isnanf(simde_bfloat16_to_float32(a))
  108. #endif
  109. SIMDE_END_DECLS_
  110. HEDLEY_DIAGNOSTIC_POP
  111. #endif /* !defined(SIMDE_BFLOAT16_H) */