softfloat64.go 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499
  1. // Copyright 2010 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Software IEEE754 64-bit floating point.
  5. // Only referred to (and thus linked in) by arm port
  6. // and by tests in this directory.
  7. package runtime
  8. const (
  9. mantbits64 uint = 52
  10. expbits64 uint = 11
  11. bias64 = -1<<(expbits64-1) + 1
  12. nan64 uint64 = (1<<expbits64-1)<<mantbits64 + 1
  13. inf64 uint64 = (1<<expbits64 - 1) << mantbits64
  14. neg64 uint64 = 1 << (expbits64 + mantbits64)
  15. mantbits32 uint = 23
  16. expbits32 uint = 8
  17. bias32 = -1<<(expbits32-1) + 1
  18. nan32 uint32 = (1<<expbits32-1)<<mantbits32 + 1
  19. inf32 uint32 = (1<<expbits32 - 1) << mantbits32
  20. neg32 uint32 = 1 << (expbits32 + mantbits32)
  21. )
  22. func funpack64(f uint64) (sign, mant uint64, exp int, inf, nan bool) {
  23. sign = f & (1 << (mantbits64 + expbits64))
  24. mant = f & (1<<mantbits64 - 1)
  25. exp = int(f>>mantbits64) & (1<<expbits64 - 1)
  26. switch exp {
  27. case 1<<expbits64 - 1:
  28. if mant != 0 {
  29. nan = true
  30. return
  31. }
  32. inf = true
  33. return
  34. case 0:
  35. // denormalized
  36. if mant != 0 {
  37. exp += bias64 + 1
  38. for mant < 1<<mantbits64 {
  39. mant <<= 1
  40. exp--
  41. }
  42. }
  43. default:
  44. // add implicit top bit
  45. mant |= 1 << mantbits64
  46. exp += bias64
  47. }
  48. return
  49. }
  50. func funpack32(f uint32) (sign, mant uint32, exp int, inf, nan bool) {
  51. sign = f & (1 << (mantbits32 + expbits32))
  52. mant = f & (1<<mantbits32 - 1)
  53. exp = int(f>>mantbits32) & (1<<expbits32 - 1)
  54. switch exp {
  55. case 1<<expbits32 - 1:
  56. if mant != 0 {
  57. nan = true
  58. return
  59. }
  60. inf = true
  61. return
  62. case 0:
  63. // denormalized
  64. if mant != 0 {
  65. exp += bias32 + 1
  66. for mant < 1<<mantbits32 {
  67. mant <<= 1
  68. exp--
  69. }
  70. }
  71. default:
  72. // add implicit top bit
  73. mant |= 1 << mantbits32
  74. exp += bias32
  75. }
  76. return
  77. }
  78. func fpack64(sign, mant uint64, exp int, trunc uint64) uint64 {
  79. mant0, exp0, trunc0 := mant, exp, trunc
  80. if mant == 0 {
  81. return sign
  82. }
  83. for mant < 1<<mantbits64 {
  84. mant <<= 1
  85. exp--
  86. }
  87. for mant >= 4<<mantbits64 {
  88. trunc |= mant & 1
  89. mant >>= 1
  90. exp++
  91. }
  92. if mant >= 2<<mantbits64 {
  93. if mant&1 != 0 && (trunc != 0 || mant&2 != 0) {
  94. mant++
  95. if mant >= 4<<mantbits64 {
  96. mant >>= 1
  97. exp++
  98. }
  99. }
  100. mant >>= 1
  101. exp++
  102. }
  103. if exp >= 1<<expbits64-1+bias64 {
  104. return sign ^ inf64
  105. }
  106. if exp < bias64+1 {
  107. if exp < bias64-int(mantbits64) {
  108. return sign | 0
  109. }
  110. // repeat expecting denormal
  111. mant, exp, trunc = mant0, exp0, trunc0
  112. for exp < bias64 {
  113. trunc |= mant & 1
  114. mant >>= 1
  115. exp++
  116. }
  117. if mant&1 != 0 && (trunc != 0 || mant&2 != 0) {
  118. mant++
  119. }
  120. mant >>= 1
  121. exp++
  122. if mant < 1<<mantbits64 {
  123. return sign | mant
  124. }
  125. }
  126. return sign | uint64(exp-bias64)<<mantbits64 | mant&(1<<mantbits64-1)
  127. }
  128. func fpack32(sign, mant uint32, exp int, trunc uint32) uint32 {
  129. mant0, exp0, trunc0 := mant, exp, trunc
  130. if mant == 0 {
  131. return sign
  132. }
  133. for mant < 1<<mantbits32 {
  134. mant <<= 1
  135. exp--
  136. }
  137. for mant >= 4<<mantbits32 {
  138. trunc |= mant & 1
  139. mant >>= 1
  140. exp++
  141. }
  142. if mant >= 2<<mantbits32 {
  143. if mant&1 != 0 && (trunc != 0 || mant&2 != 0) {
  144. mant++
  145. if mant >= 4<<mantbits32 {
  146. mant >>= 1
  147. exp++
  148. }
  149. }
  150. mant >>= 1
  151. exp++
  152. }
  153. if exp >= 1<<expbits32-1+bias32 {
  154. return sign ^ inf32
  155. }
  156. if exp < bias32+1 {
  157. if exp < bias32-int(mantbits32) {
  158. return sign | 0
  159. }
  160. // repeat expecting denormal
  161. mant, exp, trunc = mant0, exp0, trunc0
  162. for exp < bias32 {
  163. trunc |= mant & 1
  164. mant >>= 1
  165. exp++
  166. }
  167. if mant&1 != 0 && (trunc != 0 || mant&2 != 0) {
  168. mant++
  169. }
  170. mant >>= 1
  171. exp++
  172. if mant < 1<<mantbits32 {
  173. return sign | mant
  174. }
  175. }
  176. return sign | uint32(exp-bias32)<<mantbits32 | mant&(1<<mantbits32-1)
  177. }
  178. func fadd64(f, g uint64) uint64 {
  179. fs, fm, fe, fi, fn := funpack64(f)
  180. gs, gm, ge, gi, gn := funpack64(g)
  181. // Special cases.
  182. switch {
  183. case fn || gn: // NaN + x or x + NaN = NaN
  184. return nan64
  185. case fi && gi && fs != gs: // +Inf + -Inf or -Inf + +Inf = NaN
  186. return nan64
  187. case fi: // ±Inf + g = ±Inf
  188. return f
  189. case gi: // f + ±Inf = ±Inf
  190. return g
  191. case fm == 0 && gm == 0 && fs != 0 && gs != 0: // -0 + -0 = -0
  192. return f
  193. case fm == 0: // 0 + g = g but 0 + -0 = +0
  194. if gm == 0 {
  195. g ^= gs
  196. }
  197. return g
  198. case gm == 0: // f + 0 = f
  199. return f
  200. }
  201. if fe < ge || fe == ge && fm < gm {
  202. f, g, fs, fm, fe, gs, gm, ge = g, f, gs, gm, ge, fs, fm, fe
  203. }
  204. shift := uint(fe - ge)
  205. fm <<= 2
  206. gm <<= 2
  207. trunc := gm & (1<<shift - 1)
  208. gm >>= shift
  209. if fs == gs {
  210. fm += gm
  211. } else {
  212. fm -= gm
  213. if trunc != 0 {
  214. fm--
  215. }
  216. }
  217. if fm == 0 {
  218. fs = 0
  219. }
  220. return fpack64(fs, fm, fe-2, trunc)
  221. }
  222. func fsub64(f, g uint64) uint64 {
  223. return fadd64(f, fneg64(g))
  224. }
  225. func fneg64(f uint64) uint64 {
  226. return f ^ (1 << (mantbits64 + expbits64))
  227. }
  228. func fmul64(f, g uint64) uint64 {
  229. fs, fm, fe, fi, fn := funpack64(f)
  230. gs, gm, ge, gi, gn := funpack64(g)
  231. // Special cases.
  232. switch {
  233. case fn || gn: // NaN * g or f * NaN = NaN
  234. return nan64
  235. case fi && gi: // Inf * Inf = Inf (with sign adjusted)
  236. return f ^ gs
  237. case fi && gm == 0, fm == 0 && gi: // 0 * Inf = Inf * 0 = NaN
  238. return nan64
  239. case fm == 0: // 0 * x = 0 (with sign adjusted)
  240. return f ^ gs
  241. case gm == 0: // x * 0 = 0 (with sign adjusted)
  242. return g ^ fs
  243. }
  244. // 53-bit * 53-bit = 107- or 108-bit
  245. lo, hi := mullu(fm, gm)
  246. shift := mantbits64 - 1
  247. trunc := lo & (1<<shift - 1)
  248. mant := hi<<(64-shift) | lo>>shift
  249. return fpack64(fs^gs, mant, fe+ge-1, trunc)
  250. }
  251. func fdiv64(f, g uint64) uint64 {
  252. fs, fm, fe, fi, fn := funpack64(f)
  253. gs, gm, ge, gi, gn := funpack64(g)
  254. // Special cases.
  255. switch {
  256. case fn || gn: // NaN / g = f / NaN = NaN
  257. return nan64
  258. case fi && gi: // ±Inf / ±Inf = NaN
  259. return nan64
  260. case !fi && !gi && fm == 0 && gm == 0: // 0 / 0 = NaN
  261. return nan64
  262. case fi, !gi && gm == 0: // Inf / g = f / 0 = Inf
  263. return fs ^ gs ^ inf64
  264. case gi, fm == 0: // f / Inf = 0 / g = Inf
  265. return fs ^ gs ^ 0
  266. }
  267. _, _, _, _ = fi, fn, gi, gn
  268. // 53-bit<<54 / 53-bit = 53- or 54-bit.
  269. shift := mantbits64 + 2
  270. q, r := divlu(fm>>(64-shift), fm<<shift, gm)
  271. return fpack64(fs^gs, q, fe-ge-2, r)
  272. }
  273. func f64to32(f uint64) uint32 {
  274. fs, fm, fe, fi, fn := funpack64(f)
  275. if fn {
  276. return nan32
  277. }
  278. fs32 := uint32(fs >> 32)
  279. if fi {
  280. return fs32 ^ inf32
  281. }
  282. const d = mantbits64 - mantbits32 - 1
  283. return fpack32(fs32, uint32(fm>>d), fe-1, uint32(fm&(1<<d-1)))
  284. }
  285. func f32to64(f uint32) uint64 {
  286. const d = mantbits64 - mantbits32
  287. fs, fm, fe, fi, fn := funpack32(f)
  288. if fn {
  289. return nan64
  290. }
  291. fs64 := uint64(fs) << 32
  292. if fi {
  293. return fs64 ^ inf64
  294. }
  295. return fpack64(fs64, uint64(fm)<<d, fe, 0)
  296. }
  297. func fcmp64(f, g uint64) (cmp int, isnan bool) {
  298. fs, fm, _, fi, fn := funpack64(f)
  299. gs, gm, _, gi, gn := funpack64(g)
  300. switch {
  301. case fn, gn: // flag NaN
  302. return 0, true
  303. case !fi && !gi && fm == 0 && gm == 0: // ±0 == ±0
  304. return 0, false
  305. case fs > gs: // f < 0, g > 0
  306. return -1, false
  307. case fs < gs: // f > 0, g < 0
  308. return +1, false
  309. // Same sign, not NaN.
  310. // Can compare encodings directly now.
  311. // Reverse for sign.
  312. case fs == 0 && f < g, fs != 0 && f > g:
  313. return -1, false
  314. case fs == 0 && f > g, fs != 0 && f < g:
  315. return +1, false
  316. }
  317. // f == g
  318. return 0, false
  319. }
  320. func f64toint(f uint64) (val int64, ok bool) {
  321. fs, fm, fe, fi, fn := funpack64(f)
  322. switch {
  323. case fi, fn: // NaN
  324. return 0, false
  325. case fe < -1: // f < 0.5
  326. return 0, false
  327. case fe > 63: // f >= 2^63
  328. if fs != 0 && fm == 0 { // f == -2^63
  329. return -1 << 63, true
  330. }
  331. if fs != 0 {
  332. return 0, false
  333. }
  334. return 0, false
  335. }
  336. for fe > int(mantbits64) {
  337. fe--
  338. fm <<= 1
  339. }
  340. for fe < int(mantbits64) {
  341. fe++
  342. fm >>= 1
  343. }
  344. val = int64(fm)
  345. if fs != 0 {
  346. val = -val
  347. }
  348. return val, true
  349. }
  350. func fintto64(val int64) (f uint64) {
  351. fs := uint64(val) & (1 << 63)
  352. mant := uint64(val)
  353. if fs != 0 {
  354. mant = -mant
  355. }
  356. return fpack64(fs, mant, int(mantbits64), 0)
  357. }
  358. // 64x64 -> 128 multiply.
  359. // adapted from hacker's delight.
  360. func mullu(u, v uint64) (lo, hi uint64) {
  361. const (
  362. s = 32
  363. mask = 1<<s - 1
  364. )
  365. u0 := u & mask
  366. u1 := u >> s
  367. v0 := v & mask
  368. v1 := v >> s
  369. w0 := u0 * v0
  370. t := u1*v0 + w0>>s
  371. w1 := t & mask
  372. w2 := t >> s
  373. w1 += u0 * v1
  374. return u * v, u1*v1 + w2 + w1>>s
  375. }
  376. // 128/64 -> 64 quotient, 64 remainder.
  377. // adapted from hacker's delight
  378. func divlu(u1, u0, v uint64) (q, r uint64) {
  379. const b = 1 << 32
  380. if u1 >= v {
  381. return 1<<64 - 1, 1<<64 - 1
  382. }
  383. // s = nlz(v); v <<= s
  384. s := uint(0)
  385. for v&(1<<63) == 0 {
  386. s++
  387. v <<= 1
  388. }
  389. vn1 := v >> 32
  390. vn0 := v & (1<<32 - 1)
  391. un32 := u1<<s | u0>>(64-s)
  392. un10 := u0 << s
  393. un1 := un10 >> 32
  394. un0 := un10 & (1<<32 - 1)
  395. q1 := un32 / vn1
  396. rhat := un32 - q1*vn1
  397. again1:
  398. if q1 >= b || q1*vn0 > b*rhat+un1 {
  399. q1--
  400. rhat += vn1
  401. if rhat < b {
  402. goto again1
  403. }
  404. }
  405. un21 := un32*b + un1 - q1*v
  406. q0 := un21 / vn1
  407. rhat = un21 - q0*vn1
  408. again2:
  409. if q0 >= b || q0*vn0 > b*rhat+un0 {
  410. q0--
  411. rhat += vn1
  412. if rhat < b {
  413. goto again2
  414. }
  415. }
  416. return q1*b + q0, (un21*b + un0 - q0*v) >> s
  417. }
  418. // callable from C
  419. func fadd64c(f, g uint64, ret *uint64) { *ret = fadd64(f, g) }
  420. func fsub64c(f, g uint64, ret *uint64) { *ret = fsub64(f, g) }
  421. func fmul64c(f, g uint64, ret *uint64) { *ret = fmul64(f, g) }
  422. func fdiv64c(f, g uint64, ret *uint64) { *ret = fdiv64(f, g) }
  423. func fneg64c(f uint64, ret *uint64) { *ret = fneg64(f) }
  424. func f32to64c(f uint32, ret *uint64) { *ret = f32to64(f) }
  425. func f64to32c(f uint64, ret *uint32) { *ret = f64to32(f) }
  426. func fcmp64c(f, g uint64, ret *int, retnan *bool) { *ret, *retnan = fcmp64(f, g) }
  427. func fintto64c(val int64, ret *uint64) { *ret = fintto64(val) }
  428. func f64tointc(f uint64, ret *int64, retok *bool) { *ret, *retok = f64toint(f) }