sdivsi3.S 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. .global __sdivsi3
  2. .global __sdivsi3_1
  3. .global __sdivsi3_2
  4. .section .text..SHmedia32,"ax"
  5. .align 2
  6. /* Division routine that uses no divide instruction: a reciprocal of r5 is
        seeded from __div_table, refined in two multiply steps, and multiplied by r4.
        inputs: r4,r5
        NOTE(review): r4 looks like the dividend and r5 the divisor (r5 is the
        value that gets normalized and inverted below) -- confirm against callers.
        No explicit check for r5 == 0 is visible in this routine. */
  7. /* clobbered: r1,r18,r19,r20,r21,r25,tr0 */
  8. /* result in r0 */
  9. __sdivsi3: /* full entry point: sets up the table base in r20 first */
  10. __sdivsi3_1: /* same address as __sdivsi3 */
  11. ptb __div_table,tr0 /* put the address of __div_table into target register tr0 */
  12. gettr tr0,r20 /* copy it to r20, the base register for both ldx loads below */
  13. __sdivsi3_2: /* entry that skips the two instructions above; r20 is only read from here on, so a caller entering here must already have it set (presumably to __div_table -- confirm) */
  14. nsb r5, r1 /* r1 = normalization shift count for r5 (presumably its count of redundant sign bits -- confirm nsb semantics in the ISA manual) */
  15. shlld r5, r1, r25 /* normalize; [-2 ..1, 1..2) in s2.62 */
  16. shari r25, 58, r21 /* extract 5(6) bit index (s2.4 with hole -1..1); top 6 bits, so r21 is in -32..-17 or 16..31 */
  17. /* bubble */
  18. ldx.ub r20, r21, r19 /* u0.8 factor byte at r20 + r21 */
  19. shari r25, 32, r25 /* normalize to s2.30 */
  20. shlli r21, 1, r21 /* double the index to address the 16-bit constants */
  21. muls.l r25, r19, r19 /* s2.38 */
  22. ldx.w r20, r21, r21 /* s2.14 constant at r20 + r21 */
  23. ptabs r18, tr0 /* tr0 = target taken from r18, captured here because r18 is overwritten as scratch below; presumably r18 holds the return address -- confirm against the ABI */
  24. shari r19, 24, r19 /* truncate to s2.14 */
  25. sub r21, r19, r19 /* some 11 bit inverse in s1.14 */
  26. muls.l r19, r19, r21 /* u0.28 */
  27. sub r63, r1, r1 /* r1 = r63 - r1; presumably a negate, with r63 reading as zero -- confirm */
  28. addi r1, 92, r1 /* r1 += 92; this is the shift count consumed by the final shard */
  29. muls.l r25, r21, r18 /* s2.58 */
  30. shlli r19, 45, r19 /* multiply by two and convert to s2.58 */
  31. /* bubble */
  32. sub r19, r18, r18
  33. shari r18, 28, r18 /* some 22 bit inverse in s1.30 */
  34. muls.l r18, r25, r0 /* s2.60 */
  35. muls.l r18, r4, r25 /* s32.30 */
  36. /* bubble */
  37. shari r0, 16, r19 /* s-16.44 */
  38. muls.l r19, r18, r19 /* s-16.74 */
  39. shari r25, 63, r0 /* r0 = r25 shifted right arithmetically by 63: presumably 0 or -1 according to the sign of the product in r25 */
  40. shari r4, 14, r18 /* s19.-14 */
  41. shari r19, 30, r19 /* s-16.44 */
  42. muls.l r19, r18, r19 /* s15.30 */
  43. xor r21, r0, r21 /* You could also use the constant 1 << 27. */
  44. add r21, r25, r21
  45. sub r21, r19, r21
  46. shard r21, r1, r21 /* shift right by the count prepared in r1 above */
  47. sub r21, r0, r0 /* result: r0 = r21 - r0, where r0 was the 0 / -1 value derived from r25 */
  48. blink tr0, r63 /* branch through tr0 (loaded from r18 above); presumably the return, with the link value discarded into r63 -- confirm */
  49. /* This table has been generated by divtab.c .
  50. Defects for bias -330:
  51. Max defect: 6.081536e-07 at -1.000000e+00
  52. Min defect: 2.849516e-08 at 1.030651e+00
  53. Max 2nd step defect: 9.606539e-12 at -1.000000e+00
  54. Min 2nd step defect: 0.000000e+00 at 0.000000e+00
  55. Defect at 1: 1.238659e-07
  56. Defect at -2: 1.061708e-07 */
  57. .balign 2
  58. .type __div_table,@object
  59. .size __div_table,128
  60. /* negative division constants: 16 16-bit words at byte offsets -64..-34 from __div_table, read by the ldx.w in __sdivsi3 with the doubled index.
        Layout note: the 128 bytes declared by .size are 32+16+16 bytes before the __div_table label plus 16+16+32 bytes after it;
        the label sits in the middle so that the signed index computed from the normalized divisor can be used directly as an offset. */
  61. .word -16638
  62. .word -17135
  63. .word -17737
  64. .word -18433
  65. .word -19103
  66. .word -19751
  67. .word -20583
  68. .word -21383
  69. .word -22343
  70. .word -23353
  71. .word -24407
  72. .word -25582
  73. .word -26863
  74. .word -28382
  75. .word -29965
  76. .word -31800
  77. /* negative division factors: 16 bytes at offsets -32..-17, read by the ldx.ub in __sdivsi3 */
  78. .byte 66
  79. .byte 70
  80. .byte 75
  81. .byte 81
  82. .byte 87
  83. .byte 93
  84. .byte 101
  85. .byte 109
  86. .byte 119
  87. .byte 130
  88. .byte 142
  89. .byte 156
  90. .byte 172
  91. .byte 192
  92. .byte 214
  93. .byte 241
  94. .skip 16 /* offsets -16..-1: padding for the index hole; a normalized divisor never yields these indexes */
  95. .global __div_table
  96. __div_table: /* offset 0: base address that __sdivsi3 loads into r20 */
  97. .skip 16 /* offsets 0..15: padding for the other half of the index hole */
  98. /* positive division factors: 16 bytes at offsets 16..31 */
  99. .byte 241
  100. .byte 214
  101. .byte 192
  102. .byte 172
  103. .byte 156
  104. .byte 142
  105. .byte 130
  106. .byte 119
  107. .byte 109
  108. .byte 101
  109. .byte 93
  110. .byte 87
  111. .byte 81
  112. .byte 75
  113. .byte 70
  114. .byte 66
  115. /* positive division constants: 16 16-bit words at byte offsets 32..62 */
  116. .word 31801
  117. .word 29966
  118. .word 28383
  119. .word 26864
  120. .word 25583
  121. .word 24408
  122. .word 23354
  123. .word 22344
  124. .word 21384
  125. .word 20584
  126. .word 19752
  127. .word 19104
  128. .word 18434
  129. .word 17738
  130. .word 17136
  131. .word 16639