polynom_Xsig.S 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /*---------------------------------------------------------------------------+
  3. | polynomial_Xsig.S |
  4. | |
  5. | Fixed point arithmetic polynomial evaluation. |
  6. | |
  7. | Copyright (C) 1992,1993,1994,1995 |
  8. | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
  9. | Australia. E-mail billm@jacobi.maths.monash.edu.au |
  10. | |
  11. | Call from C as: |
  12. | void polynomial_Xsig(Xsig *accum, unsigned long long x, |
  13. | unsigned long long terms[], int n) |
  14. | |
  15. | Computes: |
  16. | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x |
  17. | and adds the result to the 12 byte Xsig. |
  18. | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
  19. | precision. |
  20. | |
  21. | This function must be used carefully: most overflow of intermediate |
  22. | results is controlled, but overflow of the result is not. |
  23. | |
  24. +---------------------------------------------------------------------------*/
  25. .file "polynomial_Xsig.S"
  26. #include "fpu_emu.h"
  27. #define TERM_SIZE $8
  28. #define SUM_MS -20(%ebp) /* sum ms long */
  29. #define SUM_MIDDLE -24(%ebp) /* sum middle long */
  30. #define SUM_LS -28(%ebp) /* sum ls long */
  31. #define ACCUM_MS -4(%ebp) /* accum ms long */
  32. #define ACCUM_MIDDLE -8(%ebp) /* accum middle long */
  33. #define ACCUM_LS -12(%ebp) /* accum ls long */
  34. #define OVERFLOWED -16(%ebp) /* addition overflow flag */
  35. .text
  36. ENTRY(polynomial_Xsig)
  37. pushl %ebp
  38. movl %esp,%ebp
  39. subl $32,%esp
  40. pushl %esi
  41. pushl %edi
  42. pushl %ebx
  43. movl PARAM2,%esi /* x */
  44. movl PARAM3,%edi /* terms */
  45. movl TERM_SIZE,%eax
  46. mull PARAM4 /* n */
  47. addl %eax,%edi
  48. movl 4(%edi),%edx /* terms[n] */
  49. movl %edx,SUM_MS
  50. movl (%edi),%edx /* terms[n] */
  51. movl %edx,SUM_MIDDLE
  52. xor %eax,%eax
  53. movl %eax,SUM_LS
  54. movb %al,OVERFLOWED
  55. subl TERM_SIZE,%edi
  56. decl PARAM4
  57. js L_accum_done
  58. L_accum_loop:
  59. xor %eax,%eax
  60. movl %eax,ACCUM_MS
  61. movl %eax,ACCUM_MIDDLE
  62. movl SUM_MIDDLE,%eax
  63. mull (%esi) /* x ls long */
  64. movl %edx,ACCUM_LS
  65. movl SUM_MIDDLE,%eax
  66. mull 4(%esi) /* x ms long */
  67. addl %eax,ACCUM_LS
  68. adcl %edx,ACCUM_MIDDLE
  69. adcl $0,ACCUM_MS
  70. movl SUM_MS,%eax
  71. mull (%esi) /* x ls long */
  72. addl %eax,ACCUM_LS
  73. adcl %edx,ACCUM_MIDDLE
  74. adcl $0,ACCUM_MS
  75. movl SUM_MS,%eax
  76. mull 4(%esi) /* x ms long */
  77. addl %eax,ACCUM_MIDDLE
  78. adcl %edx,ACCUM_MS
  79. testb $0xff,OVERFLOWED
  80. jz L_no_overflow
  81. movl (%esi),%eax
  82. addl %eax,ACCUM_MIDDLE
  83. movl 4(%esi),%eax
  84. adcl %eax,ACCUM_MS /* This could overflow too */
  85. L_no_overflow:
  86. /*
  87. * Now put the sum of next term and the accumulator
  88. * into the sum register
  89. */
  90. movl ACCUM_LS,%eax
  91. addl (%edi),%eax /* term ls long */
  92. movl %eax,SUM_LS
  93. movl ACCUM_MIDDLE,%eax
  94. adcl (%edi),%eax /* term ls long */
  95. movl %eax,SUM_MIDDLE
  96. movl ACCUM_MS,%eax
  97. adcl 4(%edi),%eax /* term ms long */
  98. movl %eax,SUM_MS
  99. sbbb %al,%al
  100. movb %al,OVERFLOWED /* Used in the next iteration */
  101. subl TERM_SIZE,%edi
  102. decl PARAM4
  103. jns L_accum_loop
  104. L_accum_done:
  105. movl PARAM1,%edi /* accum */
  106. movl SUM_LS,%eax
  107. addl %eax,(%edi)
  108. movl SUM_MIDDLE,%eax
  109. adcl %eax,4(%edi)
  110. movl SUM_MS,%eax
  111. adcl %eax,8(%edi)
  112. popl %ebx
  113. popl %edi
  114. popl %esi
  115. leave
  116. ret
  117. ENDPROC(polynomial_Xsig)