xor.S 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. /*
  2. * arch/ia64/lib/xor.S
  3. *
  4. * Optimized RAID-5 checksumming functions for IA-64.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2, or (at your option)
  9. * any later version.
  10. *
  11. * You should have received a copy of the GNU General Public License
  12. * (for example /usr/src/linux/COPYING); if not, write to the Free
  13. * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  14. */
  15. #include <asm/asmmacro.h>
  /*
   * xor_ia64_2: XOR two buffers together, one 8-byte word per loop iteration.
   *
   * Register use, as visible in the code below:
   *   in0 - size; shifted right by 3 to get the number of 8-byte words
   *         (presumably a byte count -- confirm against the C caller)
   *   in1 - first source and also the destination (copied to r16 and r8)
   *   in2 - second source (copied to r17)
   * Effect: every 8-byte word at in1 is overwritten with
   *         word(in1) ^ word(in2).
   *
   * The loop is software-pipelined using rotating registers/predicates:
   * a word is loaded in stage 0 (p[0]), XORed in stage 6 (p[6]) and
   * stored in stage 7 (p[6+1]).
   *
   * NOTE(review): there is no guard for in0 < 8; (in0 >> 3) - 1 is then -1
   * when written to ar.lc. Presumably callers always pass a non-zero
   * multiple of 8 -- confirm.
   */
  16. GLOBAL_ENTRY(xor_ia64_2)
  17. .prologue
  18. .fframe 0
  /*
   * Save ar.pfs, ar.lc and pr in r31, r30 and r29; each .save directive
   * records the location for the unwinder. The alloc operands are
   * 3 inputs, 0 locals, 13 outputs, 16 rotating registers.
   */
  19. .save ar.pfs, r31
  20. alloc r31 = ar.pfs, 3, 0, 13, 16
  21. .save ar.lc, r30
  22. mov r30 = ar.lc
  23. .save pr, r29
  24. mov r29 = pr
  25. ;;
  26. .body
  /*
   * r8 is the store pointer (destination = first buffer). ar.ec is set to
   * 8, matching the 8 rotating predicates declared by .rotp below.
   * in0 becomes the word count (size / 8).
   */
  27. mov r8 = in1
  28. mov ar.ec = 6 + 2
  29. shr in0 = in0, 3
  30. ;;
  /*
   * in0 = word count - 1, the value loaded into ar.lc for br.ctop.
   * r16/r17 are the post-incremented load pointers for the two sources.
   */
  31. adds in0 = -1, in0
  32. mov r16 = in1
  33. mov r17 = in2
  34. ;;
  /* Arm the loop: set the count and enable only p16, i.e. stage 0. */
  35. mov ar.lc = in0
  36. mov pr.rot = 1 << 16
  37. ;;
  /*
   * Rotating names: 7 slots for each source value (loaded at index 0,
   * consumed at index 6), 2 slots for the result (written at d[0], stored
   * from d[1] one rotation later), and one predicate per pipeline stage.
   */
  38. .rotr s1[6+1], s2[6+1], d[2]
  39. .rotp p[6+2]
  40. 0:
  /*
   * Stage 0: load one word from each source, advancing each pointer by 8.
   * Stage 6: XOR the pair loaded six rotations earlier.
   * Stage 7: store the result produced in the previous rotation and
   *          advance the store pointer by 8.
   * nop.f 0 presumably just fills an instruction slot -- confirm.
   * br.ctop rotates the registers/predicates and branches back to 0.
   */
  41. (p[0]) ld8.nta s1[0] = [r16], 8
  42. (p[0]) ld8.nta s2[0] = [r17], 8
  43. (p[6]) xor d[0] = s1[6], s2[6]
  44. (p[6+1])st8.nta [r8] = d[1], 8
  45. nop.f 0
  46. br.ctop.dptk.few 0b
  47. ;;
  /* Restore ar.lc and all predicates (mask -1) saved on entry, then return. */
  48. mov ar.lc = r30
  49. mov pr = r29, -1
  50. br.ret.sptk.few rp
  51. END(xor_ia64_2)
  /*
   * xor_ia64_3: XOR three buffers together, one 8-byte word per loop
   * iteration.
   *
   * Register use, as visible in the code below:
   *   in0 - size; shifted right by 3 to get the number of 8-byte words
   *         (presumably a byte count -- confirm against the C caller)
   *   in1 - first source and also the destination (copied to r16 and r8)
   *   in2 - second source (copied to r17)
   *   in3 - third source (copied to r18)
   * Effect: every 8-byte word at in1 is overwritten with
   *         word(in1) ^ word(in2) ^ word(in3).
   *
   * The loop is software-pipelined using rotating registers/predicates:
   * words are loaded in stage 0 (p[0]), XORed in stage 6 (p[6]) and the
   * result is stored in stage 7 (p[6+1]).
   *
   * NOTE(review): there is no guard for in0 < 8; (in0 >> 3) - 1 is then -1
   * when written to ar.lc. Presumably callers always pass a non-zero
   * multiple of 8 -- confirm.
   */
  52. GLOBAL_ENTRY(xor_ia64_3)
  53. .prologue
  54. .fframe 0
  /*
   * Save ar.pfs, ar.lc and pr in r31, r30 and r29; each .save directive
   * records the location for the unwinder. The alloc operands are
   * 4 inputs, 0 locals, 20 outputs, 24 rotating registers.
   */
  55. .save ar.pfs, r31
  56. alloc r31 = ar.pfs, 4, 0, 20, 24
  57. .save ar.lc, r30
  58. mov r30 = ar.lc
  59. .save pr, r29
  60. mov r29 = pr
  61. ;;
  62. .body
  /*
   * r8 is the store pointer (destination = first buffer). ar.ec is set to
   * 8, matching the 8 rotating predicates declared by .rotp below.
   * in0 becomes the word count (size / 8).
   */
  63. mov r8 = in1
  64. mov ar.ec = 6 + 2
  65. shr in0 = in0, 3
  66. ;;
  /*
   * in0 = word count - 1, the value loaded into ar.lc for br.ctop.
   * r16/r17 are the post-incremented load pointers for sources 1 and 2.
   */
  67. adds in0 = -1, in0
  68. mov r16 = in1
  69. mov r17 = in2
  70. ;;
  /*
   * r18 is the load pointer for source 3. Arm the loop: set the count and
   * enable only p16, i.e. stage 0.
   */
  71. mov r18 = in3
  72. mov ar.lc = in0
  73. mov pr.rot = 1 << 16
  74. ;;
  /*
   * Rotating names: 7 slots for each source value (loaded at index 0,
   * consumed at index 6), 2 slots for the result (written at d[0], stored
   * from d[1] one rotation later), and one predicate per pipeline stage.
   */
  75. .rotr s1[6+1], s2[6+1], s3[6+1], d[2]
  76. .rotp p[6+2]
  77. 0:
  /*
   * First instruction group: stage-0 loads from sources 1 and 2, and the
   * stage-6 XOR of the first two source words into d[0].
   */
  78. (p[0]) ld8.nta s1[0] = [r16], 8
  79. (p[0]) ld8.nta s2[0] = [r17], 8
  80. (p[6]) xor d[0] = s1[6], s2[6]
  81. ;;
  /*
   * Second instruction group (the stop above separates it from the write
   * of d[0]): stage-0 load from source 3, stage-7 store of the result
   * produced in the previous rotation, and the stage-6 XOR folding the
   * third source word into d[0]. br.ctop rotates the
   * registers/predicates and branches back to 0.
   */
  82. (p[0]) ld8.nta s3[0] = [r18], 8
  83. (p[6+1])st8.nta [r8] = d[1], 8
  84. (p[6]) xor d[0] = d[0], s3[6]
  85. br.ctop.dptk.few 0b
  86. ;;
  /* Restore ar.lc and all predicates (mask -1) saved on entry, then return. */
  87. mov ar.lc = r30
  88. mov pr = r29, -1
  89. br.ret.sptk.few rp
  90. END(xor_ia64_3)
  /*
   * xor_ia64_4: XOR four buffers together, one 8-byte word per loop
   * iteration.
   *
   * Register use, as visible in the code below:
   *   in0 - size; shifted right by 3 to get the number of 8-byte words
   *         (presumably a byte count -- confirm against the C caller)
   *   in1 - first source and also the destination (copied to r16 and r8)
   *   in2 - second source (copied to r17)
   *   in3 - third source (copied to r18)
   *   in4 - fourth source (copied to r19)
   *   r20 - non-rotating temporary holding word(in3) ^ word(in4)
   * Effect: every 8-byte word at in1 is overwritten with
   *         word(in1) ^ word(in2) ^ word(in3) ^ word(in4).
   *
   * The loop is software-pipelined using rotating registers/predicates:
   * words are loaded in stage 0 (p[0]), XORed in stage 6 (p[6]) and the
   * result is stored in stage 7 (p[6+1]).
   *
   * NOTE(review): there is no guard for in0 < 8; (in0 >> 3) - 1 is then -1
   * when written to ar.lc. Presumably callers always pass a non-zero
   * multiple of 8 -- confirm.
   */
  91. GLOBAL_ENTRY(xor_ia64_4)
  92. .prologue
  93. .fframe 0
  /*
   * Save ar.pfs, ar.lc and pr in r31, r30 and r29; each .save directive
   * records the location for the unwinder. The alloc operands are
   * 5 inputs, 0 locals, 27 outputs, 32 rotating registers.
   */
  94. .save ar.pfs, r31
  95. alloc r31 = ar.pfs, 5, 0, 27, 32
  96. .save ar.lc, r30
  97. mov r30 = ar.lc
  98. .save pr, r29
  99. mov r29 = pr
  100. ;;
  101. .body
  /*
   * r8 is the store pointer (destination = first buffer). ar.ec is set to
   * 8, matching the 8 rotating predicates declared by .rotp below.
   * in0 becomes the word count (size / 8).
   */
  102. mov r8 = in1
  103. mov ar.ec = 6 + 2
  104. shr in0 = in0, 3
  105. ;;
  /*
   * in0 = word count - 1, the value loaded into ar.lc for br.ctop.
   * r16/r17 are the post-incremented load pointers for sources 1 and 2.
   */
  106. adds in0 = -1, in0
  107. mov r16 = in1
  108. mov r17 = in2
  109. ;;
  /*
   * r18/r19 are the load pointers for sources 3 and 4. Arm the loop: set
   * the count and enable only p16, i.e. stage 0.
   */
  110. mov r18 = in3
  111. mov ar.lc = in0
  112. mov pr.rot = 1 << 16
  113. mov r19 = in4
  114. ;;
  /*
   * Rotating names: 7 slots for each source value (loaded at index 0,
   * consumed at index 6), 2 slots for the result (written at d[0], stored
   * from d[1] one rotation later), and one predicate per pipeline stage.
   */
  115. .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
  116. .rotp p[6+2]
  117. 0:
  /*
   * First instruction group: stage-0 loads from all four sources, plus two
   * independent stage-6 XORs: sources 1^2 into d[0] and sources 3^4 into
   * the static temporary r20.
   */
  118. (p[0]) ld8.nta s1[0] = [r16], 8
  119. (p[0]) ld8.nta s2[0] = [r17], 8
  120. (p[6]) xor d[0] = s1[6], s2[6]
  121. (p[0]) ld8.nta s3[0] = [r18], 8
  122. (p[0]) ld8.nta s4[0] = [r19], 8
  123. (p[6]) xor r20 = s3[6], s4[6]
  124. ;;
  /*
   * Second instruction group (after the stop, so d[0] and r20 are
   * available): stage-7 store of the result produced in the previous
   * rotation, and the stage-6 XOR combining the two partial results into
   * d[0]. br.ctop rotates the registers/predicates and branches back to 0.
   */
  125. (p[6+1])st8.nta [r8] = d[1], 8
  126. (p[6]) xor d[0] = d[0], r20
  127. br.ctop.dptk.few 0b
  128. ;;
  /* Restore ar.lc and all predicates (mask -1) saved on entry, then return. */
  129. mov ar.lc = r30
  130. mov pr = r29, -1
  131. br.ret.sptk.few rp
  132. END(xor_ia64_4)
  /*
   * xor_ia64_5: XOR five buffers together, one 8-byte word per loop
   * iteration.
   *
   * Register use, as visible in the code below:
   *   in0 - size; shifted right by 3 to get the number of 8-byte words
   *         (presumably a byte count -- confirm against the C caller)
   *   in1 - first source and also the destination (copied to r16 and r8)
   *   in2 - second source (copied to r17)
   *   in3 - third source (copied to r18)
   *   in4 - fourth source (copied to r19)
   *   in5 - fifth source (copied to r20)
   *   r21 - non-rotating temporary holding word(in3) ^ word(in4)
   * Effect: every 8-byte word at in1 is overwritten with
   *         word(in1) ^ word(in2) ^ word(in3) ^ word(in4) ^ word(in5).
   *
   * The loop is software-pipelined using rotating registers/predicates:
   * words are loaded in stage 0 (p[0]), XORed in stage 6 (p[6]) and the
   * result is stored in stage 7 (p[6+1]).
   *
   * NOTE(review): there is no guard for in0 < 8; (in0 >> 3) - 1 is then -1
   * when written to ar.lc. Presumably callers always pass a non-zero
   * multiple of 8 -- confirm.
   */
  133. GLOBAL_ENTRY(xor_ia64_5)
  134. .prologue
  135. .fframe 0
  /*
   * Save ar.pfs, ar.lc and pr in r31, r30 and r29; each .save directive
   * records the location for the unwinder. The alloc operands are
   * 6 inputs, 0 locals, 34 outputs, 40 rotating registers.
   */
  136. .save ar.pfs, r31
  137. alloc r31 = ar.pfs, 6, 0, 34, 40
  138. .save ar.lc, r30
  139. mov r30 = ar.lc
  140. .save pr, r29
  141. mov r29 = pr
  142. ;;
  143. .body
  /*
   * r8 is the store pointer (destination = first buffer). ar.ec is set to
   * 8, matching the 8 rotating predicates declared by .rotp below.
   * in0 becomes the word count (size / 8).
   */
  144. mov r8 = in1
  145. mov ar.ec = 6 + 2
  146. shr in0 = in0, 3
  147. ;;
  /*
   * in0 = word count - 1, the value loaded into ar.lc for br.ctop.
   * r16/r17 are the post-incremented load pointers for sources 1 and 2.
   */
  148. adds in0 = -1, in0
  149. mov r16 = in1
  150. mov r17 = in2
  151. ;;
  /*
   * r18/r19/r20 are the load pointers for sources 3, 4 and 5. Arm the
   * loop: set the count and enable only p16, i.e. stage 0.
   */
  152. mov r18 = in3
  153. mov ar.lc = in0
  154. mov pr.rot = 1 << 16
  155. mov r19 = in4
  156. mov r20 = in5
  157. ;;
  /*
   * Rotating names: 7 slots for each source value (loaded at index 0,
   * consumed at index 6), 2 slots for the result (written at d[0], stored
   * from d[1] one rotation later), and one predicate per pipeline stage.
   */
  158. .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
  159. .rotp p[6+2]
  160. 0:
  /*
   * First instruction group: stage-0 loads from sources 1-4, plus two
   * independent stage-6 XORs: sources 1^2 into d[0] and sources 3^4 into
   * the static temporary r21.
   */
  161. (p[0]) ld8.nta s1[0] = [r16], 8
  162. (p[0]) ld8.nta s2[0] = [r17], 8
  163. (p[6]) xor d[0] = s1[6], s2[6]
  164. (p[0]) ld8.nta s3[0] = [r18], 8
  165. (p[0]) ld8.nta s4[0] = [r19], 8
  166. (p[6]) xor r21 = s3[6], s4[6]
  167. ;;
  /*
   * Second instruction group: stage-0 load from source 5, stage-7 store
   * of the result produced in the previous rotation, and the stage-6 XOR
   * combining the two partial results into d[0].
   */
  168. (p[0]) ld8.nta s5[0] = [r20], 8
  169. (p[6+1])st8.nta [r8] = d[1], 8
  170. (p[6]) xor d[0] = d[0], r21
  171. ;;
  /*
   * Third instruction group: fold the fifth source word into d[0].
   * nop.f 0 presumably just fills an instruction slot -- confirm.
   * br.ctop rotates the registers/predicates and branches back to 0.
   */
  172. (p[6]) xor d[0] = d[0], s5[6]
  173. nop.f 0
  174. br.ctop.dptk.few 0b
  175. ;;
  /* Restore ar.lc and all predicates (mask -1) saved on entry, then return. */
  176. mov ar.lc = r30
  177. mov pr = r29, -1
  178. br.ret.sptk.few rp
  179. END(xor_ia64_5)