xor.S 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. /*
  2. * arch/ia64/lib/xor.S
  3. *
  4. * Optimized RAID-5 checksumming functions for IA-64.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2, or (at your option)
  9. * any later version.
  10. *
  11. * You should have received a copy of the GNU General Public License
  12. * (for example /usr/src/linux/COPYING); if not, write to the Free
  13. * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  14. */
  15. #include <asm/asmmacro.h>
  16. #include <asm/export.h>
  /*
   * xor_ia64_2 - XOR two buffers, 8 bytes per loop iteration; the result
   * overwrites the first buffer.
   *
   * Inputs, as consumed below:
   *   in0   count; shifted right by 3 to obtain the number of 8-byte words
   *         (presumably a byte count - confirm against the C prototype)
   *   in1   first source, also the destination
   *   in2   second source
   *
   * Static registers:
   *   r8        store pointer (starts at in1)
   *   r16, r17  load pointers for in1, in2
   *   r29, r30, r31  saved pr, ar.lc, ar.pfs
   *
   * The loop is a counted, software-pipelined br.ctop loop: stage 0 issues
   * the loads, stage 6 does the XOR, stage 7 stores.  ar.ec = 6 + 2 covers
   * those eight stages while the pipeline drains.
   *
   * The arguments are copied to static registers before the loop starts.
   * NOTE(review): presumably because the rotating names declared with .rotr
   * overlap the in registers - confirm.
   * NOTE(review): there is no guard for in0 < 8; in0 >> 3 would then be 0
   * and ar.lc would be loaded with -1.
   */
  GLOBAL_ENTRY(xor_ia64_2)
          .prologue
          .fframe 0
          .save ar.pfs, r31
          alloc r31 = ar.pfs, 3, 0, 13, 16        /* 3 in, 0 loc, 13 out, 16 rotating */
          .save ar.lc, r30
          mov r30 = ar.lc                         /* keep caller's loop count */
          .save pr, r29
          mov r29 = pr                            /* keep caller's predicates */
          ;;
          .body
          mov r8 = in1                            /* store pointer */
          mov ar.ec = 6 + 2                       /* epilogue count = pipeline depth */
          shr in0 = in0, 3                        /* count / 8 */
          ;;
          adds in0 = -1, in0                      /* ar.lc takes iterations - 1 */
          mov r16 = in1                           /* load pointer, buffer 1 */
          mov r17 = in2                           /* load pointer, buffer 2 */
          ;;
          mov ar.lc = in0
          mov pr.rot = 1 << 16                    /* only bit 16 set: first stage on */
          ;;
          /* v1/v2: loaded words in flight; acc: XOR result; stg: stage predicates */
          .rotr v1[6+1], v2[6+1], acc[2]
          .rotp stg[6+2]
  0:
  (stg[0])        ld8.nta v1[0] = [r16], 8        /* stage 0: load and advance */
  (stg[0])        ld8.nta v2[0] = [r17], 8
  (stg[6])        xor acc[0] = v1[6], v2[6]       /* stage 6: combine words loaded 6 rotations ago */
  (stg[6+1])      st8.nta [r8] = acc[1], 8        /* stage 7: store previous rotation's result */
          nop.f 0
          br.ctop.dptk.few 0b
          ;;
          mov ar.lc = r30                         /* restore caller state */
          mov pr = r29, -1
          br.ret.sptk.few rp
  END(xor_ia64_2)
  EXPORT_SYMBOL(xor_ia64_2)
  /*
   * xor_ia64_3 - XOR three buffers, 8 bytes per loop iteration; the result
   * overwrites the first buffer.
   *
   * Inputs, as consumed below:
   *   in0   count; shifted right by 3 to obtain the number of 8-byte words
   *         (presumably a byte count - confirm against the C prototype)
   *   in1   first source, also the destination
   *   in2   second source
   *   in3   third source
   *
   * Static registers:
   *   r8             store pointer (starts at in1)
   *   r16, r17, r18  load pointers for in1, in2, in3
   *   r29, r30, r31  saved pr, ar.lc, ar.pfs
   *
   * The loop is a counted, software-pipelined br.ctop loop with two
   * instruction groups per iteration: stage 0 issues the loads, stage 6
   * does both XORs (one per group), stage 7 stores.  ar.ec = 6 + 2 covers
   * those eight stages while the pipeline drains.
   *
   * The arguments are copied to static registers before the loop starts.
   * NOTE(review): presumably because the rotating names declared with .rotr
   * overlap the in registers - confirm.
   * NOTE(review): there is no guard for in0 < 8; in0 >> 3 would then be 0
   * and ar.lc would be loaded with -1.
   */
  GLOBAL_ENTRY(xor_ia64_3)
          .prologue
          .fframe 0
          .save ar.pfs, r31
          alloc r31 = ar.pfs, 4, 0, 20, 24        /* 4 in, 0 loc, 20 out, 24 rotating */
          .save ar.lc, r30
          mov r30 = ar.lc                         /* keep caller's loop count */
          .save pr, r29
          mov r29 = pr                            /* keep caller's predicates */
          ;;
          .body
          mov r8 = in1                            /* store pointer */
          mov ar.ec = 6 + 2                       /* epilogue count = pipeline depth */
          shr in0 = in0, 3                        /* count / 8 */
          ;;
          adds in0 = -1, in0                      /* ar.lc takes iterations - 1 */
          mov r16 = in1                           /* load pointer, buffer 1 */
          mov r17 = in2                           /* load pointer, buffer 2 */
          ;;
          mov r18 = in3                           /* load pointer, buffer 3 */
          mov ar.lc = in0
          mov pr.rot = 1 << 16                    /* only bit 16 set: first stage on */
          ;;
          /* v1..v3: loaded words in flight; acc: XOR result; stg: stage predicates */
          .rotr v1[6+1], v2[6+1], v3[6+1], acc[2]
          .rotp stg[6+2]
  0:
  (stg[0])        ld8.nta v1[0] = [r16], 8        /* stage 0: load and advance */
  (stg[0])        ld8.nta v2[0] = [r17], 8
  (stg[6])        xor acc[0] = v1[6], v2[6]       /* stage 6: first partial result */
          ;;
  (stg[0])        ld8.nta v3[0] = [r18], 8
  (stg[6+1])      st8.nta [r8] = acc[1], 8        /* stage 7: store previous rotation's result */
  (stg[6])        xor acc[0] = acc[0], v3[6]      /* stage 6: fold in the third word */
          br.ctop.dptk.few 0b
          ;;
          mov ar.lc = r30                         /* restore caller state */
          mov pr = r29, -1
          br.ret.sptk.few rp
  END(xor_ia64_3)
  EXPORT_SYMBOL(xor_ia64_3)
  /*
   * xor_ia64_4 - XOR four buffers, 8 bytes per loop iteration; the result
   * overwrites the first buffer.
   *
   * Inputs, as consumed below:
   *   in0        count; shifted right by 3 to obtain the number of 8-byte
   *              words (presumably a byte count - confirm against the C
   *              prototype)
   *   in1        first source, also the destination
   *   in2 - in4  remaining sources
   *
   * Static registers:
   *   r8             store pointer (starts at in1)
   *   r16 - r19      load pointers for in1 - in4
   *   r20            scratch: XOR of the third and fourth words
   *   r29, r30, r31  saved pr, ar.lc, ar.pfs
   *
   * The loop is a counted, software-pipelined br.ctop loop with two
   * instruction groups per iteration: stage 0 issues the loads, stage 6
   * does the XORs, stage 7 stores.  ar.ec = 6 + 2 covers those eight
   * stages while the pipeline drains.
   *
   * The arguments are copied to static registers before the loop starts.
   * NOTE(review): presumably because the rotating names declared with .rotr
   * overlap the in registers - confirm.
   * NOTE(review): there is no guard for in0 < 8; in0 >> 3 would then be 0
   * and ar.lc would be loaded with -1.
   */
  GLOBAL_ENTRY(xor_ia64_4)
          .prologue
          .fframe 0
          .save ar.pfs, r31
          alloc r31 = ar.pfs, 5, 0, 27, 32        /* 5 in, 0 loc, 27 out, 32 rotating */
          .save ar.lc, r30
          mov r30 = ar.lc                         /* keep caller's loop count */
          .save pr, r29
          mov r29 = pr                            /* keep caller's predicates */
          ;;
          .body
          mov r8 = in1                            /* store pointer */
          mov ar.ec = 6 + 2                       /* epilogue count = pipeline depth */
          shr in0 = in0, 3                        /* count / 8 */
          ;;
          adds in0 = -1, in0                      /* ar.lc takes iterations - 1 */
          mov r16 = in1                           /* load pointer, buffer 1 */
          mov r17 = in2                           /* load pointer, buffer 2 */
          ;;
          mov r18 = in3                           /* load pointer, buffer 3 */
          mov ar.lc = in0
          mov pr.rot = 1 << 16                    /* only bit 16 set: first stage on */
          mov r19 = in4                           /* load pointer, buffer 4 */
          ;;
          /* v1..v4: loaded words in flight; acc: XOR result; stg: stage predicates */
          .rotr v1[6+1], v2[6+1], v3[6+1], v4[6+1], acc[2]
          .rotp stg[6+2]
  0:
  (stg[0])        ld8.nta v1[0] = [r16], 8        /* stage 0: load and advance */
  (stg[0])        ld8.nta v2[0] = [r17], 8
  (stg[6])        xor acc[0] = v1[6], v2[6]       /* stage 6: words 1 ^ 2 */
  (stg[0])        ld8.nta v3[0] = [r18], 8
  (stg[0])        ld8.nta v4[0] = [r19], 8
  (stg[6])        xor r20 = v3[6], v4[6]          /* stage 6: words 3 ^ 4 */
          ;;
  (stg[6+1])      st8.nta [r8] = acc[1], 8        /* stage 7: store previous rotation's result */
  (stg[6])        xor acc[0] = acc[0], r20        /* stage 6: merge both halves */
          br.ctop.dptk.few 0b
          ;;
          mov ar.lc = r30                         /* restore caller state */
          mov pr = r29, -1
          br.ret.sptk.few rp
  END(xor_ia64_4)
  EXPORT_SYMBOL(xor_ia64_4)
  /*
   * xor_ia64_5 - XOR five buffers, 8 bytes per loop iteration; the result
   * overwrites the first buffer.
   *
   * Inputs, as consumed below:
   *   in0        count; shifted right by 3 to obtain the number of 8-byte
   *              words (presumably a byte count - confirm against the C
   *              prototype)
   *   in1        first source, also the destination
   *   in2 - in5  remaining sources
   *
   * Static registers:
   *   r8             store pointer (starts at in1)
   *   r16 - r20      load pointers for in1 - in5
   *   r21            scratch: XOR of the third and fourth words
   *   r29, r30, r31  saved pr, ar.lc, ar.pfs
   *
   * The loop is a counted, software-pipelined br.ctop loop with three
   * instruction groups per iteration: stage 0 issues the loads, stage 6
   * does the XORs, stage 7 stores.  ar.ec = 6 + 2 covers those eight
   * stages while the pipeline drains.
   *
   * The arguments are copied to static registers before the loop starts.
   * NOTE(review): presumably because the rotating names declared with .rotr
   * overlap the in registers - confirm.
   * NOTE(review): there is no guard for in0 < 8; in0 >> 3 would then be 0
   * and ar.lc would be loaded with -1.
   */
  GLOBAL_ENTRY(xor_ia64_5)
          .prologue
          .fframe 0
          .save ar.pfs, r31
          alloc r31 = ar.pfs, 6, 0, 34, 40        /* 6 in, 0 loc, 34 out, 40 rotating */
          .save ar.lc, r30
          mov r30 = ar.lc                         /* keep caller's loop count */
          .save pr, r29
          mov r29 = pr                            /* keep caller's predicates */
          ;;
          .body
          mov r8 = in1                            /* store pointer */
          mov ar.ec = 6 + 2                       /* epilogue count = pipeline depth */
          shr in0 = in0, 3                        /* count / 8 */
          ;;
          adds in0 = -1, in0                      /* ar.lc takes iterations - 1 */
          mov r16 = in1                           /* load pointer, buffer 1 */
          mov r17 = in2                           /* load pointer, buffer 2 */
          ;;
          mov r18 = in3                           /* load pointer, buffer 3 */
          mov ar.lc = in0
          mov pr.rot = 1 << 16                    /* only bit 16 set: first stage on */
          mov r19 = in4                           /* load pointer, buffer 4 */
          mov r20 = in5                           /* load pointer, buffer 5 */
          ;;
          /* v1..v5: loaded words in flight; acc: XOR result; stg: stage predicates */
          .rotr v1[6+1], v2[6+1], v3[6+1], v4[6+1], v5[6+1], acc[2]
          .rotp stg[6+2]
  0:
  (stg[0])        ld8.nta v1[0] = [r16], 8        /* stage 0: load and advance */
  (stg[0])        ld8.nta v2[0] = [r17], 8
  (stg[6])        xor acc[0] = v1[6], v2[6]       /* stage 6: words 1 ^ 2 */
  (stg[0])        ld8.nta v3[0] = [r18], 8
  (stg[0])        ld8.nta v4[0] = [r19], 8
  (stg[6])        xor r21 = v3[6], v4[6]          /* stage 6: words 3 ^ 4 */
          ;;
  (stg[0])        ld8.nta v5[0] = [r20], 8
  (stg[6+1])      st8.nta [r8] = acc[1], 8        /* stage 7: store previous rotation's result */
  (stg[6])        xor acc[0] = acc[0], r21        /* stage 6: merge both halves */
          ;;
  (stg[6])        xor acc[0] = acc[0], v5[6]      /* stage 6: fold in the fifth word */
          nop.f 0
          br.ctop.dptk.few 0b
          ;;
          mov ar.lc = r30                         /* restore caller state */
          mov pr = r29, -1
          br.ret.sptk.few rp
  END(xor_ia64_5)
  EXPORT_SYMBOL(xor_ia64_5)