memcmp_64.S 2.9 KB


  1. /*
  2. * Author: Anton Blanchard <anton@au.ibm.com>
  3. * Copyright 2015 IBM Corporation.
  4. *
  5. * This program is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU General Public License
  7. * as published by the Free Software Foundation; either version
  8. * 2 of the License, or (at your option) any later version.
  9. */
  10. #include <asm/ppc_asm.h>
  11. #include <asm/export.h>
  /*
   * Register aliases used by memcmp below.
   *
   * off8/off16/off24 are loaded with the constants 8, 16 and 24 at .Llong
   * and serve as the index operand of the LD loads in the 32-byte loop.
   *
   * rA..rH hold the eight doublewords of one 32-byte step: rA, rC, rE, rG
   * are loaded through r3 and rB, rD, rF, rH through r4. The byte loop
   * reuses rA/rB for the two bytes and rC for their difference.
   */
  12. #define off8 r6 /* also used as plain scratch by the entry alignment test */
  13. #define off16 r7
  14. #define off24 r8
  15. #define rA r9
  16. #define rB r10
  17. #define rC r11
  18. #define rD r27 /* r27-r31: stored below r1 at .Llong, reloaded at .Ltail/.Lout */
  19. #define rE r28
  20. #define rF r29
  21. #define rG r30
  22. #define rH r31
  /*
   * LD is the indexed doubleword load used by the 32-byte loop. The results
   * are compared with cmpld (unsigned), so the lowest-addressed byte has to
   * end up most significant for the result to match byte-by-byte order.
   */
  23. #ifdef __LITTLE_ENDIAN__
  24. #define LD ldbrx /* byte-reversed load */
  25. #else
  26. #define LD ldx /* plain load */
  27. #endif
  /*
   * memcmp: compare r5 bytes starting at r3 with r5 bytes starting at r4.
   *
   * In:   r3 = first buffer, r4 = second buffer, r5 = length in bytes
   * Out:  r3 = 0 when no difference is found;
   *            (byte at r3 side) - (byte at r4 side) for the first differing
   *            byte when the byte loop finds it;
   *            1 or -1 when the 32-byte loop finds it (1 if the r3 side
   *            doubleword compares greater, unsigned).
   * Uses: r0, r3-r11, CTR, cr0, cr1, cr6, cr7. r27-r31 are used on the long
   *       path only and are reloaded before every return from it.
   *
   * Two paths:
   *   .Lshort - byte-at-a-time loop, unrolled four times, counted by CTR.
   *   .Llong  - taken when both pointers are 8-byte aligned and the length
   *             is at least 32: compares 32 bytes per step, then hands the
   *             remaining (length & 31) bytes to .Lshort.
   */
  28. _GLOBAL(memcmp)
  29. cmpdi cr1,r5,0 /* cr1: is the length zero? */
  30. /* Use the short loop unless both pointers are 8-byte aligned */
  31. or r6,r3,r4 /* r6 is plain scratch here; .Llong reloads it as off8 */
  32. andi. r6,r6,7 /* cr0: eq only if the low three bits of both pointers are clear */
  33. /* Use the short loop if length is less than 32B */
  34. cmpdi cr6,r5,31 /* signed compare: a length with the top bit set is not gt */
  35. beq cr1,.Lzero /* zero length: return 0 */
  36. bne .Lshort /* tests cr0 from the andi. above */
  37. bgt cr6,.Llong
  /*
   * Byte loop. CTR holds the number of bytes left. Each bdz/bdnz decrements
   * CTR after one byte pair has compared equal. Also entered from .Ltail
   * with r5 = bytes left over after the 32-byte blocks.
   */
  38. .Lshort:
  39. mtctr r5
  40. 1: lbz rA,0(r3)
  41. lbz rB,0(r4)
  42. subf. rC,rB,rA /* rC = rA - rB; the dot form also sets cr0 */
  43. bne .Lnon_zero
  44. bdz .Lzero /* that was the last byte */
  45. lbz rA,1(r3)
  46. lbz rB,1(r4)
  47. subf. rC,rB,rA
  48. bne .Lnon_zero
  49. bdz .Lzero
  50. lbz rA,2(r3)
  51. lbz rB,2(r4)
  52. subf. rC,rB,rA
  53. bne .Lnon_zero
  54. bdz .Lzero
  55. lbz rA,3(r3)
  56. lbz rB,3(r4)
  57. subf. rC,rB,rA
  58. bne .Lnon_zero
  59. addi r3,r3,4 /* step both pointers past the four bytes just compared */
  60. addi r4,r4,4
  61. bdnz 1b /* falls through to .Lzero once CTR reaches zero */
  /* Every compared byte matched (or the length was zero). */
  62. .Lzero:
  63. li r3,0
  64. blr
  /* Byte loop found a difference: return it as computed by subf. */
  65. .Lnon_zero:
  66. mr r3,rC
  67. blr
  /*
   * 32-byte path. Results of the four doubleword compares of a block live
   * in cr0 (A/B), cr1 (C/D), cr6 (E/F) and cr7 (G/H). The code is software
   * pipelined: loads for the next block are interleaved with compares of
   * the current block and with branches on compares issued earlier. The
   * branches test the pairs in address order, so the first differing
   * doubleword decides the result. Do not reorder without re-checking which
   * CR field and which register pair is live at each point.
   */
  68. .Llong:
  69. li off8,8
  70. li off16,16
  71. li off24,24
  /*
   * r27-r31 back rD..rH; they are stored below r1 without moving r1.
   * NOTE(review): this relies on the area just below the stack pointer
   * being safe to use here (presumably the ABI's protected zone) - confirm.
   */
  72. std r31,-8(r1)
  73. std r30,-16(r1)
  74. std r29,-24(r1)
  75. std r28,-32(r1)
  76. std r27,-40(r1)
  77. srdi r0,r5,5 /* r0 = number of whole 32-byte blocks (>= 1 here) */
  78. mtctr r0
  79. andi. r5,r5,31 /* r5 = bytes left over for the byte loop */
  /* Load block 1. The literal 0 base operand means the address is just r3/r4. */
  80. LD rA,0,r3
  81. LD rB,0,r4
  82. LD rC,off8,r3
  83. LD rD,off8,r4
  84. LD rE,off16,r3
  85. LD rF,off16,r4
  86. LD rG,off24,r3
  87. LD rH,off24,r4
  88. cmpld cr0,rA,rB /* unsigned compare of the first pair */
  89. addi r3,r3,32
  90. addi r4,r4,32
  91. bdz .Lfirst32 /* only one whole block: finish it without preloading */
  /* Load block 2 while comparing and testing the rest of block 1. */
  92. LD rA,0,r3
  93. LD rB,0,r4
  94. cmpld cr1,rC,rD /* block 1 C/D (must precede the reload of rC/rD) */
  95. LD rC,off8,r3
  96. LD rD,off8,r4
  97. cmpld cr6,rE,rF /* block 1 E/F */
  98. LD rE,off16,r3
  99. LD rF,off16,r4
  100. cmpld cr7,rG,rH /* block 1 G/H */
  101. bne cr0,.LcmpAB /* block 1 A/B */
  102. LD rG,off24,r3
  103. LD rH,off24,r4
  104. cmpld cr0,rA,rB /* block 2 A/B */
  105. bne cr1,.LcmpCD /* block 1 C/D */
  106. addi r3,r3,32
  107. addi r4,r4,32
  108. bdz .Lsecond32 /* exactly two blocks: drain the pipeline */
  /*
   * Steady state. On entry cr6/cr7 hold E/F and G/H of the previous block
   * (not yet tested), cr0 holds A/B of the current block, and rC..rH hold
   * the rest of the current block (not yet compared). Each iteration loads
   * the next block into the registers as they become free.
   */
  109. .balign 16
  110. 1: LD rA,0,r3
  111. LD rB,0,r4
  112. cmpld cr1,rC,rD /* current block C/D */
  113. bne cr6,.LcmpEF /* previous block E/F */
  114. LD rC,off8,r3
  115. LD rD,off8,r4
  116. cmpld cr6,rE,rF /* current block E/F */
  117. bne cr7,.LcmpGH /* previous block G/H */
  118. LD rE,off16,r3
  119. LD rF,off16,r4
  120. cmpld cr7,rG,rH /* current block G/H */
  121. bne cr0,.LcmpAB /* current block A/B */
  122. LD rG,off24,r3
  123. LD rH,off24,r4
  124. cmpld cr0,rA,rB /* next block A/B */
  125. bne cr1,.LcmpCD /* current block C/D */
  126. addi r3,r3,32
  127. addi r4,r4,32
  128. bdnz 1b
  /*
   * Drain: no more blocks to load. Finish comparing the last loaded block
   * and test every outstanding result, still in address order.
   */
  129. .Lsecond32:
  130. cmpld cr1,rC,rD /* last block C/D */
  131. bne cr6,.LcmpEF /* previous block E/F */
  132. cmpld cr6,rE,rF /* last block E/F */
  133. bne cr7,.LcmpGH /* previous block G/H */
  134. cmpld cr7,rG,rH /* last block G/H */
  135. bne cr0,.LcmpAB /* last block A/B */
  136. bne cr1,.LcmpCD
  137. bne cr6,.LcmpEF
  138. bne cr7,.LcmpGH
  /*
   * All whole blocks matched. Reload r27-r31, then either return 0 or let
   * the byte loop compare the r5 leftover bytes; r3/r4 already point past
   * the compared blocks.
   */
  139. .Ltail:
  140. ld r31,-8(r1)
  141. ld r30,-16(r1)
  142. ld r29,-24(r1)
  143. ld r28,-32(r1)
  144. ld r27,-40(r1)
  145. cmpdi r5,0
  146. beq .Lzero
  147. b .Lshort
  /*
   * Single whole block: A/B is already in cr0; compare the other three
   * pairs and test all four results in address order.
   */
  148. .Lfirst32:
  149. cmpld cr1,rC,rD
  150. cmpld cr6,rE,rF
  151. cmpld cr7,rG,rH
  152. bne cr0,.LcmpAB
  153. bne cr1,.LcmpCD
  154. bne cr6,.LcmpEF
  155. bne cr7,.LcmpGH
  156. b .Ltail
  /*
   * Result stubs. Each is reached only when its CR field says "not equal",
   * so the result is 1 if the r3 side doubleword is greater (unsigned) and
   * -1 otherwise.
   */
  157. .LcmpAB:
  158. li r3,1
  159. bgt cr0,.Lout
  160. li r3,-1
  161. b .Lout
  162. .LcmpCD:
  163. li r3,1
  164. bgt cr1,.Lout
  165. li r3,-1
  166. b .Lout
  167. .LcmpEF:
  168. li r3,1
  169. bgt cr6,.Lout
  170. li r3,-1
  171. b .Lout
  172. .LcmpGH:
  173. li r3,1
  174. bgt cr7,.Lout
  175. li r3,-1
  /* Common exit of the long path: reload r27-r31 and return r3. */
  176. .Lout:
  177. ld r31,-8(r1)
  178. ld r30,-16(r1)
  179. ld r29,-24(r1)
  180. ld r28,-32(r1)
  181. ld r27,-40(r1)
  182. blr
  183. EXPORT_SYMBOL(memcmp)