memscan_64.S 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /*
  3. * memscan.S: Optimized memscan for Sparc64.
  4. *
  5. * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz)
  6. * Copyright (C) 1998 David S. Miller (davem@redhat.com)
  7. */
  8. #include <asm/export.h>
  /* HI_MAGIC: 0x80 in every byte; __memscan_zero uses it as the per-byte high-bit mask. */
  9. #define HI_MAGIC 0x8080808080808080
  /*
   * LO_MAGIC: 0x01 in every byte.  Not referenced by name in the code visible
   * in this file; __memscan_zero derives the same value as HI_MAGIC >> 7.
   */
  10. #define LO_MAGIC 0x0101010101010101
  /*
   * ASI_PL: address space identifier given to the ldxa loads in __memscan_zero.
   * NOTE(review): 0x88 is ASI_PRIMARY_LITTLE in SPARC V9 - confirm against the
   * architecture manual.
   */
  11. #define ASI_PL 0x88
  /* Code goes in .text; the first routine starts on a 32-byte boundary. */
  12. .text
  13. .align 32
  /* Both entry points are global symbols typed as functions. */
  14. .globl __memscan_zero, __memscan_generic
  15. .type __memscan_zero,#function
  16. .type __memscan_generic,#function
  /* memscan is a second global label placed on the same address as __memscan_generic. */
  17. .globl memscan
  /* EXPORT_SYMBOL is not defined in this file; presumably it comes from <asm/export.h>. */
  18. EXPORT_SYMBOL(__memscan_zero)
  19. EXPORT_SYMBOL(__memscan_generic)
  /*
   * __memscan_zero: find the first zero byte in a buffer.
   *
   * In:   %o0 = bufp, %o1 = size (tested as a signed 64-bit value)
   * Out:  %o0 = address of the first zero byte in [bufp, bufp + size),
   *             or bufp + size when there is none;
   *             bufp unchanged when size <= 0.
   * Writes: %o1-%o5, %g2, %g3, %g7 and the integer condition codes.
   *
   * Shape: bytes are checked one at a time until bufp is 8-byte aligned, then
   * whole 8-byte words are loaded and filtered with
   *     ((x - 0x01..01) ^ x) & 0x80..80
   * A nonzero filter result only marks the word as a candidate; check_bytes
   * then tests the bytes individually.  The word loop always loads full
   * words, so it can read up to 7 bytes past bufp + size; the returned
   * pointer is clamped to bufp + size at label 1 near the end.
   *
   * Reading aid: the instruction right after a branch is its delay slot.  It
   * executes whether or not the branch is taken, unless the branch carries
   * ",a" (annul), in which case it executes only when the branch is taken.
   */
  20. __memscan_zero:
  21. /* %o0 = bufp, %o1 = size */
  22. brlez,pn %o1, szzero /* size <= 0: return bufp as is */
  23. andcc %o0, 7, %g0 /* (delay slot) flags = bufp & 7 */
  24. be,pt %icc, we_are_aligned /* already 8-byte aligned: skip the byte loop */
  25. sethi %hi(HI_MAGIC), %o4 /* (delay slot, runs on both paths) start building the 0x80.. mask */
  26. ldub [%o0], %o5 /* first unaligned byte */
  /* Byte loop: runs until bufp is aligned, size is used up, or a zero byte shows up. */
  27. 1: subcc %o1, 1, %o1 /* size--; these flags feed the be two lines down */
  28. brz,pn %o5, 10f /* byte is zero: label 10 undoes the pointer bump below */
  29. add %o0, 1, %o0 /* (delay slot, always runs) bufp++; does not touch the flags */
  30. be,pn %xcc, szzero /* size reached 0: %o0 now equals the end of the buffer */
  31. andcc %o0, 7, %g0 /* (delay slot) re-test alignment */
  32. bne,a,pn %icc, 1b /* still unaligned: go check the next byte */
  33. ldub [%o0], %o5 /* (annulled delay slot) load it only when looping */
  34. we_are_aligned:
  35. ldxa [%o0] ASI_PL, %o5 /* %o5 = 8 bytes at bufp; the checks below treat bits 7:0 as the byte at the lowest address */
  36. or %o4, %lo(HI_MAGIC), %o3 /* %o3 = low 32 bits of HI_MAGIC */
  37. sllx %o3, 32, %o4
  38. or %o4, %o3, %o3 /* %o3 = 0x80 in each of the 8 bytes */
  39. srlx %o3, 7, %o2 /* %o2 = 0x01 in each of the 8 bytes */
  /* Word loop.  On exit from each pass %o0 points just past the loaded word and %o1 = bytes left after it (may be <= 0). */
  40. msloop:
  41. sub %o1, 8, %o1
  42. add %o0, 8, %o0
  43. sub %o5, %o2, %o4
  44. xor %o4, %o5, %o4
  45. andcc %o4, %o3, %g3 /* candidate mask; sets both icc (low 4 bytes) and xcc (all 8) */
  46. bne,pn %xcc, check_bytes /* some byte may be zero: inspect the word */
  47. srlx %o4, 32, %g3 /* (delay slot) %g3 = upper half of the pre-mask value; flags unchanged */
  48. brgz,a,pt %o1, msloop /* nothing flagged and bytes remain: next word */
  49. ldxa [%o0] ASI_PL, %o5 /* (annulled delay slot) */
  /*
   * Reached either from the bne above or by falling through when the buffer
   * is exhausted; the flags are still those of the andcc at msloop.
   */
  50. check_bytes:
  51. bne,a,pn %icc, 2f /* candidate in the low 4 bytes: test bytes 0-3 first */
  52. andcc %o5, 0xff, %g0 /* (annulled delay slot) flags = byte 0 */
  53. add %o0, -5, %g2 /* low half clean: %g2 = word start + 3, bumped before each test result is used */
  54. ba,pt %xcc, 3f /* go straight to bytes 4-7 */
  55. srlx %o5, 32, %g7 /* (delay slot) %g7 = upper 4 bytes */
  /* Bytes 0-3.  %g2 tracks the address of the byte whose test result the next be consumes. */
  56. 2: srlx %o5, 8, %g7
  57. be,pn %icc, 1f /* byte 0 is zero */
  58. add %o0, -8, %g2 /* (delay slot) %g2 = word start */
  59. andcc %g7, 0xff, %g0 /* byte 1 */
  60. srlx %g7, 8, %g7
  61. be,pn %icc, 1f
  62. inc %g2 /* (delay slot) %g2 = word start + 1 */
  63. andcc %g7, 0xff, %g0 /* byte 2 */
  64. srlx %g7, 8, %g7
  65. be,pn %icc, 1f
  66. inc %g2 /* (delay slot) %g2 = word start + 2 */
  67. andcc %g7, 0xff, %g0 /* byte 3 */
  68. srlx %g7, 8, %g7 /* %g7 now holds the upper 4 bytes */
  69. be,pn %icc, 1f
  70. inc %g2 /* (delay slot) %g2 = word start + 3 */
  71. andcc %g3, %o3, %g0 /* any candidate among bytes 4-7? */
  72. be,a,pn %icc, 2f /* no: skip to the loop tail (the later label 2) */
  73. mov %o0, %g2 /* (annulled delay slot) %g2 = end of this word */
  /* Bytes 4-7; entered with %g7 = upper 4 bytes and %g2 = word start + 3. */
  74. 3: andcc %g7, 0xff, %g0 /* byte 4 */
  75. srlx %g7, 8, %g7
  76. be,pn %icc, 1f
  77. inc %g2 /* (delay slot) %g2 = word start + 4 */
  78. andcc %g7, 0xff, %g0 /* byte 5 */
  79. srlx %g7, 8, %g7
  80. be,pn %icc, 1f
  81. inc %g2 /* (delay slot) %g2 = word start + 5 */
  82. andcc %g7, 0xff, %g0 /* byte 6 */
  83. srlx %g7, 8, %g7
  84. be,pn %icc, 1f
  85. inc %g2 /* (delay slot) %g2 = word start + 6 */
  86. andcc %g7, 0xff, %g0 /* byte 7 */
  87. srlx %g7, 8, %g7
  88. be,pn %icc, 1f
  89. inc %g2 /* (delay slot) %g2 = word start + 7 */
  /* No zero byte in this word after all. */
  90. 2: brgz,a,pt %o1, msloop /* bytes remain: resume the word loop */
  91. ldxa [%o0] ASI_PL, %o5 /* (annulled delay slot) */
  92. inc %g2 /* buffer exhausted: push %g2 to or past the end so the clamp below picks the end */
  /* Common exit: return whichever is lower, the candidate address or the end of the buffer. */
  93. 1: add %o0, %o1, %o0 /* %o0 = bufp + size (end of the buffer) */
  94. cmp %g2, %o0
  95. retl
  96. movle %xcc, %g2, %o0 /* (delay slot) signed 64-bit compare: take %g2 when %g2 <= end */
  /* Zero byte found in the unaligned prefix: %o0 was already advanced past it. */
  97. 10: retl
  98. sub %o0, 1, %o0 /* (delay slot) step back onto the zero byte */
  /* Return %o0 exactly as it stands. */
  99. szzero: retl
  100. nop
  /*
   * memscan / __memscan_generic: find the first byte equal to c.
   *
   * In:   %o0 = addr, %o1 = c, %o2 = size
   * Out:  %o0 = address of the first byte in [addr, addr + size) whose
   *             zero-extended value equals the low 32 bits of %o1,
   *             or addr + size when no byte matches (addr when size == 0).
   * Writes: %o3, %o4, %o5 and the integer condition codes.
   *
   * %o3 holds the end of the buffer and %o4 a negative offset that counts
   * up towards zero, so the loop needs no separate length counter.  The
   * instruction after each branch is its delay slot; with ",a" it executes
   * only when the branch is taken.
   */
memscan:
__memscan_generic:
	brz,pn		%o2, .Lmemscan_empty	/* size == 0: return addr untouched */
	 add		%o0, %o2, %o3		/* (delay slot) %o3 = addr + size */
	sub		%g0, %o2, %o4		/* %o4 = -size: offset of byte 0 from the end */
	ldub		[%o0], %o5		/* first byte */
.Lmemscan_next:
	cmp		%o5, %o1
	be,pn		%icc, .Lmemscan_hit	/* decided on the cmp above ... */
	 addcc		%o4, 1, %o4		/* ... (delay slot) step offset; these flags feed the bne */
	bne,a,pt	%xcc, .Lmemscan_next	/* offset != 0: bytes remain */
	 ldub		[%o3 + %o4], %o5	/* (annulled delay slot) next byte */
	retl					/* no match: %o4 == 0 here */
	 add		%o3, %o4, %o0		/* (delay slot) %o0 = addr + size */
.Lmemscan_hit:
	add		%o3, %o4, %o0		/* offset already stepped past the match */
	retl
	 sub		%o0, 1, %o0		/* (delay slot) back up onto the matching byte */
.Lmemscan_empty:
	retl
	 nop