strnlen.S 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. /*
  2. * Copyright (C) 2013 ARM Ltd.
  3. * Copyright (C) 2013 Linaro.
  4. *
  5. * This code is based on glibc cortex strings work originally authored by Linaro
  6. * and re-licensed under GPLv2 for the Linux kernel. The original code can
  7. * be found @
  8. *
  9. * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
  10. * files/head:/src/aarch64/
  11. *
  12. * This program is free software; you can redistribute it and/or modify
  13. * it under the terms of the GNU General Public License version 2 as
  14. * published by the Free Software Foundation.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. * GNU General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License
  22. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  23. */
  24. #include <linux/linkage.h>
  25. #include <asm/assembler.h>
/*
 * strnlen - determine the length of a string, examining at most a
 * given number of bytes
 *
 * Parameters:
 *	x0 - const string pointer
 *	x1 - maximal string length
 * Returns:
 *	x0 - the length of the string, or the value passed in x1 if no
 *	     NUL byte is found within that many bytes
 */
/*
 * Symbolic register names and constants used by strnlen.
 * Only x0-x14 are used; per AAPCS64 these are caller-saved, so the
 * routine needs no stack frame to preserve them.
 */
/* Arguments and results. */
srcin .req x0	/* in: pointer to the first byte of the string */
len .req x0	/* out: result; shares x0 with srcin, so srcin is dead once len is written */
limit .req x1	/* in: maximum number of bytes to examine */
/* Locals and temporaries. */
src .req x2	/* read cursor: srcin rounded down to 16 bytes, advanced 16 by each ldp */
data1 .req x3	/* first 8 bytes of the current 16-byte chunk */
data2 .req x4	/* second 8 bytes of the current 16-byte chunk */
data2a .req x5	/* data2 OR-ed with the leading-byte mask (misaligned entry only) */
has_nul1 .req x6	/* NUL syndrome of data1: non-zero iff data1 contains a zero byte */
has_nul2 .req x7	/* NUL syndrome of data2; later the syndrome of whichever word holds the NUL */
tmp1 .req x8	/* scratch; on entry also holds srcin & 15 */
tmp2 .req x9	/* scratch */
tmp3 .req x10	/* scratch */
tmp4 .req x11	/* scratch */
zeroones .req x12	/* loaded with REP8_01 */
pos .req x13	/* clz result used to locate the first NUL byte */
limit_wd .req x14	/* number of 16-byte chunks to examine, minus one */
/*
 * Byte-replicated constants for the parallel NUL test
 * (X - REP8_01) & ~(X | REP8_7f).
 */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
/* REP8_80 is not referenced by the visible code. */
#define REP8_80 0x8080808080808080
  56. ENTRY(strnlen)
  57. cbz limit, .Lhit_limit
  58. mov zeroones, #REP8_01
  59. bic src, srcin, #15
  60. ands tmp1, srcin, #15
  61. b.ne .Lmisaligned
  62. /* Calculate the number of full and partial words -1. */
  63. sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */
  64. lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */
  65. /*
  66. * NUL detection works on the principle that (X - 1) & (~X) & 0x80
  67. * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
  68. * can be done in parallel across the entire word.
  69. */
  70. /*
  71. * The inner loop deals with two Dwords at a time. This has a
  72. * slightly higher start-up cost, but we should win quite quickly,
  73. * especially on cores with a high number of issue slots per
  74. * cycle, as we get much better parallelism out of the operations.
  75. */
  76. .Lloop:
  77. ldp data1, data2, [src], #16
  78. .Lrealigned:
  79. sub tmp1, data1, zeroones
  80. orr tmp2, data1, #REP8_7f
  81. sub tmp3, data2, zeroones
  82. orr tmp4, data2, #REP8_7f
  83. bic has_nul1, tmp1, tmp2
  84. bic has_nul2, tmp3, tmp4
  85. subs limit_wd, limit_wd, #1
  86. orr tmp1, has_nul1, has_nul2
  87. ccmp tmp1, #0, #0, pl /* NZCV = 0000 */
  88. b.eq .Lloop
  89. cbz tmp1, .Lhit_limit /* No null in final Qword. */
  90. /*
  91. * We know there's a null in the final Qword. The easiest thing
  92. * to do now is work out the length of the string and return
  93. * MIN (len, limit).
  94. */
  95. sub len, src, srcin
  96. cbz has_nul1, .Lnul_in_data2
  97. CPU_BE( mov data2, data1 ) /*perpare data to re-calculate the syndrome*/
  98. sub len, len, #8
  99. mov has_nul2, has_nul1
  100. .Lnul_in_data2:
  101. /*
  102. * For big-endian, carry propagation (if the final byte in the
  103. * string is 0x01) means we cannot use has_nul directly. The
  104. * easiest way to get the correct byte is to byte-swap the data
  105. * and calculate the syndrome a second time.
  106. */
  107. CPU_BE( rev data2, data2 )
  108. CPU_BE( sub tmp1, data2, zeroones )
  109. CPU_BE( orr tmp2, data2, #REP8_7f )
  110. CPU_BE( bic has_nul2, tmp1, tmp2 )
  111. sub len, len, #8
  112. rev has_nul2, has_nul2
  113. clz pos, has_nul2
  114. add len, len, pos, lsr #3 /* Bits to bytes. */
  115. cmp len, limit
  116. csel len, len, limit, ls /* Return the lower value. */
  117. ret
  118. .Lmisaligned:
  119. /*
  120. * Deal with a partial first word.
  121. * We're doing two things in parallel here;
  122. * 1) Calculate the number of words (but avoiding overflow if
  123. * limit is near ULONG_MAX) - to do this we need to work out
  124. * limit + tmp1 - 1 as a 65-bit value before shifting it;
  125. * 2) Load and mask the initial data words - we force the bytes
  126. * before the ones we are interested in to 0xff - this ensures
  127. * early bytes will not hit any zero detection.
  128. */
  129. ldp data1, data2, [src], #16
  130. sub limit_wd, limit, #1
  131. and tmp3, limit_wd, #15
  132. lsr limit_wd, limit_wd, #4
  133. add tmp3, tmp3, tmp1
  134. add limit_wd, limit_wd, tmp3, lsr #4
  135. neg tmp4, tmp1
  136. lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */
  137. mov tmp2, #~0
  138. /* Big-endian. Early bytes are at MSB. */
  139. CPU_BE( lsl tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */
  140. /* Little-endian. Early bytes are at LSB. */
  141. CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */
  142. cmp tmp1, #8
  143. orr data1, data1, tmp2
  144. orr data2a, data2, tmp2
  145. csinv data1, data1, xzr, le
  146. csel data2, data2, data2a, le
  147. b .Lrealigned
  148. .Lhit_limit:
  149. mov len, limit
  150. ret
  151. ENDPIPROC(strnlen)