strcmp.S

/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle per word
   (respectively one cycle per byte) by forcing double source-1 alignment,
   unrolling by a factor of two, and speculatively loading the second
   word / byte of source 1; however, that would increase the overhead for
   loop setup / finish, and strcmp might often terminate early.  */
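
/* Illustrative C sketch of the word-at-a-time zero test used in
   .Lwordloop below (not part of the build; the name has_zero is ours).
   r12 holds 0x01010101 and r5 holds 0x80808080, i.e. r12 rotated right
   by one bit; the sub/bic/and sequence computes has_zero(), which is
   non-zero if and only if the 32-bit word w contains a zero byte:

	static unsigned int has_zero(unsigned int w)
	{
		return (w - 0x01010101u) & ~w & 0x80808080u;
	}

   Individual flag bits can also fire for 0x01 bytes more significant
   than a zero byte (borrow propagation); the big-endian .Lfound0 path
   below compensates for that.  */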
#include <linux/linkage.h>

ENTRY(strcmp)
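	/* Fall back to the byte loop unless both strings are 32-bit
	   aligned, i.e. no low address bits are set in r0 | r1.  */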
	or	r2,r0,r1
	bmsk_s	r2,r2,1
	brne	r2,0,.Lcharloop
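	/* Constants for the zero test: r12 = 0x01010101, and r5 =
	   ror(r12, 1) = 0x80808080.  */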
	mov_s	r12,0x01010101
	ror	r5,r12
.Lwordloop:
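	/* Compare one aligned word per iteration; exit on a zero byte in
	   source 1 (r4 != 0) or on a word mismatch.  */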
	ld.ab	r2,[r0,4]
	ld.ab	r3,[r1,4]
	nop_s
	sub	r4,r2,r12
	bic	r4,r4,r2
	and	r4,r4,r5
	brne	r4,0,.Lfound0
	breq	r2,r3,.Lwordloop
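	/* Words differ but contain no zero byte.  On little endian the
	   first differing string byte is the least significant differing
	   byte of the word, so isolate it before comparing; in C terms
	   (illustrative only):

		bit  = diff & ~(diff - 1);               // lowest set bit
		mask = 0x80808080 ^ (0x80808080 - bit);  // its byte

	   mask covers the lowest difference bit up to the top bit of its
	   byte; all lower bits are equal in r2 and r3 anyway.  */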
#ifdef __LITTLE_ENDIAN__
	xor	r0,r2,r3	; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
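	/* Strings differ: return 1 if r2 > r3 (unsigned), otherwise also
	   set bit 31 to return a negative value.  bset.lo executes in the
	   delay slot of j_s.d.  */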
	cmp_s	r2,r3
	mov_s	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31

	.balign	4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
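	/* A zero byte was found: fold the zero indicator into the
	   difference mask so the terminator counts as the deciding byte,
	   then isolate that byte as above.  sub.f / mov.hi / bset.lo
	   encode the three-way result (positive, zero, or negative).  */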
	xor	r0,r2,r3	; mask for difference
	or	r0,r0,r4	; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3
	mov.hi	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry-propagation from a less significant zero byte.
	   We can compensate for this by checking that bit 0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
	.balign	4
.Lfound0:
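	/* Build from the zero flags in r4 a mask for everything past the
	   first zero byte, then compare a low estimate of r2 against a
	   high estimate of r3 (and vice versa) so that garbage after the
	   terminator cannot affect the result.  */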
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2		; ... be independent of trailing garbage
	or_s	r2,r2,r0	; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31
#endif /* ENDIAN */
	.balign	4
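	/* Byte-at-a-time loop for unaligned strings: iterate while the
	   bytes match and source 1 has not reached its NUL terminator.  */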
.Lcharloop:
	ldb.ab	r2,[r0,1]
	ldb.ab	r3,[r1,1]
	nop_s
	breq	r2,0,.Lcmpend
	breq	r2,r3,.Lcharloop
.Lcmpend:
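	/* Return the difference of the final bytes: zero if both strings
	   ended together, otherwise its sign orders the strings; the sub
	   executes in the delay slot of j_s.d.  */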
	j_s.d	[blink]
	sub	r0,r2,r3
END(strcmp)