memset-sh4.S 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. /*
  2. * "memset" implementation for SH4
  3. *
  4. * Copyright (C) 1999 Niibe Yutaka
  5. * Copyright (c) 2009 STMicroelectronics Limited
  6. * Author: Stuart Menefy <stuart.menefy:st.com>
  7. */
  8. /*
  9. * void *memset(void *s, int c, size_t n);
  10. */
  11. #include <linux/linkage.h>
  /*
   * memset: store the low byte of c into n consecutive bytes starting
   * at s, and return s.  The buffer is filled from its END towards its
   * start using pre-decrement stores.
   *
   * Register use, as seen in the code below:
   *   r4  in: s.  Immediately advanced to s + n and then used as the
   *       pre-decrement store pointer; it is back at s on exit.
   *   r5  in: c.  Widened to the 32-bit pattern VVVV at label 2.
   *   r6  in: n.  Afterwards the number of bytes still to be stored.
   *   r0  scratch and loop counter; carries the return value on exit.
   *   r1, r2, r3  scratch for the 32-byte alignment and block loops.
   *
   * bt/s, bf/s and rts are delayed branches: the instruction on the
   * line that follows each of them executes whether or not the branch
   * is taken.  Several loops below rely on that.
   *
   * NOTE(review): the leading "NN." on every line looks like residue
   * from a web listing and is not assembler syntax - confirm and strip
   * before assembling.
   */
  12. ENTRY(memset)
  /*
   * r4 = s + n (one past the last byte).  If 12 > n, skip all of the
   * alignment work and let the byte loop at 40 do everything.  The
   * delay slot copies the end pointer into r0 for the alignment test
   * below; it is harmless when the branch is taken.
   */
  13. mov #12,r0
  14. add r6,r4
  15. cmp/gt r6,r0
  16. bt/s 40f ! if it's too small, set a byte at once
  17. mov r4,r0
  /*
   * r0 = end pointer & 3 = number of bytes lying above the last 4-byte
   * boundary.  The delay slot removes them from the remaining count
   * (it subtracts 0 when the end pointer is already aligned).
   */
  18. and #3,r0
  19. cmp/eq #0,r0
  20. bt/s 2f ! It's aligned
  21. sub r0,r6
  /*
   * Store those r0 (1..3) bytes one at a time.  The store sits in the
   * delay slot, so it also runs on the final pass when bf/s falls
   * through; r0 bytes are written in total.
   */
  22. 1:
  23. dt r0
  24. bf/s 1b
  25. mov.b r5,@-r4
  /*
   * r4 is now 4-byte aligned.  Replicate the low byte of r5 into all
   * four byte lanes so that longword stores can be used.
   */
  26. 2: ! make VVVV
  27. extu.b r5,r5
  28. swap.b r5,r0 ! V0
  29. or r0,r5 ! VV
  30. swap.w r5,r0 ! VV00
  31. or r0,r5 ! VVVV
  /*
   * With fewer than 64 bytes left, go straight to the 8-bytes-per-pass
   * loop at 22.  The delay slot preloads r0 with the remaining count,
   * which label 22 expects.
   */
  32. ! Check if enough bytes need to be copied to be worth the big loop
  33. mov #0x40, r0 ! (MT)
  34. cmp/gt r6,r0 ! (MT) 64 > len => slow loop
  35. bt/s 22f
  36. mov r6,r0
  /*
   * r1 = r4 rounded down to a 32-byte boundary.  The delay slot leaves
   * r3 = r4 - r1, the number of bytes above that boundary (0 when r4
   * is already 32-byte aligned and the branch is taken).
   */
  37. ! align the dst to the cache block size if necessary
  38. mov r4, r3
  39. mov #~(0x1f), r1
  40. and r3, r1
  41. cmp/eq r3, r1
  42. bt/s 11f ! dst is already aligned
  43. sub r1, r3 ! r3-r1 -> r3
  /*
   * r3 is a non-zero multiple of 4 here (r4 is 4-byte aligned), so
   * r3 >> 2 is 1..7 longword stores.  The delay slot takes 4 off the
   * remaining count on every pass, including the last one.
   */
  44. shlr2 r3 ! number of loops
  45. 10: mov.l r5,@-r4
  46. dt r3
  47. bf/s 10b
  48. add #-4, r6
  /*
   * r2 = r6 >> 5 = number of whole 32-byte blocks (shld with a
   * negative count shifts right).  r6 was at least 64 before at most
   * 28 bytes of alignment, so r2 is at least 1.  r4 is moved down by
   * 32 to address the start of the block about to be filled, and the
   * pattern is copied to r0, the register movca.l stores from.
   */
  49. 11: ! dst is 32byte aligned
  50. mov r6,r2
  51. mov #-5,r0
  52. shld r0,r2 ! number of loops
  53. add #-32, r4
  54. mov r5, r0
  /*
   * Fill one 32-byte block per pass, lowest address first.  The first
   * longword goes out with movca.l (SH-4 "move with cache block
   * allocation"); the other seven use ordinary displacement stores.
   * The count update (r6 -= 32) and the dt are interleaved with the
   * stores - presumably for instruction pairing, not verified here.
   * The delay slot steps r4 down to the next block on every pass,
   * including the last.
   */
  55. 12:
  56. movca.l r0,@r4
  57. mov.l r5,@(4, r4)
  58. mov.l r5,@(8, r4)
  59. mov.l r5,@(12,r4)
  60. mov.l r5,@(16,r4)
  61. mov.l r5,@(20,r4)
  62. add #-0x20, r6
  63. mov.l r5,@(24,r4)
  64. dt r2
  65. mov.l r5,@(28,r4)
  66. bf/s 12b
  67. add #-32, r4
  /*
   * Undo the extra -32 applied by the final delay slot so r4 points at
   * the lowest byte written so far.  If fewer than 8 bytes remain, go
   * to the byte tail; otherwise load r0 with the count for label 22.
   * This bf has no delay slot, so the mov runs only on fall-through.
   */
  68. add #32, r4
  69. mov #8, r0
  70. cmp/ge r0, r6
  71. bf 40f
  72. mov r6,r0
  /*
   * r0 = remaining count >> 3 = number of 8-byte chunks.  Both ways
   * into 22 arrive with at least 8 bytes left (n >= 12 less at most 3
   * alignment bytes, or the explicit check above), so r0 >= 1.
   */
  73. 22:
  74. shlr2 r0
  75. shlr r0 ! r0 = r6 >> 3
  /*
   * Two longword stores per pass: one in the loop body and one in the
   * delay slot, which also executes on the final pass.
   */
  76. 3:
  77. dt r0
  78. mov.l r5,@-r4 ! set 8-byte at once
  79. bf/s 3b
  80. mov.l r5,@-r4
  /* Keep only the 0..7 bytes that the 8-byte loop did not cover. */
  81. !
  82. mov #7,r0
  83. and r0,r6
  /*
   * Byte tail, also the whole job when n < 12.  A zero count skips the
   * loop; otherwise r6 bytes are stored, one per pass, by the delay
   * slot.
   */
  84. ! fill bytes (length may be zero)
  85. 40: tst r6,r6
  86. bt 5f
  87. 4:
  88. dt r6
  89. bf/s 4b
  90. mov.b r5,@-r4
  /*
   * Return.  r4 has been stepped down once for every byte stored, so
   * it is back at s; the rts delay slot copies it into r0.
   */
  91. 5:
  92. rts
  93. mov r4,r0