/* memset-archs.S */

  /*
   * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License version 2 as
   * published by the Free Software Foundation.
   */
  8. #include <linux/linkage.h>
  /*
   * PREWRITE(A, B) emits a cache hint for address A + B ahead of the stores
   * that will overwrite it.  It expands to "prealloc" unless
   * PREALLOC_NOT_AVAIL is defined, in which case it expands to "prefetchw".
   * The #undef below pins this file to the prealloc variant.
   */
  #undef PREALLOC_NOT_AVAIL

  #ifndef PREALLOC_NOT_AVAIL
  #define PREWRITE(A,B) prealloc [(A),(B)]
  #else
  #define PREWRITE(A,B) prefetchw [(A),(B)]
  #endif
  /*
   * memset: fill a buffer with one byte value.
   *
   * In:    r0 = destination address
   *        r1 = fill value (only the low 8 bits are used)
   *        r2 = number of bytes to fill
   * Out:   r0 = destination address, unchanged
   * Uses:  r1-r5, lp_count and the status flags
   *
   * Strategy:
   *   - length 0 returns at once; lengths up to 8 use the byte loop only
   *   - otherwise single bytes are stored until the cursor is 4-byte aligned
   *   - the fill byte is replicated into r4 and r5, and "std" stores that
   *     register pair 8 bytes at a time
   *   - a 64-byte loop handles everything except the final 64..127 bytes,
   *     a 32-byte loop and then a byte loop finish the tail
   *
   * Cache hints: the 64-byte loop issues PREWRITE for r3 + 64.  That loop
   * always leaves at least 64 bytes for the later loops, so with 64-byte
   * cache lines the hinted line lies inside the buffer (NOTE(review): other
   * L1 line sizes are not handled here - confirm the configuration).
   * The 32-byte tail loop issues no hint: on its last iteration r3 + 32 can
   * be the first byte past the buffer, and asking for write ownership of a
   * line that memset never writes can disturb a line held by another core.
   */
  ENTRY(memset)
          prefetchw [r0]                  ; hint: the first destination line gets written
          mov.f   0, r2                   ; set Z when the length is zero
          jz.d    [blink]                 ; length 0: return right away
          mov     r3, r0                  ; (delay slot) r3 = write cursor, r0 kept for return

          ;; Lengths of 8 or less go straight to the byte loop.
          brls.d.nt r2, 8, .Lsmallchunk
          mov.f   lp_count, r2            ; (delay slot) byte count, Z clear since r2 != 0

          ;; Store single bytes until the cursor is 4-byte aligned.
          and.f   r4, r0, 0x03            ; r4 = dest & 3, Z set when already aligned
          rsub    lp_count, r4, 4         ; lp_count = 4 - r4
          lpnz    @.Laligndestination     ; loop is skipped when Z is set
          ;; LOOP BEGIN
          stb.ab  r1, [r3, 1]
          sub     r2, r2, 1               ; one byte less to fill
  .Laligndestination:

          ;; Replicate the fill byte into all four bytes of r4 and of r5.
          and     r1, r1, 0xFF
          asl     r4, r1, 8
          or      r4, r4, r1              ; low halfword = two copies of the byte
          asl     r5, r4, 16
          or      r5, r5, r4              ; r5 = four copies of the byte
          mov     r4, r5

          ;; Split the remaining length between the 64-byte loop and the tail:
          ;;   r2 >  64: lp_count = r2 - 64, r2 = (r2 & 63) + 64
          ;;   r2 <= 64: lp_count = 0,       r2 unchanged
          sub3    lp_count, r2, 8         ; lp_count = r2 - (8 << 3)
          cmp     r2, 64
          bmsk.hi r2, r2, 5               ; keep bits 5..0
          mov.ls  lp_count, 0
          add3.hi r2, r2, 8               ; r2 += (8 << 3)

          ;; Bytes -> number of 64-byte iterations (8 stores of 8 bytes each).
          lsr.f   lp_count, lp_count, 6
          lpnz    @.Lset64bytes
          ;; LOOP START
          PREWRITE(r3, 64)                ; cache hint for address r3 + 64
          std.ab  r4, [r3, 8]
          std.ab  r4, [r3, 8]
          std.ab  r4, [r3, 8]
          std.ab  r4, [r3, 8]
          std.ab  r4, [r3, 8]
          std.ab  r4, [r3, 8]
          std.ab  r4, [r3, 8]
          std.ab  r4, [r3, 8]
  .Lset64bytes:

          ;; Tail of fewer than 128 bytes: 32 bytes per iteration, no cache
          ;; hint because r3 + 32 may already lie beyond the buffer.
          lsr.f   lp_count, r2, 5
          lpnz    @.Lset32bytes
          ;; LOOP START
          std.ab  r4, [r3, 8]
          std.ab  r4, [r3, 8]
          std.ab  r4, [r3, 8]
          std.ab  r4, [r3, 8]
  .Lset32bytes:

          and.f   lp_count, r2, 0x1F      ; 0..31 bytes still to store
  .Lsmallchunk:
          lpnz    @.Lcopy3bytes           ; byte loop, skipped when nothing is left
          ;; LOOP START
          stb.ab  r1, [r3, 1]
  .Lcopy3bytes:

          j       [blink]
  END(memset)
  /*
   * memzero: zero a buffer by handing over to memset.
   *
   * In:   r0 = destination address
   *       r1 = number of bytes to clear
   * Out:  whatever memset leaves in r0
   */
  ENTRY(memzero)
          mov     r2, r1                  ; length moves into the third argument register
          b.d     memset                  ; plain branch, blink untouched: memset returns to our caller
          mov     r1, 0                   ; (delay slot) fill value is zero
  END(memzero)