exch_n.c 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. /* Copyright (C) 2012-2015 Free Software Foundation, Inc.
  2. Contributed by Richard Henderson <rth@redhat.com>.
  3. This file is part of the GNU Atomic Library (libatomic).
  4. Libatomic is free software; you can redistribute it and/or modify it
  5. under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 3 of the License, or
  7. (at your option) any later version.
  8. Libatomic is distributed in the hope that it will be useful, but WITHOUT ANY
  9. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  10. FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  11. more details.
  12. Under Section 7 of GPL version 3, you are granted additional
  13. permissions described in the GCC Runtime Library Exception, version
  14. 3.1, as published by the Free Software Foundation.
  15. You should have received a copy of the GNU General Public License and
  16. a copy of the GCC Runtime Library Exception along with this program;
  17. see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  18. <http://www.gnu.org/licenses/>. */
  19. #include <libatomic_i.h>
  20. #include <arm-config.h>
  21. /* When using STREX to implement sub-word exchange, we can do much better
  22. than the compiler by using the APSR.GE and APSR.C flags. */
  #if !DONE && HAVE_STREX && !HAVE_STREXBH && N == 2
  /* Atomically replace the 2-byte object at MPTR with NEWVAL and return the
     value it held before.

     The target has word-sized LDREX/STREX but no halfword forms, so the
     containing aligned word is loaded exclusively, the selected halfword is
     replaced and the whole word is stored back, retrying until STREX
     reports success.

     SMODEL is not consulted: a sequentially consistent fence is issued
     before and after the exchange regardless of the requested model.  */
  UTYPE
  SIZE(libat_exchange) (UTYPE *mptr, UTYPE newval, int smodel)
  {
    UWORD t1, t2;
    UTYPE oldval;

    __atomic_thread_fence (__ATOMIC_SEQ_CST);

    /* In the N=2 case, there are only two cases for MPTR: mptr % 4 == {0,2}.
       Rather than computing a variable shift for this, we can store the one
       bit of misalignment in the carry flag, and use conditional constant
       shifts instead.  This saves a register.  */
  #ifdef __ARMEB__
  # define HI "cc" /* iff value is in high half */
  # define LO "cs" /* iff value is in low half */
  #else
  # define HI "cs"
  # define LO "cc"
  #endif

    /* T1 enters holding the mask 0xffff (matching constraint "1").  The
       carry flag set by LSRS must survive the whole retry loop because the
       conditional UXTH inside the loop depends on it; that is why the loop
       test uses TST rather than CMP.  */
    asm volatile (
      "lsrs %[t2],%[ptr],#2\n" /* carry = mptr & 2 */
      " bic %[ptr],%[ptr],#3\n" /* align mptr */
      " itt "HI"\n"
      " lsl"HI" %[t1],%[t1],#16\n" /* shift mask into place */
      " lsl"HI" %[new],%[new],#16\n" /* shift newval into place */
      " uadd16 %[t1],%[t1],%[t1]\n" /* copy mask into APSR.GE */
      "0: ldrex %[t2],[%[ptr]]\n"
      " ite "LO"\n"
      " uxth"LO" %[old],%[t2]\n" /* return old value */
      " uxth"HI" %[old],%[t2], ror #16\n"
      " sel %[t1],%[new],%[t2]\n" /* merge newval */
      " strex %[t2],%[t1],[%[ptr]]\n"
      " tst %[t2],%[t2]\n" /* don't clobber carry */
      " bne 0b"
      : [old] "=&r"(oldval), [t1] "=&r"(t1), [t2] "=&r"(t2),
        [ptr] "+r"(mptr), [new] "+r"(newval)
      : "1"(0xffff)
      /* LSRS and TST rewrite the condition flags, so tell the compiler not
         to keep anything live in them across this statement.  */
      : "memory", "cc");

    __atomic_thread_fence (__ATOMIC_SEQ_CST);

    return oldval;
  }

  #define DONE 1
  #endif /* !HAVE_STREXBH && N == 2 */
  #if !DONE && HAVE_STREX && !HAVE_STREXBH && N == 1
  /* Atomically replace the 1-byte object at MPTR with NEWVAL and return the
     value it held before.

     The target has word-sized LDREX/STREX but no byte forms, so the
     containing aligned word is loaded exclusively, the selected byte lane
     is replaced and the whole word is stored back, retrying until STREX
     reports success.

     SMODEL is not consulted: a sequentially consistent fence is issued
     before and after the exchange regardless of the requested model.  */
  UTYPE
  SIZE(libat_exchange) (UTYPE *mptr, UTYPE newval, int smodel)
  {
    UWORD *wptr, woldval, wnewval, shift, mask, t1, t2;

    __atomic_thread_fence (__ATOMIC_SEQ_CST);

    /* Round MPTR down to the word that contains it.  */
    wptr = (UWORD *)((uintptr_t)mptr & -WORDSIZE);

    /* Bit offset of the byte inside that word.  INVERT_MASK_1 is defined
       elsewhere; presumably it flips the offset on big-endian targets --
       confirm against libatomic_i.h.  */
    shift = (((uintptr_t)mptr % WORDSIZE) * CHAR_BIT) ^ INVERT_MASK_1;
    mask = MASK_1 << shift;

    /* Widen before shifting.  If UTYPE is narrower than int, NEWVAL would
       otherwise be promoted to signed int, and moving a set top bit into
       the sign position is undefined behavior in ISO C.  */
    wnewval = (UWORD) newval << shift;

    /* T1 enters holding MASK (matching constraint "1").  */
    asm volatile (
      "uadd8 %[t1],%[t1],%[t1]\n" /* move mask to APSR.GE */
      "0: ldrex %[old],[%[wptr]]\n"
      " sel %[t1],%[new],%[old]\n" /* merge newval */
      " strex %[t2],%[t1],[%[wptr]]\n"
      " cmp %[t2],#0\n"
      " bne 0b"
      : [old] "=&r"(woldval), [t1] "=&r"(t1), [t2] "=&r"(t2)
      : [new] "r"(wnewval), [wptr] "r"(wptr), "1"(mask)
      /* CMP rewrites the condition flags, so tell the compiler not to keep
         anything live in them across this statement.  */
      : "memory", "cc");

    __atomic_thread_fence (__ATOMIC_SEQ_CST);

    /* Bring the previous byte down to bit 0; the conversion to UTYPE on
       return discards the other lanes.  */
    return woldval >> shift;
  }

  #define DONE 1
  #endif /* !HAVE_STREXBH && N == 1 */
  90. #include "../../exch_n.c"