#ifndef __XRCU_XATOMIC_HPP__
#define __XRCU_XATOMIC_HPP__   1

#include <cstdint>
#include <atomic>   // Used by the fallback paths below; headers must be
                    // included at file scope, not inside the namespace.

/*
 * This file defines an interface for atomic operations that isn't (quite)
 * achievable with the C++ standard atomic API. Basically, instead of using
 * a template class, we use raw pointers.
 *
 * This interface is needed because, for some inexplicable reason, it is not
 * possible to get a pointer to the underlying integer in the std::atomic
 * interface (it may not even exist as such).
 *
 * While we are at it, we also define a few additional operations that are
 * not present in the standard (double CAS, atomic spin).
 *
 * Note that these aren't template functions; we only require these atomic
 * ops to work on pointer-sized values, so we don't bother with anything else.
 */
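/*
 * Usage sketch (illustrative only, not part of the interface; the variable
 * names are hypothetical): these functions operate on a plain uintptr_t,
 * so any suitably aligned pointer-sized field can be accessed atomically
 * without wrapping it in std::atomic.
 *
 *   uintptr_t counter = 0;
 *
 *   uintptr_t prev = xrcu::xatomic_add (&counter, 1);    // fetch-and-add
 *   uintptr_t old  = xrcu::xatomic_swap (&counter, 42);  // exchange
 *
 *   // xatomic_cas returns the previous value; compare it against the
 *   // expected one to tell whether the exchange actually happened.
 *   if (xrcu::xatomic_cas (&counter, 42, 43) == 42)
 *     ; // CAS succeeded.
 */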
namespace xrcu
{

// Forward declaration: defined further below, but already used by the
// fallback implementations of xatomic_or and xatomic_and.
inline void xatomic_spin_nop ();
#if (defined (__GNUC__) && (__GNUC__ > 4 || \
    (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))) || (defined (__clang__) && \
    defined (__clang_major__) && (__clang_major__ >= 4 || \
    (__clang_major__ == 3 && __clang_minor__ >= 8)))

// GCC >= 4.7 and clang >= 3.8 provide the __atomic builtins, which
// operate directly on plain integers.

inline uintptr_t
xatomic_cas (uintptr_t *ptr, uintptr_t exp, uintptr_t nval)
{
  __atomic_compare_exchange_n (ptr, &exp, nval, 0,
    __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
  return (exp);
}

inline uintptr_t
xatomic_or (uintptr_t *ptr, uintptr_t val)
{
  return (__atomic_fetch_or (ptr, val, __ATOMIC_ACQ_REL));
}

inline void
xatomic_and (uintptr_t *ptr, uintptr_t val)
{
  (void)__atomic_and_fetch (ptr, val, __ATOMIC_ACQ_REL);
}

inline uintptr_t
xatomic_swap (uintptr_t *ptr, uintptr_t val)
{
  return (__atomic_exchange_n (ptr, val, __ATOMIC_ACQ_REL));
}

inline uintptr_t
xatomic_add (uintptr_t *ptr, intptr_t val)
{
  return (__atomic_fetch_add (ptr, val, __ATOMIC_ACQ_REL));
}
#else

// Fallback: access the value through std::atomic (included at the top of
// this file). This is only valid if a std::atomic_uintptr_t has the same
// layout as a plain uintptr_t, which the static_assert below checks.

static_assert (sizeof (uintptr_t) == sizeof (std::atomic_uintptr_t) &&
  alignof (uintptr_t) == alignof (std::atomic_uintptr_t),
  "unsupported compiler (uintptr_t and atomic_uintptr_t mismatch)");

inline uintptr_t
xatomic_cas (uintptr_t *ptr, uintptr_t exp, uintptr_t nval)
{
  // Use the strong variant: a spurious failure of the weak form would
  // leave 'exp' unchanged and make callers believe the CAS succeeded.
  reinterpret_cast<std::atomic_uintptr_t *> (ptr)->compare_exchange_strong
    (exp, nval, std::memory_order_acq_rel, std::memory_order_relaxed);
  return (exp);
}

inline uintptr_t
xatomic_swap (uintptr_t *ptr, uintptr_t val)
{
  return (reinterpret_cast<std::atomic_uintptr_t *> (ptr)->exchange
    (val, std::memory_order_acq_rel));
}

inline uintptr_t
xatomic_add (uintptr_t *ptr, intptr_t val)
{
  return (reinterpret_cast<std::atomic_uintptr_t *> (ptr)->fetch_add
    (val, std::memory_order_acq_rel));
}

inline uintptr_t
xatomic_or (uintptr_t *ptr, uintptr_t val)
{
  // No fetch_or in this interface's fallback; emulate with a CAS loop.
  while (true)
    {
      uintptr_t ret = *ptr;
      if (xatomic_cas (ptr, ret, ret | val) == ret)
        return (ret);

      xatomic_spin_nop ();
    }
}

inline void
xatomic_and (uintptr_t *ptr, uintptr_t val)
{
  // Same CAS-loop emulation as xatomic_or above.
  while (true)
    {
      uintptr_t ret = *ptr;
      if (xatomic_cas (ptr, ret, ret & val) == ret)
        return;

      xatomic_spin_nop ();
    }
}

#endif
#if defined (__GNUC__)

#  if defined (__i386__) || defined (__x86_64__)

inline void
xatomic_spin_nop ()
{
  // 'pause' is the x86 spin-wait hint; it reduces power consumption and
  // avoids memory-order violations when leaving the loop.
  __asm__ __volatile__ ("pause" : : : "memory");
}

#  elif defined (__aarch64__) || defined (__arm__)

inline void
xatomic_spin_nop ()
{
  // Wait-for-event: stall cheaply until another core signals an event.
  __asm__ __volatile__ ("wfe" : : : "memory");
}

#  else

inline void
xatomic_spin_nop ()
{
  __atomic_thread_fence (__ATOMIC_ACQUIRE);
}

#  endif

#else

inline void
xatomic_spin_nop ()
{
  std::atomic_thread_fence (std::memory_order_acquire);
}

#endif
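/*
 * Spin-wait sketch (illustrative; 'ready_flag' is hypothetical): poll a
 * word until another thread sets it, hinting the CPU between polls. Since
 * this interface has no atomic load, CASing the value with itself is used
 * as an atomic read here: it returns the current value either way.
 *
 *   while (xrcu::xatomic_cas (&ready_flag, 0, 0) == 0)
 *     xrcu::xatomic_spin_nop ();
 */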
// Boolean variant of the CAS: returns whether the exchange took place.
inline bool
xatomic_cas_bool (uintptr_t *ptr, uintptr_t exp, uintptr_t nval)
{
  return (xatomic_cas (ptr, exp, nval) == exp);
}
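/*
 * Retry-loop sketch (illustrative; 'set_lowest_bit' is a hypothetical
 * helper): the usual lock-free update pattern reads the current value,
 * computes the new one, and retries until the CAS succeeds.
 *
 *   inline void
 *   set_lowest_bit (uintptr_t *ptr)
 *   {
 *     while (true)
 *       {
 *         uintptr_t old = *ptr;
 *         if (xrcu::xatomic_cas_bool (ptr, old, old | 1))
 *           break;
 *
 *         xrcu::xatomic_spin_nop ();
 *       }
 *   }
 */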
// Try to define double-width CAS.

#if defined (__GNUC__)

#  if defined (__amd64) || defined (__amd64__) || \
      defined (__x86_64) || defined (__x86_64__)

#    define XRCU_HAVE_XATOMIC_DCAS

#    if defined (_ILP32) || defined (__ILP32__)

// x32 ABI: pointers are 32-bit, so the double-width CAS fits in a
// single 64-bit compare-exchange.
inline bool
xatomic_dcas_bool (uintptr_t *ptr, uintptr_t elo,
  uintptr_t ehi, uintptr_t nlo, uintptr_t nhi)
{
  uint64_t exp = ((uint64_t)ehi << 32) | elo;
  uint64_t nval = ((uint64_t)nhi << 32) | nlo;

  return (__atomic_compare_exchange_n ((uint64_t *)ptr,
    &exp, nval, 0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED));
}

#    else

// 64-bit mode: cmpxchg16b on the 16-byte pair at *ptr.
inline bool
xatomic_dcas_bool (uintptr_t *ptr, uintptr_t elo,
  uintptr_t ehi, uintptr_t nlo, uintptr_t nhi)
{
  char r;

  __asm__ __volatile__
    (
      "lock; cmpxchg16b %0\n\t"
      "setz %1"
      : "+m" (*ptr), "=q" (r)
      : "d" (ehi), "a" (elo),
        "c" (nhi), "b" (nlo)
      : "memory"
    );

  return ((bool)r);
}

#    endif   // ILP32.
#  elif defined (__i386) || defined (__i386__)

#    define XRCU_HAVE_XATOMIC_DCAS

#    if defined (__PIC__) && __GNUC__ < 5

// Old PIC builds reserve EBX for the GOT pointer, so it has to be saved
// and restored manually around cmpxchg8b.
inline bool
xatomic_dcas_bool (uintptr_t *ptr, uintptr_t elo,
  uintptr_t ehi, uintptr_t nlo, uintptr_t nhi)
{
  uintptr_t s;
  char r;

  __asm__ __volatile__
    (
      "movl %%ebx, %2\n\t"
      "leal %0, %%edi\n\t"
      "movl %7, %%ebx\n\t"
      "lock; cmpxchg8b (%%edi)\n\t"
      "movl %2, %%ebx\n\t"
      "setz %1"
      : "=m" (*ptr), "=a" (r), "=m" (s)
      : "m" (*ptr), "d" (ehi), "a" (elo),
        "c" (nhi), "m" (nlo)
      : "%edi", "memory"
    );

  return ((bool)r);
}

#    else

inline bool
xatomic_dcas_bool (uintptr_t *ptr, uintptr_t elo,
  uintptr_t ehi, uintptr_t nlo, uintptr_t nhi)
{
  char r;

  __asm__ __volatile__
    (
      "lock; cmpxchg8b %0\n\t"
      "setz %1"
      : "+m" (*ptr), "=a" (r)
      : "d" (ehi), "a" (elo),
        "c" (nhi), "b" (nlo)
      : "memory"
    );

  return ((bool)r);
}

#    endif   // PIC.
#  elif (defined (__arm__) || defined (__thumb__)) && \
      ((!defined (__thumb__) || (defined (__thumb2__) && \
        !defined (__ARM_ARCH_7__)) && !defined (__ARM_ARCH_7M__) && \
        !defined (__ARM_ARCH_7EM__)) && (!defined (__clang__) || \
        (__clang_major__ == 3 && __clang_minor__ >= 3)))

#    define XRCU_HAVE_XATOMIC_DCAS

// LDREXD/STREXD loop: load-exclusive the 64-bit pair, bail out if it
// doesn't match the expected value, otherwise attempt the exclusive store.
inline bool
xatomic_dcas_bool (uintptr_t *ptr, uintptr_t elo,
  uintptr_t ehi, uintptr_t nlo, uintptr_t nhi)
{
  uint64_t qv = ((uint64_t)ehi << 32) | elo;
  uint64_t nv = ((uint64_t)nhi << 32) | nlo;

  while (true)
    {
      uint64_t tmp;
      __asm__ __volatile__
        (
          "ldrexd %0, %H0, [%1]"
          : "=&r" (tmp) : "r" (ptr)
        );

      if (tmp != qv)
        return (false);

      int r;
      __asm__ __volatile__
        (
          "strexd %0, %3, %H3, [%2]"
          : "=&r" (r), "+m" (*ptr)
          : "r" (ptr), "r" (nv)
          : "cc"
        );

      if (r == 0)
        return (true);
    }
}
#  elif defined (__aarch64__)

#    define XRCU_HAVE_XATOMIC_DCAS

// LDAXP/STXP loop: the AArch64 analogue of the ARM variant above.
inline bool
xatomic_dcas_bool (uintptr_t *ptr, uintptr_t elo,
  uintptr_t ehi, uintptr_t nlo, uintptr_t nhi)
{
  while (true)
    {
      uintptr_t t1, t2;

      __asm__ __volatile__
        (
          "ldaxp %0, %1, %2"
          : "=&r" (t1), "=&r" (t2)
          : "Q" (*ptr)
        );

      if (t1 != elo || t2 != ehi)
        return (false);

      int r;

      __asm__ __volatile__
        (
          "stxp %w0, %2, %3, %1"
          : "=&r" (r), "=Q" (*ptr)
          : "r" (nlo), "r" (nhi)
        );

      if (r == 0)
        return (true);
    }
}

#  endif

#endif
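/*
 * Double-width CAS sketch (illustrative; 'node', 'new_node' and 'tag' are
 * hypothetical): when XRCU_HAVE_XATOMIC_DCAS is defined, two adjacent
 * pointer-sized words can be compared and swapped atomically, e.g. a
 * pointer plus an ABA-avoidance counter. The pair must be contiguous and
 * suitably aligned (16 bytes on x86-64 for cmpxchg16b).
 *
 * #ifdef XRCU_HAVE_XATOMIC_DCAS
 *   alignas (2 * sizeof (uintptr_t)) uintptr_t pair[2] =
 *     { (uintptr_t)node, tag };
 *
 *   bool ok = xrcu::xatomic_dcas_bool (pair,
 *     (uintptr_t)node, tag,              // Expected: lo, hi.
 *     (uintptr_t)new_node, tag + 1);     // Desired:  lo, hi.
 * #endif
 */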
} // namespace xrcu

#endif