rwlock.cc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. /* Copyright (C) 2011-2015 Free Software Foundation, Inc.
  2. Contributed by Torvald Riegel <triegel@redhat.com>.
  3. This file is part of the GNU Transactional Memory Library (libitm).
  4. Libitm is free software; you can redistribute it and/or modify it
  5. under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 3 of the License, or
  7. (at your option) any later version.
  8. Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
  9. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  10. FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  11. more details.
  12. Under Section 7 of GPL version 3, you are granted additional
  13. permissions described in the GCC Runtime Library Exception, version
  14. 3.1, as published by the Free Software Foundation.
  15. You should have received a copy of the GNU General Public License and
  16. a copy of the GCC Runtime Library Exception along with this program;
  17. see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  18. <http://www.gnu.org/licenses/>. */
  19. #include "libitm_i.h"
  20. #include "futex.h"
  21. #include <limits.h>
  22. namespace GTM HIDDEN {
  23. // Acquire a RW lock for reading.
  24. void
  25. gtm_rwlock::read_lock (gtm_thread *tx)
  26. {
  27. for (;;)
  28. {
  29. // Fast path: first announce our intent to read, then check for
  30. // conflicting intents to write. The fence ensures that this happens
  31. // in exactly this order.
  32. tx->shared_state.store (0, memory_order_relaxed);
  33. atomic_thread_fence (memory_order_seq_cst);
  34. if (likely (writers.load (memory_order_relaxed) == 0))
  35. return;
  36. // There seems to be an active, waiting, or confirmed writer, so enter
  37. // the futex-based slow path.
  38. // Before waiting, we clear our read intent check whether there are any
  39. // writers that might potentially wait for readers. If so, wake them.
  40. // We need the barrier here for the same reason that we need it in
  41. // read_unlock().
  42. // TODO Potentially too many wake-ups. See comments in read_unlock().
  43. tx->shared_state.store (-1, memory_order_relaxed);
  44. atomic_thread_fence (memory_order_seq_cst);
  45. if (writer_readers.load (memory_order_relaxed) > 0)
  46. {
  47. writer_readers.store (0, memory_order_relaxed);
  48. futex_wake(&writer_readers, 1);
  49. }
  50. // Signal that there are waiting readers and wait until there is no
  51. // writer anymore.
  52. // TODO Spin here on writers for a while. Consider whether we woke
  53. // any writers before?
  54. while (writers.load (memory_order_relaxed))
  55. {
  56. // An active writer. Wait until it has finished. To avoid lost
  57. // wake-ups, we need to use Dekker-like synchronization.
  58. // Note that we cannot reset readers to zero when we see that there
  59. // are no writers anymore after the barrier because this pending
  60. // store could then lead to lost wake-ups at other readers.
  61. readers.store (1, memory_order_relaxed);
  62. atomic_thread_fence (memory_order_seq_cst);
  63. if (writers.load (memory_order_relaxed))
  64. futex_wait(&readers, 1);
  65. else
  66. {
  67. // There is no writer, actually. However, we can have enabled
  68. // a futex_wait in other readers by previously setting readers
  69. // to 1, so we have to wake them up because there is no writer
  70. // that will do that. We don't know whether the wake-up is
  71. // really necessary, but we can get lost wake-up situations
  72. // otherwise.
  73. // No additional barrier nor a nonrelaxed load is required due
  74. // to coherency constraints. write_unlock() checks readers to
  75. // see if any wake-up is necessary, but it is not possible that
  76. // a reader's store prevents a required later writer wake-up;
  77. // If the waking reader's store (value 0) is in modification
  78. // order after the waiting readers store (value 1), then the
  79. // latter will have to read 0 in the futex due to coherency
  80. // constraints and the happens-before enforced by the futex
  81. // (paragraph 6.10 in the standard, 6.19.4 in the Batty et al
  82. // TR); second, the writer will be forced to read in
  83. // modification order too due to Dekker-style synchronization
  84. // with the waiting reader (see write_unlock()).
  85. // ??? Can we avoid the wake-up if readers is zero (like in
  86. // write_unlock())? Anyway, this might happen too infrequently
  87. // to improve performance significantly.
  88. readers.store (0, memory_order_relaxed);
  89. futex_wake(&readers, INT_MAX);
  90. }
  91. }
  92. // And we try again to acquire a read lock.
  93. }
  94. }
  95. // Acquire a RW lock for writing. Generic version that also works for
  96. // upgrades.
  97. // Note that an upgrade might fail (and thus waste previous work done during
  98. // this transaction) if there is another thread that tried to go into serial
  99. // mode earlier (i.e., upgrades do not have higher priority than pure writers).
  100. // However, this seems rare enough to not consider it further as we need both
  101. // a non-upgrade writer and a writer to happen to switch to serial mode
  102. // concurrently. If we'd want to handle this, a writer waiting for readers
  103. // would have to coordinate with later arriving upgrades and hand over the
  104. // lock to them, including the the reader-waiting state. We can try to support
  105. // this if this will actually happen often enough in real workloads.
  106. bool
  107. gtm_rwlock::write_lock_generic (gtm_thread *tx)
  108. {
  109. // Try to acquire the write lock.
  110. int w = 0;
  111. if (unlikely (!writers.compare_exchange_strong (w, 1)))
  112. {
  113. // If this is an upgrade, we must not wait for other writers or
  114. // upgrades.
  115. if (tx != 0)
  116. return false;
  117. // There is already a writer. If there are no other waiting writers,
  118. // switch to contended mode. We need seq_cst memory order to make the
  119. // Dekker-style synchronization work.
  120. if (w != 2)
  121. w = writers.exchange (2);
  122. while (w != 0)
  123. {
  124. futex_wait(&writers, 2);
  125. w = writers.exchange (2);
  126. }
  127. }
  128. // We have acquired the writer side of the R/W lock. Now wait for any
  129. // readers that might still be active.
  130. // We don't need an extra barrier here because the CAS and the xchg
  131. // operations have full barrier semantics already.
  132. // TODO In the worst case, this requires one wait/wake pair for each
  133. // active reader. Reduce this!
  134. for (gtm_thread *it = gtm_thread::list_of_threads; it != 0;
  135. it = it->next_thread)
  136. {
  137. if (it == tx)
  138. continue;
  139. // Use a loop here to check reader flags again after waiting.
  140. while (it->shared_state.load (memory_order_relaxed)
  141. != ~(typeof it->shared_state)0)
  142. {
  143. // An active reader. Wait until it has finished. To avoid lost
  144. // wake-ups, we need to use Dekker-like synchronization.
  145. // Note that we can reset writer_readers to zero when we see after
  146. // the barrier that the reader has finished in the meantime;
  147. // however, this is only possible because we are the only writer.
  148. // TODO Spin for a while on this reader flag.
  149. writer_readers.store (1, memory_order_relaxed);
  150. atomic_thread_fence (memory_order_seq_cst);
  151. if (it->shared_state.load (memory_order_relaxed)
  152. != ~(typeof it->shared_state)0)
  153. futex_wait(&writer_readers, 1);
  154. else
  155. writer_readers.store (0, memory_order_relaxed);
  156. }
  157. }
  158. return true;
  159. }
  160. // Acquire a RW lock for writing.
  161. void
  162. gtm_rwlock::write_lock ()
  163. {
  164. write_lock_generic (0);
  165. }
  166. // Upgrade a RW lock that has been locked for reading to a writing lock.
  167. // Do this without possibility of another writer incoming. Return false
  168. // if this attempt fails (i.e. another thread also upgraded).
  169. bool
  170. gtm_rwlock::write_upgrade (gtm_thread *tx)
  171. {
  172. return write_lock_generic (tx);
  173. }
  174. // Has to be called iff the previous upgrade was successful and after it is
  175. // safe for the transaction to not be marked as a reader anymore.
  176. void
  177. gtm_rwlock::write_upgrade_finish (gtm_thread *tx)
  178. {
  179. // We are not a reader anymore. This is only safe to do after we have
  180. // acquired the writer lock.
  181. tx->shared_state.store (-1, memory_order_release);
  182. }
  183. // Release a RW lock from reading.
  184. void
  185. gtm_rwlock::read_unlock (gtm_thread *tx)
  186. {
  187. // We only need release memory order here because of privatization safety
  188. // (this ensures that marking the transaction as inactive happens after
  189. // any prior data accesses by this transaction, and that neither the
  190. // compiler nor the hardware order this store earlier).
  191. // ??? We might be able to avoid this release here if the compiler can't
  192. // merge the release fence with the subsequent seq_cst fence.
  193. tx->shared_state.store (-1, memory_order_release);
  194. // If there is a writer waiting for readers, wake it up. We need the fence
  195. // to avoid lost wake-ups. Furthermore, the privatization safety
  196. // implementation in gtm_thread::try_commit() relies on the existence of
  197. // this seq_cst fence.
  198. // ??? We might not be the last active reader, so the wake-up might happen
  199. // too early. How do we avoid this without slowing down readers too much?
  200. // Each reader could scan the list of txns for other active readers but
  201. // this can result in many cache misses. Use combining instead?
  202. // TODO Sends out one wake-up for each reader in the worst case.
  203. atomic_thread_fence (memory_order_seq_cst);
  204. if (unlikely (writer_readers.load (memory_order_relaxed) > 0))
  205. {
  206. // No additional barrier needed here (see write_unlock()).
  207. writer_readers.store (0, memory_order_relaxed);
  208. futex_wake(&writer_readers, 1);
  209. }
  210. }
  211. // Release a RW lock from writing.
  212. void
  213. gtm_rwlock::write_unlock ()
  214. {
  215. // This needs to have seq_cst memory order.
  216. if (writers.fetch_sub (1) == 2)
  217. {
  218. // There might be waiting writers, so wake them.
  219. writers.store (0, memory_order_relaxed);
  220. if (futex_wake(&writers, 1) == 0)
  221. {
  222. // If we did not wake any waiting writers, we might indeed be the
  223. // last writer (this can happen because write_lock_generic()
  224. // exchanges 0 or 1 to 2 and thus might go to contended mode even if
  225. // no other thread holds the write lock currently). Therefore, we
  226. // have to wake up readers here as well. Execute a barrier after
  227. // the previous relaxed reset of writers (Dekker-style), and fall
  228. // through to the normal reader wake-up code.
  229. atomic_thread_fence (memory_order_seq_cst);
  230. }
  231. else
  232. return;
  233. }
  234. // No waiting writers, so wake up all waiting readers.
  235. // Because the fetch_and_sub is a full barrier already, we don't need
  236. // another barrier here (as in read_unlock()).
  237. if (readers.load (memory_order_relaxed) > 0)
  238. {
  239. // No additional barrier needed here. The previous load must be in
  240. // modification order because of the coherency constraints. Late stores
  241. // by a reader are not a problem because readers do Dekker-style
  242. // synchronization on writers.
  243. readers.store (0, memory_order_relaxed);
  244. futex_wake(&readers, INT_MAX);
  245. }
  246. }
  247. } // namespace GTM