tsan_clock.cc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428
  1. //===-- tsan_clock.cc -----------------------------------------------------===//
  2. //
  3. // This file is distributed under the University of Illinois Open Source
  4. // License. See LICENSE.TXT for details.
  5. //
  6. //===----------------------------------------------------------------------===//
  7. //
  8. // This file is a part of ThreadSanitizer (TSan), a race detector.
  9. //
  10. //===----------------------------------------------------------------------===//
  11. #include "tsan_clock.h"
  12. #include "tsan_rtl.h"
  13. #include "sanitizer_common/sanitizer_placement_new.h"
  14. // SyncClock and ThreadClock implement vector clocks for sync variables
  15. // (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
  16. // ThreadClock contains fixed-size vector clock for maximum number of threads.
  17. // SyncClock contains growable vector clock for currently necessary number of
  18. // threads.
  19. // Together they implement very simple model of operations, namely:
  20. //
  21. // void ThreadClock::acquire(const SyncClock *src) {
  22. // for (int i = 0; i < kMaxThreads; i++)
  23. // clock[i] = max(clock[i], src->clock[i]);
  24. // }
  25. //
  26. // void ThreadClock::release(SyncClock *dst) const {
  27. // for (int i = 0; i < kMaxThreads; i++)
  28. // dst->clock[i] = max(dst->clock[i], clock[i]);
  29. // }
  30. //
  31. // void ThreadClock::ReleaseStore(SyncClock *dst) const {
  32. // for (int i = 0; i < kMaxThreads; i++)
  33. // dst->clock[i] = clock[i];
  34. // }
  35. //
  36. // void ThreadClock::acq_rel(SyncClock *dst) {
  37. // acquire(dst);
  38. // release(dst);
  39. // }
  40. //
  41. // Conformance to this model is extensively verified in tsan_clock_test.cc.
  42. // However, the implementation is significantly more complex. The complexity
  43. // allows to implement important classes of use cases in O(1) instead of O(N).
  44. //
  45. // The use cases are:
  46. // 1. Singleton/once atomic that has a single release-store operation followed
  47. // by zillions of acquire-loads (the acquire-load is O(1)).
  48. // 2. Thread-local mutex (both lock and unlock can be O(1)).
  49. // 3. Leaf mutex (unlock is O(1)).
  50. // 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
  51. // 5. An atomic with a single writer (writes can be O(1)).
  52. // The implementation dynamically adapts to workload. So if an atomic is in
  53. // read-only phase, these reads will be O(1); if it later switches to read/write
  54. // phase, the implementation will correctly handle that by switching to O(N).
  55. //
  56. // Thread-safety note: all const operations on SyncClock's are conducted under
  57. // a shared lock; all non-const operations on SyncClock's are conducted under
  58. // an exclusive lock; ThreadClock's are private to respective threads and so
  59. // do not need any protection.
  60. //
  61. // Description of ThreadClock state:
  62. // clk_ - fixed size vector clock.
  63. // nclk_ - effective size of the vector clock (the rest is zeros).
  64. // tid_ - index of the thread associated with the clock ("current thread").
  65. // last_acquire_ - current thread time when it acquired something from
  66. // other threads.
  67. //
  68. // Description of SyncClock state:
  69. // clk_ - variable size vector clock, low kClkBits hold timestamp,
  70. // the remaining bits hold "acquired" flag (the actual value is thread's
  71. // reused counter);
  72. // if acquired == thr->reused_, then the respective thread has already
  73. // acquired this clock (except possibly dirty_tids_).
  74. // dirty_tids_ - holds up to two indices in the vector clock that other threads
  75. // need to acquire regardless of "acquired" flag value;
  76. // release_store_tid_ - denotes that the clock state is a result of
  77. // release-store operation by the thread with release_store_tid_ index.
  78. // release_store_reused_ - reuse count of release_store_tid_.
  // The methods in this file have no ThreadState at hand, so statistics are
  // bumped through cur_thread(). This is an ugly hack that works only in C++;
  // when TSAN_GO is defined the macro expands to a no-op expression instead.
  #ifdef TSAN_GO
  # define CPP_STAT_INC(typ) (void)0
  #else
  # define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
  #endif
  86. namespace __tsan {
  // Sentinel thread index (all bits set). In this file it marks an unused
  // dirty_tids_ slot and the absence of a release-store owner.
  const unsigned kInvalidTid = static_cast<unsigned>(-1);
  88. ThreadClock::ThreadClock(unsigned tid, unsigned reused)
  89. : tid_(tid)
  90. , reused_(reused + 1) { // 0 has special meaning
  91. CHECK_LT(tid, kMaxTidInClock);
  92. CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
  93. nclk_ = tid_ + 1;
  94. last_acquire_ = 0;
  95. internal_memset(clk_, 0, sizeof(clk_));
  96. clk_[tid_].reused = reused_;
  97. }
  98. void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
  99. DCHECK(nclk_ <= kMaxTid);
  100. DCHECK(src->size_ <= kMaxTid);
  101. CPP_STAT_INC(StatClockAcquire);
  102. // Check if it's empty -> no need to do anything.
  103. const uptr nclk = src->size_;
  104. if (nclk == 0) {
  105. CPP_STAT_INC(StatClockAcquireEmpty);
  106. return;
  107. }
  108. // Check if we've already acquired src after the last release operation on src
  109. bool acquired = false;
  110. if (nclk > tid_) {
  111. CPP_STAT_INC(StatClockAcquireLarge);
  112. if (src->elem(tid_).reused == reused_) {
  113. CPP_STAT_INC(StatClockAcquireRepeat);
  114. for (unsigned i = 0; i < kDirtyTids; i++) {
  115. unsigned tid = src->dirty_tids_[i];
  116. if (tid != kInvalidTid) {
  117. u64 epoch = src->elem(tid).epoch;
  118. if (clk_[tid].epoch < epoch) {
  119. clk_[tid].epoch = epoch;
  120. acquired = true;
  121. }
  122. }
  123. }
  124. if (acquired) {
  125. CPP_STAT_INC(StatClockAcquiredSomething);
  126. last_acquire_ = clk_[tid_].epoch;
  127. }
  128. return;
  129. }
  130. }
  131. // O(N) acquire.
  132. CPP_STAT_INC(StatClockAcquireFull);
  133. nclk_ = max(nclk_, nclk);
  134. for (uptr i = 0; i < nclk; i++) {
  135. u64 epoch = src->elem(i).epoch;
  136. if (clk_[i].epoch < epoch) {
  137. clk_[i].epoch = epoch;
  138. acquired = true;
  139. }
  140. }
  141. // Remember that this thread has acquired this clock.
  142. if (nclk > tid_)
  143. src->elem(tid_).reused = reused_;
  144. if (acquired) {
  145. CPP_STAT_INC(StatClockAcquiredSomething);
  146. last_acquire_ = clk_[tid_].epoch;
  147. }
  148. }
  149. void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
  150. DCHECK_LE(nclk_, kMaxTid);
  151. DCHECK_LE(dst->size_, kMaxTid);
  152. if (dst->size_ == 0) {
  153. // ReleaseStore will correctly set release_store_tid_,
  154. // which can be important for future operations.
  155. ReleaseStore(c, dst);
  156. return;
  157. }
  158. CPP_STAT_INC(StatClockRelease);
  159. // Check if we need to resize dst.
  160. if (dst->size_ < nclk_)
  161. dst->Resize(c, nclk_);
  162. // Check if we had not acquired anything from other threads
  163. // since the last release on dst. If so, we need to update
  164. // only dst->elem(tid_).
  165. if (dst->elem(tid_).epoch > last_acquire_) {
  166. UpdateCurrentThread(dst);
  167. if (dst->release_store_tid_ != tid_ ||
  168. dst->release_store_reused_ != reused_)
  169. dst->release_store_tid_ = kInvalidTid;
  170. return;
  171. }
  172. // O(N) release.
  173. CPP_STAT_INC(StatClockReleaseFull);
  174. // First, remember whether we've acquired dst.
  175. bool acquired = IsAlreadyAcquired(dst);
  176. if (acquired)
  177. CPP_STAT_INC(StatClockReleaseAcquired);
  178. // Update dst->clk_.
  179. for (uptr i = 0; i < nclk_; i++) {
  180. ClockElem &ce = dst->elem(i);
  181. ce.epoch = max(ce.epoch, clk_[i].epoch);
  182. ce.reused = 0;
  183. }
  184. // Clear 'acquired' flag in the remaining elements.
  185. if (nclk_ < dst->size_)
  186. CPP_STAT_INC(StatClockReleaseClearTail);
  187. for (uptr i = nclk_; i < dst->size_; i++)
  188. dst->elem(i).reused = 0;
  189. for (unsigned i = 0; i < kDirtyTids; i++)
  190. dst->dirty_tids_[i] = kInvalidTid;
  191. dst->release_store_tid_ = kInvalidTid;
  192. dst->release_store_reused_ = 0;
  193. // If we've acquired dst, remember this fact,
  194. // so that we don't need to acquire it on next acquire.
  195. if (acquired)
  196. dst->elem(tid_).reused = reused_;
  197. }
  198. void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
  199. DCHECK(nclk_ <= kMaxTid);
  200. DCHECK(dst->size_ <= kMaxTid);
  201. CPP_STAT_INC(StatClockStore);
  202. // Check if we need to resize dst.
  203. if (dst->size_ < nclk_)
  204. dst->Resize(c, nclk_);
  205. if (dst->release_store_tid_ == tid_ &&
  206. dst->release_store_reused_ == reused_ &&
  207. dst->elem(tid_).epoch > last_acquire_) {
  208. CPP_STAT_INC(StatClockStoreFast);
  209. UpdateCurrentThread(dst);
  210. return;
  211. }
  212. // O(N) release-store.
  213. CPP_STAT_INC(StatClockStoreFull);
  214. for (uptr i = 0; i < nclk_; i++) {
  215. ClockElem &ce = dst->elem(i);
  216. ce.epoch = clk_[i].epoch;
  217. ce.reused = 0;
  218. }
  219. // Clear the tail of dst->clk_.
  220. if (nclk_ < dst->size_) {
  221. for (uptr i = nclk_; i < dst->size_; i++) {
  222. ClockElem &ce = dst->elem(i);
  223. ce.epoch = 0;
  224. ce.reused = 0;
  225. }
  226. CPP_STAT_INC(StatClockStoreTail);
  227. }
  228. for (unsigned i = 0; i < kDirtyTids; i++)
  229. dst->dirty_tids_[i] = kInvalidTid;
  230. dst->release_store_tid_ = tid_;
  231. dst->release_store_reused_ = reused_;
  232. // Rememeber that we don't need to acquire it in future.
  233. dst->elem(tid_).reused = reused_;
  234. }
  235. void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
  236. CPP_STAT_INC(StatClockAcquireRelease);
  237. acquire(c, dst);
  238. ReleaseStore(c, dst);
  239. }
  240. // Updates only single element related to the current thread in dst->clk_.
  241. void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
  242. // Update the threads time, but preserve 'acquired' flag.
  243. dst->elem(tid_).epoch = clk_[tid_].epoch;
  244. for (unsigned i = 0; i < kDirtyTids; i++) {
  245. if (dst->dirty_tids_[i] == tid_) {
  246. CPP_STAT_INC(StatClockReleaseFast1);
  247. return;
  248. }
  249. if (dst->dirty_tids_[i] == kInvalidTid) {
  250. CPP_STAT_INC(StatClockReleaseFast2);
  251. dst->dirty_tids_[i] = tid_;
  252. return;
  253. }
  254. }
  255. // Reset all 'acquired' flags, O(N).
  256. CPP_STAT_INC(StatClockReleaseSlow);
  257. for (uptr i = 0; i < dst->size_; i++)
  258. dst->elem(i).reused = 0;
  259. for (unsigned i = 0; i < kDirtyTids; i++)
  260. dst->dirty_tids_[i] = kInvalidTid;
  261. }
  262. // Checks whether the current threads has already acquired src.
  263. bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
  264. if (src->elem(tid_).reused != reused_)
  265. return false;
  266. for (unsigned i = 0; i < kDirtyTids; i++) {
  267. unsigned tid = src->dirty_tids_[i];
  268. if (tid != kInvalidTid) {
  269. if (clk_[tid].epoch < src->elem(tid).epoch)
  270. return false;
  271. }
  272. }
  273. return true;
  274. }
  275. void SyncClock::Resize(ClockCache *c, uptr nclk) {
  276. CPP_STAT_INC(StatClockReleaseResize);
  277. if (RoundUpTo(nclk, ClockBlock::kClockCount) <=
  278. RoundUpTo(size_, ClockBlock::kClockCount)) {
  279. // Growing within the same block.
  280. // Memory is already allocated, just increase the size.
  281. size_ = nclk;
  282. return;
  283. }
  284. if (nclk <= ClockBlock::kClockCount) {
  285. // Grow from 0 to one-level table.
  286. CHECK_EQ(size_, 0);
  287. CHECK_EQ(tab_, 0);
  288. CHECK_EQ(tab_idx_, 0);
  289. size_ = nclk;
  290. tab_idx_ = ctx->clock_alloc.Alloc(c);
  291. tab_ = ctx->clock_alloc.Map(tab_idx_);
  292. internal_memset(tab_, 0, sizeof(*tab_));
  293. return;
  294. }
  295. // Growing two-level table.
  296. if (size_ == 0) {
  297. // Allocate first level table.
  298. tab_idx_ = ctx->clock_alloc.Alloc(c);
  299. tab_ = ctx->clock_alloc.Map(tab_idx_);
  300. internal_memset(tab_, 0, sizeof(*tab_));
  301. } else if (size_ <= ClockBlock::kClockCount) {
  302. // Transform one-level table to two-level table.
  303. u32 old = tab_idx_;
  304. tab_idx_ = ctx->clock_alloc.Alloc(c);
  305. tab_ = ctx->clock_alloc.Map(tab_idx_);
  306. internal_memset(tab_, 0, sizeof(*tab_));
  307. tab_->table[0] = old;
  308. }
  309. // At this point we have first level table allocated.
  310. // Add second level tables as necessary.
  311. for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount);
  312. i < nclk; i += ClockBlock::kClockCount) {
  313. u32 idx = ctx->clock_alloc.Alloc(c);
  314. ClockBlock *cb = ctx->clock_alloc.Map(idx);
  315. internal_memset(cb, 0, sizeof(*cb));
  316. CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0);
  317. tab_->table[i/ClockBlock::kClockCount] = idx;
  318. }
  319. size_ = nclk;
  320. }
  321. // Sets a single element in the vector clock.
  322. // This function is called only from weird places like AcquireGlobal.
  323. void ThreadClock::set(unsigned tid, u64 v) {
  324. DCHECK_LT(tid, kMaxTid);
  325. DCHECK_GE(v, clk_[tid].epoch);
  326. clk_[tid].epoch = v;
  327. if (nclk_ <= tid)
  328. nclk_ = tid + 1;
  329. last_acquire_ = clk_[tid_].epoch;
  330. }
  331. void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
  332. printf("clock=[");
  333. for (uptr i = 0; i < nclk_; i++)
  334. printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
  335. printf("] reused=[");
  336. for (uptr i = 0; i < nclk_; i++)
  337. printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
  338. printf("] tid=%u/%u last_acq=%llu",
  339. tid_, reused_, last_acquire_);
  340. }
  341. SyncClock::SyncClock()
  342. : release_store_tid_(kInvalidTid)
  343. , release_store_reused_()
  344. , tab_()
  345. , tab_idx_()
  346. , size_() {
  347. for (uptr i = 0; i < kDirtyTids; i++)
  348. dirty_tids_[i] = kInvalidTid;
  349. }
  350. SyncClock::~SyncClock() {
  351. // Reset must be called before dtor.
  352. CHECK_EQ(size_, 0);
  353. CHECK_EQ(tab_, 0);
  354. CHECK_EQ(tab_idx_, 0);
  355. }
  356. void SyncClock::Reset(ClockCache *c) {
  357. if (size_ == 0) {
  358. // nothing
  359. } else if (size_ <= ClockBlock::kClockCount) {
  360. // One-level table.
  361. ctx->clock_alloc.Free(c, tab_idx_);
  362. } else {
  363. // Two-level table.
  364. for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)
  365. ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);
  366. ctx->clock_alloc.Free(c, tab_idx_);
  367. }
  368. tab_ = 0;
  369. tab_idx_ = 0;
  370. size_ = 0;
  371. release_store_tid_ = kInvalidTid;
  372. release_store_reused_ = 0;
  373. for (uptr i = 0; i < kDirtyTids; i++)
  374. dirty_tids_[i] = kInvalidTid;
  375. }
  376. ClockElem &SyncClock::elem(unsigned tid) const {
  377. DCHECK_LT(tid, size_);
  378. if (size_ <= ClockBlock::kClockCount)
  379. return tab_->clock[tid];
  380. u32 idx = tab_->table[tid / ClockBlock::kClockCount];
  381. ClockBlock *cb = ctx->clock_alloc.Map(idx);
  382. return cb->clock[tid % ClockBlock::kClockCount];
  383. }
  384. void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
  385. printf("clock=[");
  386. for (uptr i = 0; i < size_; i++)
  387. printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
  388. printf("] reused=[");
  389. for (uptr i = 0; i < size_; i++)
  390. printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
  391. printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
  392. release_store_tid_, release_store_reused_,
  393. dirty_tids_[0], dirty_tids_[1]);
  394. }
  395. } // namespace __tsan