123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428 |
- //===-- tsan_clock.cc -----------------------------------------------------===//
- //
- // This file is distributed under the University of Illinois Open Source
- // License. See LICENSE.TXT for details.
- //
- //===----------------------------------------------------------------------===//
- //
- // This file is a part of ThreadSanitizer (TSan), a race detector.
- //
- //===----------------------------------------------------------------------===//
- #include "tsan_clock.h"
- #include "tsan_rtl.h"
- #include "sanitizer_common/sanitizer_placement_new.h"
- // SyncClock and ThreadClock implement vector clocks for sync variables
- // (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
- // ThreadClock contains fixed-size vector clock for maximum number of threads.
- // SyncClock contains growable vector clock for currently necessary number of
- // threads.
- // Together they implement very simple model of operations, namely:
- //
- // void ThreadClock::acquire(const SyncClock *src) {
- // for (int i = 0; i < kMaxThreads; i++)
- // clock[i] = max(clock[i], src->clock[i]);
- // }
- //
- // void ThreadClock::release(SyncClock *dst) const {
- // for (int i = 0; i < kMaxThreads; i++)
- // dst->clock[i] = max(dst->clock[i], clock[i]);
- // }
- //
- // void ThreadClock::ReleaseStore(SyncClock *dst) const {
- // for (int i = 0; i < kMaxThreads; i++)
- // dst->clock[i] = clock[i];
- // }
- //
- // void ThreadClock::acq_rel(SyncClock *dst) {
- // acquire(dst);
- // release(dst);
- // }
- //
- // Conformance to this model is extensively verified in tsan_clock_test.cc.
- // However, the implementation is significantly more complex. The complexity
- // allows to implement important classes of use cases in O(1) instead of O(N).
- //
- // The use cases are:
- // 1. Singleton/once atomic that has a single release-store operation followed
- // by zillions of acquire-loads (the acquire-load is O(1)).
- // 2. Thread-local mutex (both lock and unlock can be O(1)).
- // 3. Leaf mutex (unlock is O(1)).
- // 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
- // 5. An atomic with a single writer (writes can be O(1)).
- // The implementation dynamically adopts to workload. So if an atomic is in
- // read-only phase, these reads will be O(1); if it later switches to read/write
- // phase, the implementation will correctly handle that by switching to O(N).
- //
- // Thread-safety note: all const operations on SyncClock's are conducted under
- // a shared lock; all non-const operations on SyncClock's are conducted under
- // an exclusive lock; ThreadClock's are private to respective threads and so
- // do not need any protection.
- //
- // Description of ThreadClock state:
- // clk_ - fixed size vector clock.
- // nclk_ - effective size of the vector clock (the rest is zeros).
- // tid_ - index of the thread associated with he clock ("current thread").
- // last_acquire_ - current thread time when it acquired something from
- // other threads.
- //
- // Description of SyncClock state:
- // clk_ - variable size vector clock, low kClkBits hold timestamp,
- // the remaining bits hold "acquired" flag (the actual value is thread's
- // reused counter);
- // if acquried == thr->reused_, then the respective thread has already
- // acquired this clock (except possibly dirty_tids_).
- // dirty_tids_ - holds up to two indeces in the vector clock that other threads
- // need to acquire regardless of "acquired" flag value;
- // release_store_tid_ - denotes that the clock state is a result of
- // release-store operation by the thread with release_store_tid_ index.
- // release_store_reused_ - reuse count of release_store_tid_.
- // We don't have ThreadState in these methods, so this is an ugly hack that
- // works only in C++.
- #ifndef TSAN_GO
- # define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
- #else
- # define CPP_STAT_INC(typ) (void)0
- #endif
- namespace __tsan {
- const unsigned kInvalidTid = (unsigned)-1;
- ThreadClock::ThreadClock(unsigned tid, unsigned reused)
- : tid_(tid)
- , reused_(reused + 1) { // 0 has special meaning
- CHECK_LT(tid, kMaxTidInClock);
- CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
- nclk_ = tid_ + 1;
- last_acquire_ = 0;
- internal_memset(clk_, 0, sizeof(clk_));
- clk_[tid_].reused = reused_;
- }
- void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
- DCHECK(nclk_ <= kMaxTid);
- DCHECK(src->size_ <= kMaxTid);
- CPP_STAT_INC(StatClockAcquire);
- // Check if it's empty -> no need to do anything.
- const uptr nclk = src->size_;
- if (nclk == 0) {
- CPP_STAT_INC(StatClockAcquireEmpty);
- return;
- }
- // Check if we've already acquired src after the last release operation on src
- bool acquired = false;
- if (nclk > tid_) {
- CPP_STAT_INC(StatClockAcquireLarge);
- if (src->elem(tid_).reused == reused_) {
- CPP_STAT_INC(StatClockAcquireRepeat);
- for (unsigned i = 0; i < kDirtyTids; i++) {
- unsigned tid = src->dirty_tids_[i];
- if (tid != kInvalidTid) {
- u64 epoch = src->elem(tid).epoch;
- if (clk_[tid].epoch < epoch) {
- clk_[tid].epoch = epoch;
- acquired = true;
- }
- }
- }
- if (acquired) {
- CPP_STAT_INC(StatClockAcquiredSomething);
- last_acquire_ = clk_[tid_].epoch;
- }
- return;
- }
- }
- // O(N) acquire.
- CPP_STAT_INC(StatClockAcquireFull);
- nclk_ = max(nclk_, nclk);
- for (uptr i = 0; i < nclk; i++) {
- u64 epoch = src->elem(i).epoch;
- if (clk_[i].epoch < epoch) {
- clk_[i].epoch = epoch;
- acquired = true;
- }
- }
- // Remember that this thread has acquired this clock.
- if (nclk > tid_)
- src->elem(tid_).reused = reused_;
- if (acquired) {
- CPP_STAT_INC(StatClockAcquiredSomething);
- last_acquire_ = clk_[tid_].epoch;
- }
- }
- void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
- DCHECK_LE(nclk_, kMaxTid);
- DCHECK_LE(dst->size_, kMaxTid);
- if (dst->size_ == 0) {
- // ReleaseStore will correctly set release_store_tid_,
- // which can be important for future operations.
- ReleaseStore(c, dst);
- return;
- }
- CPP_STAT_INC(StatClockRelease);
- // Check if we need to resize dst.
- if (dst->size_ < nclk_)
- dst->Resize(c, nclk_);
- // Check if we had not acquired anything from other threads
- // since the last release on dst. If so, we need to update
- // only dst->elem(tid_).
- if (dst->elem(tid_).epoch > last_acquire_) {
- UpdateCurrentThread(dst);
- if (dst->release_store_tid_ != tid_ ||
- dst->release_store_reused_ != reused_)
- dst->release_store_tid_ = kInvalidTid;
- return;
- }
- // O(N) release.
- CPP_STAT_INC(StatClockReleaseFull);
- // First, remember whether we've acquired dst.
- bool acquired = IsAlreadyAcquired(dst);
- if (acquired)
- CPP_STAT_INC(StatClockReleaseAcquired);
- // Update dst->clk_.
- for (uptr i = 0; i < nclk_; i++) {
- ClockElem &ce = dst->elem(i);
- ce.epoch = max(ce.epoch, clk_[i].epoch);
- ce.reused = 0;
- }
- // Clear 'acquired' flag in the remaining elements.
- if (nclk_ < dst->size_)
- CPP_STAT_INC(StatClockReleaseClearTail);
- for (uptr i = nclk_; i < dst->size_; i++)
- dst->elem(i).reused = 0;
- for (unsigned i = 0; i < kDirtyTids; i++)
- dst->dirty_tids_[i] = kInvalidTid;
- dst->release_store_tid_ = kInvalidTid;
- dst->release_store_reused_ = 0;
- // If we've acquired dst, remember this fact,
- // so that we don't need to acquire it on next acquire.
- if (acquired)
- dst->elem(tid_).reused = reused_;
- }
- void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
- DCHECK(nclk_ <= kMaxTid);
- DCHECK(dst->size_ <= kMaxTid);
- CPP_STAT_INC(StatClockStore);
- // Check if we need to resize dst.
- if (dst->size_ < nclk_)
- dst->Resize(c, nclk_);
- if (dst->release_store_tid_ == tid_ &&
- dst->release_store_reused_ == reused_ &&
- dst->elem(tid_).epoch > last_acquire_) {
- CPP_STAT_INC(StatClockStoreFast);
- UpdateCurrentThread(dst);
- return;
- }
- // O(N) release-store.
- CPP_STAT_INC(StatClockStoreFull);
- for (uptr i = 0; i < nclk_; i++) {
- ClockElem &ce = dst->elem(i);
- ce.epoch = clk_[i].epoch;
- ce.reused = 0;
- }
- // Clear the tail of dst->clk_.
- if (nclk_ < dst->size_) {
- for (uptr i = nclk_; i < dst->size_; i++) {
- ClockElem &ce = dst->elem(i);
- ce.epoch = 0;
- ce.reused = 0;
- }
- CPP_STAT_INC(StatClockStoreTail);
- }
- for (unsigned i = 0; i < kDirtyTids; i++)
- dst->dirty_tids_[i] = kInvalidTid;
- dst->release_store_tid_ = tid_;
- dst->release_store_reused_ = reused_;
- // Rememeber that we don't need to acquire it in future.
- dst->elem(tid_).reused = reused_;
- }
- void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
- CPP_STAT_INC(StatClockAcquireRelease);
- acquire(c, dst);
- ReleaseStore(c, dst);
- }
- // Updates only single element related to the current thread in dst->clk_.
- void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
- // Update the threads time, but preserve 'acquired' flag.
- dst->elem(tid_).epoch = clk_[tid_].epoch;
- for (unsigned i = 0; i < kDirtyTids; i++) {
- if (dst->dirty_tids_[i] == tid_) {
- CPP_STAT_INC(StatClockReleaseFast1);
- return;
- }
- if (dst->dirty_tids_[i] == kInvalidTid) {
- CPP_STAT_INC(StatClockReleaseFast2);
- dst->dirty_tids_[i] = tid_;
- return;
- }
- }
- // Reset all 'acquired' flags, O(N).
- CPP_STAT_INC(StatClockReleaseSlow);
- for (uptr i = 0; i < dst->size_; i++)
- dst->elem(i).reused = 0;
- for (unsigned i = 0; i < kDirtyTids; i++)
- dst->dirty_tids_[i] = kInvalidTid;
- }
- // Checks whether the current threads has already acquired src.
- bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
- if (src->elem(tid_).reused != reused_)
- return false;
- for (unsigned i = 0; i < kDirtyTids; i++) {
- unsigned tid = src->dirty_tids_[i];
- if (tid != kInvalidTid) {
- if (clk_[tid].epoch < src->elem(tid).epoch)
- return false;
- }
- }
- return true;
- }
- void SyncClock::Resize(ClockCache *c, uptr nclk) {
- CPP_STAT_INC(StatClockReleaseResize);
- if (RoundUpTo(nclk, ClockBlock::kClockCount) <=
- RoundUpTo(size_, ClockBlock::kClockCount)) {
- // Growing within the same block.
- // Memory is already allocated, just increase the size.
- size_ = nclk;
- return;
- }
- if (nclk <= ClockBlock::kClockCount) {
- // Grow from 0 to one-level table.
- CHECK_EQ(size_, 0);
- CHECK_EQ(tab_, 0);
- CHECK_EQ(tab_idx_, 0);
- size_ = nclk;
- tab_idx_ = ctx->clock_alloc.Alloc(c);
- tab_ = ctx->clock_alloc.Map(tab_idx_);
- internal_memset(tab_, 0, sizeof(*tab_));
- return;
- }
- // Growing two-level table.
- if (size_ == 0) {
- // Allocate first level table.
- tab_idx_ = ctx->clock_alloc.Alloc(c);
- tab_ = ctx->clock_alloc.Map(tab_idx_);
- internal_memset(tab_, 0, sizeof(*tab_));
- } else if (size_ <= ClockBlock::kClockCount) {
- // Transform one-level table to two-level table.
- u32 old = tab_idx_;
- tab_idx_ = ctx->clock_alloc.Alloc(c);
- tab_ = ctx->clock_alloc.Map(tab_idx_);
- internal_memset(tab_, 0, sizeof(*tab_));
- tab_->table[0] = old;
- }
- // At this point we have first level table allocated.
- // Add second level tables as necessary.
- for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount);
- i < nclk; i += ClockBlock::kClockCount) {
- u32 idx = ctx->clock_alloc.Alloc(c);
- ClockBlock *cb = ctx->clock_alloc.Map(idx);
- internal_memset(cb, 0, sizeof(*cb));
- CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0);
- tab_->table[i/ClockBlock::kClockCount] = idx;
- }
- size_ = nclk;
- }
- // Sets a single element in the vector clock.
- // This function is called only from weird places like AcquireGlobal.
- void ThreadClock::set(unsigned tid, u64 v) {
- DCHECK_LT(tid, kMaxTid);
- DCHECK_GE(v, clk_[tid].epoch);
- clk_[tid].epoch = v;
- if (nclk_ <= tid)
- nclk_ = tid + 1;
- last_acquire_ = clk_[tid_].epoch;
- }
- void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
- printf("clock=[");
- for (uptr i = 0; i < nclk_; i++)
- printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
- printf("] reused=[");
- for (uptr i = 0; i < nclk_; i++)
- printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
- printf("] tid=%u/%u last_acq=%llu",
- tid_, reused_, last_acquire_);
- }
- SyncClock::SyncClock()
- : release_store_tid_(kInvalidTid)
- , release_store_reused_()
- , tab_()
- , tab_idx_()
- , size_() {
- for (uptr i = 0; i < kDirtyTids; i++)
- dirty_tids_[i] = kInvalidTid;
- }
- SyncClock::~SyncClock() {
- // Reset must be called before dtor.
- CHECK_EQ(size_, 0);
- CHECK_EQ(tab_, 0);
- CHECK_EQ(tab_idx_, 0);
- }
- void SyncClock::Reset(ClockCache *c) {
- if (size_ == 0) {
- // nothing
- } else if (size_ <= ClockBlock::kClockCount) {
- // One-level table.
- ctx->clock_alloc.Free(c, tab_idx_);
- } else {
- // Two-level table.
- for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)
- ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);
- ctx->clock_alloc.Free(c, tab_idx_);
- }
- tab_ = 0;
- tab_idx_ = 0;
- size_ = 0;
- release_store_tid_ = kInvalidTid;
- release_store_reused_ = 0;
- for (uptr i = 0; i < kDirtyTids; i++)
- dirty_tids_[i] = kInvalidTid;
- }
- ClockElem &SyncClock::elem(unsigned tid) const {
- DCHECK_LT(tid, size_);
- if (size_ <= ClockBlock::kClockCount)
- return tab_->clock[tid];
- u32 idx = tab_->table[tid / ClockBlock::kClockCount];
- ClockBlock *cb = ctx->clock_alloc.Map(idx);
- return cb->clock[tid % ClockBlock::kClockCount];
- }
- void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
- printf("clock=[");
- for (uptr i = 0; i < size_; i++)
- printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
- printf("] reused=[");
- for (uptr i = 0; i < size_; i++)
- printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
- printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
- release_store_tid_, release_store_reused_,
- dirty_tids_[0], dirty_tids_[1]);
- }
- } // namespace __tsan
|