util.go 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. // Copyright (C) 2014 The Syncthing Authors.
  2. //
  3. // This Source Code Form is subject to the terms of the Mozilla Public
  4. // License, v. 2.0. If a copy of the MPL was not distributed with this file,
  5. // You can obtain one at https://mozilla.org/MPL/2.0/.
  6. package model
  7. import (
  8. "context"
  9. "errors"
  10. "fmt"
  11. "path/filepath"
  12. "strings"
  13. "sync"
  14. "time"
  15. "github.com/prometheus/client_golang/prometheus"
  16. "github.com/syncthing/syncthing/lib/events"
  17. "github.com/syncthing/syncthing/lib/fs"
  18. "github.com/syncthing/syncthing/lib/ur"
  19. )
  // Holdable is an optional interface for watched lockers. When a locker
  // registered with the deadlock detector also implements Holdable, the
  // string returned by Holders is included in the fatal deadlock report
  // (presumably it describes who currently holds the lock; confirm against
  // the implementing mutex types).
  type Holdable interface {
  	Holders() string
  }
  23. func newDeadlockDetector(timeout time.Duration, evLogger events.Logger, fatal func(error)) *deadlockDetector {
  24. return &deadlockDetector{
  25. warnTimeout: timeout,
  26. fatalTimeout: 10 * timeout,
  27. lockers: make(map[string]sync.Locker),
  28. evLogger: evLogger,
  29. fatal: fatal,
  30. }
  31. }
  32. type deadlockDetector struct {
  33. warnTimeout, fatalTimeout time.Duration
  34. lockers map[string]sync.Locker
  35. evLogger events.Logger
  36. fatal func(error)
  37. }
  38. func (d *deadlockDetector) Watch(name string, mut sync.Locker) {
  39. d.lockers[name] = mut
  40. go func() {
  41. for {
  42. time.Sleep(d.warnTimeout / 4)
  43. done := make(chan struct{}, 1)
  44. go func() {
  45. mut.Lock()
  46. _ = 1 // empty critical section
  47. mut.Unlock()
  48. done <- struct{}{}
  49. }()
  50. d.watchInner(name, done)
  51. }
  52. }()
  53. }
  54. func (d *deadlockDetector) watchInner(name string, done chan struct{}) {
  55. warn := time.NewTimer(d.warnTimeout)
  56. fatal := time.NewTimer(d.fatalTimeout)
  57. defer func() {
  58. warn.Stop()
  59. fatal.Stop()
  60. }()
  61. select {
  62. case <-warn.C:
  63. failure := ur.FailureDataWithGoroutines(fmt.Sprintf("potential deadlock detected at %s (short timeout)", name))
  64. failure.Extra["timeout"] = d.warnTimeout.String()
  65. d.evLogger.Log(events.Failure, failure)
  66. case <-done:
  67. return
  68. }
  69. select {
  70. case <-fatal.C:
  71. err := fmt.Errorf("potential deadlock detected at %s (long timeout)", name)
  72. failure := ur.FailureDataWithGoroutines(err.Error())
  73. failure.Extra["timeout"] = d.fatalTimeout.String()
  74. others := d.otherHolders()
  75. failure.Extra["other-holders"] = others
  76. d.evLogger.Log(events.Failure, failure)
  77. d.fatal(err)
  78. // Give it a minute to shut down gracefully, maybe shutting down
  79. // can get out of the deadlock (or it's not really a deadlock).
  80. time.Sleep(time.Minute)
  81. panic(fmt.Sprintf("%v:\n%v", err, others))
  82. case <-done:
  83. }
  84. }
  85. func (d *deadlockDetector) otherHolders() string {
  86. var b strings.Builder
  87. for otherName, otherMut := range d.lockers {
  88. if otherHolder, ok := otherMut.(Holdable); ok {
  89. b.WriteString("===" + otherName + "===\n" + otherHolder.Holders() + "\n")
  90. }
  91. }
  92. return b.String()
  93. }
  94. // inWritableDir calls fn(path), while making sure that the directory
  95. // containing `path` is writable for the duration of the call.
  96. func inWritableDir(fn func(string) error, targetFs fs.Filesystem, path string, ignorePerms bool) error {
  97. dir := filepath.Dir(path)
  98. info, err := targetFs.Stat(dir)
  99. if err != nil {
  100. return err
  101. }
  102. if !info.IsDir() {
  103. return errors.New("Not a directory: " + path)
  104. }
  105. const permBits = fs.ModePerm | fs.ModeSetuid | fs.ModeSetgid | fs.ModeSticky
  106. var parentErr error
  107. if mode := info.Mode() & permBits; mode&0o200 == 0 {
  108. // A non-writeable directory (for this user; we assume that's the
  109. // relevant part). Temporarily change the mode so we can delete the
  110. // file or directory inside it.
  111. parentErr = targetFs.Chmod(dir, mode|0o700)
  112. if parentErr != nil {
  113. l.Debugf("Failed to make parent directory writable: %v", parentErr)
  114. } else {
  115. // Chmod succeeded, we should change the permissions back on the way
  116. // out. If we fail we log the error as we have irrevocably messed up
  117. // at this point. :( (The operation we were called to wrap has
  118. // succeeded or failed on its own so returning an error to the
  119. // caller is inappropriate.)
  120. defer func() {
  121. if err := targetFs.Chmod(dir, mode); err != nil && !fs.IsNotExist(err) {
  122. logFn := l.Warnln
  123. if ignorePerms {
  124. logFn = l.Debugln
  125. }
  126. logFn("Failed to restore directory permissions after gaining write access:", err)
  127. }
  128. }()
  129. }
  130. }
  131. err = fn(path)
  132. if fs.IsPermission(err) && parentErr != nil {
  133. err = fmt.Errorf("error after failing to make parent directory writable: %w", err)
  134. }
  135. return err
  136. }
  137. // addTimeUntilCancelled adds time to the counter for the duration of the
  138. // Context. We do this piecemeal so that polling the counter during a long
  139. // operation shows a relevant value, instead of the counter just increasing
  140. // by a large amount at the end of the operation.
  141. func addTimeUntilCancelled(ctx context.Context, counter prometheus.Counter) {
  142. t0 := time.Now()
  143. defer func() {
  144. if dur := time.Since(t0).Seconds(); dur > 0 {
  145. counter.Add(dur)
  146. }
  147. }()
  148. ticker := time.NewTicker(time.Second)
  149. defer ticker.Stop()
  150. for {
  151. select {
  152. case t := <-ticker.C:
  153. if dur := t.Sub(t0).Seconds(); dur > 0 {
  154. counter.Add(dur)
  155. }
  156. t0 = t
  157. case <-ctx.Done():
  158. return
  159. }
  160. }
  161. }