svcutil.go 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. // Copyright (C) 2016 The Syncthing Authors.
  2. //
  3. // This Source Code Form is subject to the terms of the Mozilla Public
  4. // License, v. 2.0. If a copy of the MPL was not distributed with this file,
  5. // You can obtain one at https://mozilla.org/MPL/2.0/.
  6. package svcutil
  7. import (
  8. "context"
  9. "errors"
  10. "fmt"
  11. "time"
  12. "github.com/syncthing/syncthing/lib/logger"
  13. "github.com/syncthing/syncthing/lib/sync"
  14. "github.com/thejerf/suture/v4"
  15. )
  // ServiceTimeout is the value placed in the Timeout field of every
  // suture.Spec built by this package.
  const ServiceTimeout time.Duration = 10 * time.Second
  17. type FatalErr struct {
  18. Err error
  19. Status ExitStatus
  20. }
  21. // AsFatalErr wraps the given error creating a FatalErr. If the given error
  22. // already is of type FatalErr, it is not wrapped again.
  23. func AsFatalErr(err error, status ExitStatus) *FatalErr {
  24. var ferr *FatalErr
  25. if errors.As(err, &ferr) {
  26. return ferr
  27. }
  28. return &FatalErr{
  29. Err: err,
  30. Status: status,
  31. }
  32. }
  33. func IsFatal(err error) bool {
  34. ferr := &FatalErr{}
  35. return errors.As(err, &ferr)
  36. }
  37. func (e *FatalErr) Error() string {
  38. return e.Err.Error()
  39. }
  40. func (e *FatalErr) Unwrap() error {
  41. return e.Err
  42. }
  43. func (*FatalErr) Is(target error) bool {
  44. return target == suture.ErrTerminateSupervisorTree
  45. }
  46. // NoRestartErr wraps the given error err (which may be nil) to make sure that
  47. // `errors.Is(err, suture.ErrDoNotRestart) == true`.
  48. func NoRestartErr(err error) error {
  49. if err == nil {
  50. return suture.ErrDoNotRestart
  51. }
  52. return &noRestartErr{err}
  53. }
  54. type noRestartErr struct {
  55. err error
  56. }
  57. func (e *noRestartErr) Error() string {
  58. return e.err.Error()
  59. }
  60. func (e *noRestartErr) Unwrap() error {
  61. return e.err
  62. }
  63. func (*noRestartErr) Is(target error) bool {
  64. return target == suture.ErrDoNotRestart
  65. }
  // ExitStatus is the integer status carried by a FatalErr.
  type ExitStatus int

  // The defined exit statuses. Their numeric values run consecutively from
  // zero in declaration order, so new entries must only be appended.
  const (
  	ExitSuccess            ExitStatus = iota // 0
  	ExitError                                // 1
  	ExitNoUpgradeAvailable                   // 2
  	ExitRestart                              // 3
  	ExitUpgrade                              // 4
  )

  // AsInt returns the status as a plain int.
  func (s ExitStatus) AsInt() int {
  	code := int(s)
  	return code
  }
  77. type ServiceWithError interface {
  78. suture.Service
  79. fmt.Stringer
  80. Error() error
  81. }
  82. // AsService wraps the given function to implement suture.Service. In addition
  83. // it keeps track of the returned error and allows querying that error.
  84. func AsService(fn func(ctx context.Context) error, creator string) ServiceWithError {
  85. return &service{
  86. creator: creator,
  87. serve: fn,
  88. mut: sync.NewMutex(),
  89. }
  90. }
  91. type service struct {
  92. creator string
  93. serve func(ctx context.Context) error
  94. err error
  95. mut sync.Mutex
  96. }
  97. func (s *service) Serve(ctx context.Context) error {
  98. s.mut.Lock()
  99. s.err = nil
  100. s.mut.Unlock()
  101. // The error returned by serve() may well be a network timeout, which as
  102. // of Go 1.19 is a context.DeadlineExceeded, which Suture interprets as
  103. // a signal to stop the service instead of restarting it. This typically
  104. // isn't what we want, so we make sure to remove the context specific
  105. // error types unless *our* context is actually cancelled.
  106. err := asNonContextError(ctx, s.serve(ctx))
  107. s.mut.Lock()
  108. s.err = err
  109. s.mut.Unlock()
  110. return err
  111. }
  112. func (s *service) Error() error {
  113. s.mut.Lock()
  114. defer s.mut.Unlock()
  115. return s.err
  116. }
  117. func (s *service) String() string {
  118. return fmt.Sprintf("Service@%p created by %v", s, s.creator)
  119. }
  // doneService is a func() given a Serve method, so that the function can
  // be run once a context is done.
  type doneService func()

  // Serve blocks until ctx is done, then calls the function once and
  // returns nil.
  func (fn doneService) Serve(ctx context.Context) error {
  	finished := ctx.Done()
  	<-finished
  	fn()
  	return nil
  }
  126. // OnSupervisorDone calls fn when sup is done.
  127. func OnSupervisorDone(sup *suture.Supervisor, fn func()) {
  128. sup.Add(doneService(fn))
  129. }
  130. func SpecWithDebugLogger(l logger.Logger) suture.Spec {
  131. return spec(func(e suture.Event) { l.Debugln(e) })
  132. }
  133. func SpecWithInfoLogger(l logger.Logger) suture.Spec {
  134. return spec(infoEventHook(l))
  135. }
  136. func spec(eventHook suture.EventHook) suture.Spec {
  137. return suture.Spec{
  138. EventHook: eventHook,
  139. Timeout: ServiceTimeout,
  140. PassThroughPanics: true,
  141. DontPropagateTermination: false,
  142. }
  143. }
  144. // infoEventHook prints service failures and failures to stop services at level
  145. // info. All other events and identical, consecutive failures are logged at
  146. // debug only.
  147. func infoEventHook(l logger.Logger) suture.EventHook {
  148. var prevTerminate suture.EventServiceTerminate
  149. return func(ei suture.Event) {
  150. switch e := ei.(type) {
  151. case suture.EventStopTimeout:
  152. l.Infof("%s: Service %s failed to terminate in a timely manner", e.SupervisorName, e.ServiceName)
  153. case suture.EventServicePanic:
  154. l.Warnln("Caught a service panic, which shouldn't happen")
  155. l.Infoln(e)
  156. case suture.EventServiceTerminate:
  157. msg := fmt.Sprintf("%s: service %s failed: %s", e.SupervisorName, e.ServiceName, e.Err)
  158. if e.ServiceName == prevTerminate.ServiceName && e.Err == prevTerminate.Err {
  159. l.Debugln(msg)
  160. } else {
  161. l.Infoln(msg)
  162. }
  163. prevTerminate = e
  164. l.Debugln(e) // Contains some backoff statistics
  165. case suture.EventBackoff:
  166. l.Debugf("%s: exiting the backoff state.", e.SupervisorName)
  167. case suture.EventResume:
  168. l.Debugf("%s: too many service failures - entering the backoff state.", e.SupervisorName)
  169. default:
  170. l.Warnln("Unknown suture supervisor event type", e.Type())
  171. l.Infoln(e)
  172. }
  173. }
  174. }
  // asNonContextError filters context errors out of err. If ctx itself is
  // done, ctx.Err() is returned regardless of err. Otherwise, an err that
  // matches context.Canceled or context.DeadlineExceeded (via errors.Is) is
  // replaced by a new plain error with the same message plus a
  // " (non-context)" suffix, so it no longer matches either sentinel. Any
  // other err, including nil, is returned unchanged.
  func asNonContextError(ctx context.Context, err error) error {
  	// Our own context being done takes precedence over whatever we were
  	// handed.
  	select {
  	case <-ctx.Done():
  		return ctx.Err()
  	default:
  	}

  	looksLikeCtxErr := errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded)
  	if !looksLikeCtxErr {
  		return err
  	}
  	return fmt.Errorf("%s (non-context)", err.Error())
  }
  // CallWithContext runs fn in a new goroutine and waits for either fn to
  // return or ctx to be done, whichever happens first. It returns fn's error
  // in the former case and ctx.Err() in the latter. fn is not interrupted
  // when ctx is done; it keeps running in its goroutine and its result is
  // discarded.
  func CallWithContext(ctx context.Context, fn func() error) error {
  	// Buffered so the goroutine can always deliver its result and exit,
  	// even if nobody is waiting for it any more.
  	result := make(chan error, 1)
  	go func() {
  		result <- fn()
  	}()

  	select {
  	case err := <-result:
  		return err
  	case <-ctx.Done():
  		return ctx.Err()
  	}
  }