pthread_stop_world.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581
  1. #include "private/pthread_support.h"
  2. #if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS) \
  3. && !defined(GC_WIN32_THREADS) && !defined(GC_DARWIN_THREADS)
  4. #include <signal.h>
  5. #include <semaphore.h>
  6. #include <errno.h>
  7. #include <unistd.h>
  8. #include <sys/time.h>
  9. #ifndef HPUX
  10. # include <sys/select.h>
  11. /* Doesn't exist on HP/UX 11.11. */
  12. #endif
  13. void suspend_self();
  14. #if DEBUG_THREADS
  15. #ifndef NSIG
  16. # if defined(MAXSIG)
  17. # define NSIG (MAXSIG+1)
  18. # elif defined(_NSIG)
  19. # define NSIG _NSIG
  20. # elif defined(__SIGRTMAX)
  21. # define NSIG (__SIGRTMAX+1)
  22. # else
  23. --> please fix it
  24. # endif
  25. #endif
  26. void GC_print_sig_mask()
  27. {
  28. sigset_t blocked;
  29. int i;
  30. if (pthread_sigmask(SIG_BLOCK, NULL, &blocked) != 0)
  31. ABORT("pthread_sigmask");
  32. GC_printf0("Blocked: ");
  33. for (i = 1; i < NSIG; i++) {
  34. if (sigismember(&blocked, i)) { GC_printf1("%ld ",(long) i); }
  35. }
  36. GC_printf0("\n");
  37. }
  38. #endif
  39. /* Remove the signals that we want to allow in thread stopping */
  40. /* handler from a set. */
  41. void GC_remove_allowed_signals(sigset_t *set)
  42. {
  43. # ifdef NO_SIGNALS
  44. if (sigdelset(set, SIGINT) != 0
  45. || sigdelset(set, SIGQUIT) != 0
  46. || sigdelset(set, SIGABRT) != 0
  47. || sigdelset(set, SIGTERM) != 0) {
  48. ABORT("sigdelset() failed");
  49. }
  50. # endif
  51. # ifdef MPROTECT_VDB
  52. /* Handlers write to the thread structure, which is in the heap, */
  53. /* and hence can trigger a protection fault. */
  54. if (sigdelset(set, SIGSEGV) != 0
  55. # ifdef SIGBUS
  56. || sigdelset(set, SIGBUS) != 0
  57. # endif
  58. ) {
  59. ABORT("sigdelset() failed");
  60. }
  61. # endif
  62. }
  63. static sigset_t suspend_handler_mask;
  64. volatile sig_atomic_t GC_stop_count;
  65. /* Incremented at the beginning of GC_stop_world. */
  66. volatile sig_atomic_t GC_world_is_stopped = FALSE;
  67. /* FALSE ==> it is safe for threads to restart, i.e. */
  68. /* they will see another suspend signal before they */
  69. /* are expected to stop (unless they have voluntarily */
  70. /* stopped). */
  71. void GC_brief_async_signal_safe_sleep()
  72. {
  73. struct timeval tv;
  74. tv.tv_sec = 0;
  75. tv.tv_usec = 1000 * TIME_LIMIT / 2;
  76. select(0, 0, 0, 0, &tv);
  77. }
  78. #ifdef GC_OSF1_THREADS
  79. GC_bool GC_retry_signals = TRUE;
  80. #else
  81. GC_bool GC_retry_signals = FALSE;
  82. #endif
  83. /*
  84. * We use signals to stop threads during GC.
  85. *
  86. * Suspended threads wait in signal handler for SIG_THR_RESTART.
  87. * That's more portable than semaphores or condition variables.
  88. * (We do use sem_post from a signal handler, but that should be portable.)
  89. *
  90. * The thread suspension signal SIG_SUSPEND is now defined in gc_priv.h.
  91. * Note that we can't just stop a thread; we need it to save its stack
  92. * pointer(s) and acknowledge.
  93. */
  /* Signal that wakes a thread parked in the suspend handler.  It can  */
  /* be predefined by the build; otherwise a real-time signal is used   */
  /* for HP/UX and OSF1 threads and SIGXCPU everywhere else.  The       */
  /* arithmetic forms are parenthesized so that the macro expands       */
  /* safely inside any larger expression.                                */
  #ifndef SIG_THR_RESTART
  # if defined(GC_HPUX_THREADS) || defined(GC_OSF1_THREADS)
  #   ifdef _SIGRTMIN
  #     define SIG_THR_RESTART (_SIGRTMIN + 5)
  #   else
  #     define SIG_THR_RESTART (SIGRTMIN + 5)
  #   endif
  # else
  #   define SIG_THR_RESTART SIGXCPU
  # endif
  #endif
  105. sem_t GC_suspend_ack_sem;
  106. void GC_suspend_handler_inner(ptr_t sig_arg);
  107. #if defined(IA64) || defined(HP_PA) || defined(M68K)
  108. extern void GC_with_callee_saves_pushed();
  109. void GC_suspend_handler(int sig)
  110. {
  111. GC_thread me = GC_lookup_thread (pthread_self());
  112. if (me -> flags & SUSPENDED)
  113. suspend_self();
  114. else {
  115. int old_errno = errno;
  116. GC_with_callee_saves_pushed(GC_suspend_handler_inner, (ptr_t)(word)sig);
  117. errno = old_errno;
  118. }
  119. }
  120. #else
  121. /* We believe that in all other cases the full context is already */
  122. /* in the signal handler frame. */
  123. void GC_suspend_handler(int sig)
  124. {
  125. GC_thread me = GC_lookup_thread(pthread_self());
  126. if (me -> flags & SUSPENDED)
  127. suspend_self();
  128. else {
  129. int old_errno = errno;
  130. GC_suspend_handler_inner((ptr_t)(word)sig);
  131. errno = old_errno;
  132. }
  133. }
  134. #endif
  135. void GC_suspend_handler_inner(ptr_t sig_arg)
  136. {
  137. int sig = (int)(word)sig_arg;
  138. int dummy;
  139. pthread_t my_thread = pthread_self();
  140. GC_thread me;
  141. # ifdef PARALLEL_MARK
  142. word my_mark_no = GC_mark_no;
  143. /* Marker can't proceed until we acknowledge. Thus this is */
  144. /* guaranteed to be the mark_no correspending to our */
  145. /* suspension, i.e. the marker can't have incremented it yet. */
  146. # endif
  147. word my_stop_count = GC_stop_count;
  148. if (sig != SIG_SUSPEND) ABORT("Bad signal in suspend_handler");
  149. #if DEBUG_THREADS
  150. GC_printf1("Suspending 0x%lx\n", my_thread);
  151. #endif
  152. me = GC_lookup_thread(my_thread);
  153. /* The lookup here is safe, since I'm doing this on behalf */
  154. /* of a thread which holds the allocation lock in order */
  155. /* to stop the world. Thus concurrent modification of the */
  156. /* data structure is impossible. */
  157. if (me -> stop_info.last_stop_count == my_stop_count) {
  158. /* Duplicate signal. OK if we are retrying. */
  159. if (!GC_retry_signals) {
  160. WARN("Duplicate suspend signal in thread %lx\n",
  161. pthread_self());
  162. }
  163. return;
  164. }
  165. # ifdef SPARC
  166. me -> stop_info.stack_ptr = (ptr_t)GC_save_regs_in_stack();
  167. # else
  168. me -> stop_info.stack_ptr = (ptr_t)(&dummy);
  169. # endif
  170. # ifdef IA64
  171. me -> backing_store_ptr = (ptr_t)GC_save_regs_in_stack();
  172. # endif
  173. /* Tell the thread that wants to stop the world that this */
  174. /* thread has been stopped. Note that sem_post() is */
  175. /* the only async-signal-safe primitive in LinuxThreads. */
  176. sem_post(&GC_suspend_ack_sem);
  177. me -> stop_info.last_stop_count = my_stop_count;
  178. /* Wait until that thread tells us to restart by sending */
  179. /* this thread a SIG_THR_RESTART signal. */
  180. /* SIG_THR_RESTART should be masked at this point. Thus there */
  181. /* is no race. */
  182. /* We do not continue until we receive a SIG_THR_RESTART, */
  183. /* but we do not take that as authoritative. (We may be */
  184. /* accidentally restarted by one of the user signals we */
  185. /* don't block.) After we receive the signal, we use a */
  186. /* primitive and expensive mechanism to wait until it's */
  187. /* really safe to proceed. Under normal circumstances, */
  188. /* this code should not be executed. */
  189. sigsuspend(&suspend_handler_mask); /* Wait for signal */
  190. while (GC_world_is_stopped && GC_stop_count == my_stop_count) {
  191. GC_brief_async_signal_safe_sleep();
  192. # if DEBUG_THREADS
  193. GC_err_printf0("Sleeping in signal handler");
  194. # endif
  195. }
  196. /* If the RESTART signal gets lost, we can still lose. That should be */
  197. /* less likely than losing the SUSPEND signal, since we don't do much */
  198. /* between the sem_post and sigsuspend. */
  199. /* We'd need more handshaking to work around that. */
  200. /* Simply dropping the sigsuspend call should be safe, but is unlikely */
  201. /* to be efficient. */
  202. #if DEBUG_THREADS
  203. GC_printf1("Continuing 0x%lx\n", my_thread);
  204. #endif
  205. }
  206. void GC_restart_handler(int sig)
  207. {
  208. pthread_t my_thread = pthread_self();
  209. if (sig != SIG_THR_RESTART) ABORT("Bad signal in suspend_handler");
  210. /*
  211. ** Note: even if we don't do anything useful here,
  212. ** it would still be necessary to have a signal handler,
  213. ** rather than ignoring the signals, otherwise
  214. ** the signals will not be delivered at all, and
  215. ** will thus not interrupt the sigsuspend() above.
  216. */
  217. #if DEBUG_THREADS
  218. GC_printf1("In GC_restart_handler for 0x%lx\n", pthread_self());
  219. #endif
  220. }
  /* IF_IA64(x) expands to x when building for IA64 and to nothing on   */
  /* every other target.                                                  */
  #ifdef IA64
  #   define IF_IA64(x) x
  #else
  #   define IF_IA64(x)
  #endif
  226. /* We hold allocation lock. Should do exactly the right thing if the */
  227. /* world is stopped. Should not fail if it isn't. */
  228. void GC_push_all_stacks()
  229. {
  230. GC_bool found_me = FALSE;
  231. int i;
  232. GC_thread p;
  233. ptr_t lo, hi;
  234. /* On IA64, we also need to scan the register backing store. */
  235. IF_IA64(ptr_t bs_lo; ptr_t bs_hi;)
  236. pthread_t me = pthread_self();
  237. if (!GC_thr_initialized) GC_thr_init();
  238. #if DEBUG_THREADS
  239. GC_printf1("Pushing stacks from thread 0x%lx\n", (unsigned long) me);
  240. #endif
  241. for (i = 0; i < THREAD_TABLE_SZ; i++) {
  242. for (p = GC_threads[i]; p != 0; p = p -> next) {
  243. if (p -> flags & FINISHED) continue;
  244. if (pthread_equal(p -> id, me)) {
  245. # ifdef SPARC
  246. lo = (ptr_t)GC_save_regs_in_stack();
  247. # else
  248. lo = GC_approx_sp();
  249. # endif
  250. found_me = TRUE;
  251. IF_IA64(bs_hi = (ptr_t)GC_save_regs_in_stack();)
  252. } else {
  253. lo = p -> stop_info.stack_ptr;
  254. IF_IA64(bs_hi = p -> backing_store_ptr;)
  255. }
  256. if ((p -> flags & MAIN_THREAD) == 0) {
  257. hi = p -> stack_end;
  258. IF_IA64(bs_lo = p -> backing_store_end);
  259. } else {
  260. /* The original stack. */
  261. hi = GC_stackbottom;
  262. IF_IA64(bs_lo = BACKING_STORE_BASE;)
  263. }
  264. #if DEBUG_THREADS
  265. GC_printf3("Stack for thread 0x%lx = [%lx,%lx)\n",
  266. (unsigned long) p -> id,
  267. (unsigned long) lo, (unsigned long) hi);
  268. #endif
  269. if (0 == lo) ABORT("GC_push_all_stacks: sp not set!\n");
  270. # ifdef STACK_GROWS_UP
  271. /* We got them backwards! */
  272. GC_push_all_stack(hi, lo);
  273. # else
  274. GC_push_all_stack(lo, hi);
  275. # endif
  276. # ifdef IA64
  277. # if DEBUG_THREADS
  278. GC_printf3("Reg stack for thread 0x%lx = [%lx,%lx)\n",
  279. (unsigned long) p -> id,
  280. (unsigned long) bs_lo, (unsigned long) bs_hi);
  281. # endif
  282. if (pthread_equal(p -> id, me)) {
  283. GC_push_all_eager(bs_lo, bs_hi);
  284. } else {
  285. GC_push_all_stack(bs_lo, bs_hi);
  286. }
  287. # endif
  288. }
  289. }
  290. if (!found_me && !GC_in_thread_creation)
  291. ABORT("Collecting from unknown thread.");
  292. }
  /* There seems to be a very rare thread stopping problem.  To help    */
  /* debug it, GC_suspend_all() records who is stopping the world.      */

  /* Thread that most recently began suspending the other threads.      */
  pthread_t GC_stopping_thread;

  /* Process id, as returned by getpid(), of that thread.               */
  int GC_stopping_pid;
  297. /* We hold the allocation lock. Suspend all threads that might */
  298. /* still be running. Return the number of suspend signals that */
  299. /* were sent. */
  300. int GC_suspend_all()
  301. {
  302. int n_live_threads = 0;
  303. int i;
  304. GC_thread p;
  305. int result;
  306. pthread_t my_thread = pthread_self();
  307. GC_stopping_thread = my_thread; /* debugging only. */
  308. GC_stopping_pid = getpid(); /* debugging only. */
  309. for (i = 0; i < THREAD_TABLE_SZ; i++) {
  310. for (p = GC_threads[i]; p != 0; p = p -> next) {
  311. if (p -> id != my_thread) {
  312. if (p -> flags & FINISHED) continue;
  313. if (p -> stop_info.last_stop_count == GC_stop_count) continue;
  314. if (p -> thread_blocked) /* Will wait */ continue;
  315. n_live_threads++;
  316. #if DEBUG_THREADS
  317. GC_printf1("Sending suspend signal to 0x%lx\n", p -> id);
  318. #endif
  319. result = pthread_kill(p -> id, SIG_SUSPEND);
  320. switch(result) {
  321. case ESRCH:
  322. /* Not really there anymore. Possible? */
  323. n_live_threads--;
  324. break;
  325. case 0:
  326. break;
  327. default:
  328. ABORT("pthread_kill failed");
  329. }
  330. }
  331. }
  332. }
  333. return n_live_threads;
  334. }
  335. /* Caller holds allocation lock. */
  336. void GC_stop_world()
  337. {
  338. int i;
  339. int n_live_threads;
  340. int code;
  341. #if DEBUG_THREADS
  342. GC_printf1("Stopping the world from 0x%lx\n", pthread_self());
  343. #endif
  344. /* Make sure all free list construction has stopped before we start. */
  345. /* No new construction can start, since free list construction is */
  346. /* required to acquire and release the GC lock before it starts, */
  347. /* and we have the lock. */
  348. # ifdef PARALLEL_MARK
  349. GC_acquire_mark_lock();
  350. GC_ASSERT(GC_fl_builder_count == 0);
  351. /* We should have previously waited for it to become zero. */
  352. # endif /* PARALLEL_MARK */
  353. ++GC_stop_count;
  354. GC_world_is_stopped = TRUE;
  355. n_live_threads = GC_suspend_all();
  356. if (GC_retry_signals) {
  357. unsigned long wait_usecs = 0; /* Total wait since retry. */
  358. # define WAIT_UNIT 3000
  359. # define RETRY_INTERVAL 100000
  360. for (;;) {
  361. int ack_count;
  362. sem_getvalue(&GC_suspend_ack_sem, &ack_count);
  363. if (ack_count == n_live_threads) break;
  364. if (wait_usecs > RETRY_INTERVAL) {
  365. int newly_sent = GC_suspend_all();
  366. # ifdef CONDPRINT
  367. if (GC_print_stats) {
  368. GC_printf1("Resent %ld signals after timeout\n",
  369. newly_sent);
  370. }
  371. # endif
  372. sem_getvalue(&GC_suspend_ack_sem, &ack_count);
  373. if (newly_sent < n_live_threads - ack_count) {
  374. WARN("Lost some threads during GC_stop_world?!\n",0);
  375. n_live_threads = ack_count + newly_sent;
  376. }
  377. wait_usecs = 0;
  378. }
  379. usleep(WAIT_UNIT);
  380. wait_usecs += WAIT_UNIT;
  381. }
  382. }
  383. for (i = 0; i < n_live_threads; i++) {
  384. while (0 != (code = sem_wait(&GC_suspend_ack_sem))) {
  385. if (errno != EINTR) {
  386. GC_err_printf1("Sem_wait returned %ld\n", (unsigned long)code);
  387. ABORT("sem_wait for handler failed");
  388. }
  389. }
  390. }
  391. # ifdef PARALLEL_MARK
  392. GC_release_mark_lock();
  393. # endif
  394. #if DEBUG_THREADS
  395. GC_printf1("World stopped from 0x%lx\n", pthread_self());
  396. #endif
  397. GC_stopping_thread = 0; /* debugging only */
  398. }
  399. void suspend_self() {
  400. GC_thread me = GC_lookup_thread(pthread_self());
  401. if (me == NULL)
  402. ABORT("attempting to suspend unknown thread");
  403. me -> flags |= SUSPENDED;
  404. GC_start_blocking();
  405. while (me -> flags & SUSPENDED)
  406. GC_brief_async_signal_safe_sleep();
  407. GC_end_blocking();
  408. }
  409. void GC_suspend_thread(pthread_t thread) {
  410. if (thread == pthread_self())
  411. suspend_self();
  412. else {
  413. int result;
  414. GC_thread t = GC_lookup_thread(thread);
  415. if (t == NULL)
  416. ABORT("attempting to suspend unknown thread");
  417. t -> flags |= SUSPENDED;
  418. result = pthread_kill (t -> id, SIG_SUSPEND);
  419. switch (result) {
  420. case ESRCH:
  421. case 0:
  422. break;
  423. default:
  424. ABORT("pthread_kill failed");
  425. }
  426. }
  427. }
  428. void GC_resume_thread(pthread_t thread) {
  429. GC_thread t = GC_lookup_thread(thread);
  430. if (t == NULL)
  431. ABORT("attempting to resume unknown thread");
  432. t -> flags &= ~SUSPENDED;
  433. }
  434. int GC_is_thread_suspended(pthread_t thread) {
  435. GC_thread t = GC_lookup_thread(thread);
  436. if (t == NULL)
  437. ABORT("querying suspension state of unknown thread");
  438. return (t -> flags & SUSPENDED);
  439. }
  440. /* Caller holds allocation lock, and has held it continuously since */
  441. /* the world stopped. */
  442. void GC_start_world()
  443. {
  444. pthread_t my_thread = pthread_self();
  445. register int i;
  446. register GC_thread p;
  447. register int n_live_threads = 0;
  448. register int result;
  449. # if DEBUG_THREADS
  450. GC_printf0("World starting\n");
  451. # endif
  452. GC_world_is_stopped = FALSE;
  453. for (i = 0; i < THREAD_TABLE_SZ; i++) {
  454. for (p = GC_threads[i]; p != 0; p = p -> next) {
  455. if (p -> id != my_thread) {
  456. if (p -> flags & FINISHED) continue;
  457. if (p -> thread_blocked) continue;
  458. n_live_threads++;
  459. #if DEBUG_THREADS
  460. GC_printf1("Sending restart signal to 0x%lx\n", p -> id);
  461. #endif
  462. result = pthread_kill(p -> id, SIG_THR_RESTART);
  463. switch(result) {
  464. case ESRCH:
  465. /* Not really there anymore. Possible? */
  466. n_live_threads--;
  467. break;
  468. case 0:
  469. break;
  470. default:
  471. ABORT("pthread_kill failed");
  472. }
  473. }
  474. }
  475. }
  476. #if DEBUG_THREADS
  477. GC_printf0("World started\n");
  478. #endif
  479. }
  480. void GC_stop_init() {
  481. struct sigaction act;
  482. if (sem_init(&GC_suspend_ack_sem, 0, 0) != 0)
  483. ABORT("sem_init failed");
  484. act.sa_flags = SA_RESTART;
  485. if (sigfillset(&act.sa_mask) != 0) {
  486. ABORT("sigfillset() failed");
  487. }
  488. GC_remove_allowed_signals(&act.sa_mask);
  489. /* SIG_THR_RESTART is set in the resulting mask. */
  490. /* It is unmasked by the handler when necessary. */
  491. act.sa_handler = GC_suspend_handler;
  492. if (sigaction(SIG_SUSPEND, &act, NULL) != 0) {
  493. ABORT("Cannot set SIG_SUSPEND handler");
  494. }
  495. act.sa_handler = GC_restart_handler;
  496. if (sigaction(SIG_THR_RESTART, &act, NULL) != 0) {
  497. ABORT("Cannot set SIG_THR_RESTART handler");
  498. }
  499. /* Inititialize suspend_handler_mask. It excludes SIG_THR_RESTART. */
  500. if (sigfillset(&suspend_handler_mask) != 0) ABORT("sigfillset() failed");
  501. GC_remove_allowed_signals(&suspend_handler_mask);
  502. if (sigdelset(&suspend_handler_mask, SIG_THR_RESTART) != 0)
  503. ABORT("sigdelset() failed");
  504. /* Check for GC_RETRY_SIGNALS. */
  505. if (0 != GETENV("GC_RETRY_SIGNALS")) {
  506. GC_retry_signals = TRUE;
  507. }
  508. if (0 != GETENV("GC_NO_RETRY_SIGNALS")) {
  509. GC_retry_signals = FALSE;
  510. }
  511. # ifdef CONDPRINT
  512. if (GC_print_stats && GC_retry_signals) {
  513. GC_printf0("Will retry suspend signal if necessary.\n");
  514. }
  515. # endif
  516. }
  517. #endif