/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */
#include "libgomp.h"
#include <stdlib.h>
#include <string.h>

/* This attribute contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;

/* This is the libgomp per-thread data structure.  Kept in native TLS
   when available; otherwise reached through a pthread key.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif
/* This structure is used to communicate across pthread_create.  It is
   allocated on the creating thread's stack (see gomp_alloca in
   gomp_team_start), so the new thread must copy out everything it needs
   before passing the team barrier.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);		/* Outlined parallel-region body.  */
  void *fn_data;		/* Argument passed to FN.  */
  struct gomp_team_state ts;	/* Initial team state for the new thread.  */
  struct gomp_task *task;	/* The thread's implicit task.  */
  struct gomp_thread_pool *thread_pool;	/* Pool the thread docks on.  */
  unsigned int place;		/* 1-based place for affinity, 0 if unbound.  */
  bool nested;			/* True for threads of a nested team.  */
};
/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  DATA lives on the creator's stack
     (gomp_alloca in gomp_team_start), so copy everything out before any
     barrier lets the creator proceed.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      /* Threads of a nested team run exactly one region and exit.  */
      struct gomp_thread *task = NULL;	/* placeholder: see below */
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *tsk = thr->task;

      (void) task;
      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (tsk);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      /* Pool threads dock on threads_dock and are re-used for later
	 teams until the master wakes them with a NULL fn.  */
      pool->threads[thr->ts.team_id] = thr;

      gomp_barrier_wait (&pool->threads_dock);
      do
	{
	  struct gomp_team *team = thr->ts.team;
	  struct gomp_task *task = thr->task;

	  local_fn (local_data);
	  gomp_team_barrier_wait_final (&team->barrier);
	  gomp_finish_task (task);

	  gomp_barrier_wait (&pool->threads_dock);

	  /* The master filled in thr->fn/thr->data before releasing the
	     dock; a NULL fn means this thread should terminate.  */
	  local_fn = thr->fn;
	  local_data = thr->data;
	  thr->fn = NULL;
	}
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
  107. /* Create a new team data structure. */
  108. struct gomp_team *
  109. gomp_new_team (unsigned nthreads)
  110. {
  111. struct gomp_team *team;
  112. size_t size;
  113. int i;
  114. size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])
  115. + sizeof (team->implicit_task[0]));
  116. team = gomp_malloc (size);
  117. team->work_share_chunk = 8;
  118. #ifdef HAVE_SYNC_BUILTINS
  119. team->single_count = 0;
  120. #else
  121. gomp_mutex_init (&team->work_share_list_free_lock);
  122. #endif
  123. team->work_shares_to_free = &team->work_shares[0];
  124. gomp_init_work_share (&team->work_shares[0], false, nthreads);
  125. team->work_shares[0].next_alloc = NULL;
  126. team->work_share_list_free = NULL;
  127. team->work_share_list_alloc = &team->work_shares[1];
  128. for (i = 1; i < 7; i++)
  129. team->work_shares[i].next_free = &team->work_shares[i + 1];
  130. team->work_shares[i].next_free = NULL;
  131. team->nthreads = nthreads;
  132. gomp_barrier_init (&team->barrier, nthreads);
  133. gomp_sem_init (&team->master_release, 0);
  134. team->ordered_release = (void *) &team->implicit_task[nthreads];
  135. team->ordered_release[0] = &team->master_release;
  136. gomp_mutex_init (&team->task_lock);
  137. team->task_queue = NULL;
  138. team->task_count = 0;
  139. team->task_queued_count = 0;
  140. team->task_running_count = 0;
  141. team->work_share_cancelled = 0;
  142. team->team_cancelled = 0;
  143. return team;
  144. }
  145. /* Free a team data structure. */
  146. static void
  147. free_team (struct gomp_team *team)
  148. {
  149. gomp_barrier_destroy (&team->barrier);
  150. gomp_mutex_destroy (&team->task_lock);
  151. free (team);
  152. }
  153. /* Allocate and initialize a thread pool. */
  154. static struct gomp_thread_pool *gomp_new_thread_pool (void)
  155. {
  156. struct gomp_thread_pool *pool
  157. = gomp_malloc (sizeof(struct gomp_thread_pool));
  158. pool->threads = NULL;
  159. pool->threads_size = 0;
  160. pool->threads_used = 0;
  161. pool->last_team = NULL;
  162. return pool;
  163. }
/* Shutdown trampoline run by each docked pool thread when the pool is
   being destroyed (installed as thr->fn by gomp_free_thread).  Signals
   the master via the dock barrier, releases per-thread resources, and
   terminates the thread.  */
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  /* Matches the second gomp_barrier_wait in gomp_free_thread; after
     this the master may destroy the barrier and free the pool.  */
  gomp_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}
/* Free a thread pool and release its threads.  Registered as the
   gomp_thread_destructor pthread-key destructor and also called
   directly; ARG is unused, state comes from the calling thread's TLS.  */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  int i;
	  /* Point every docked thread (slot 0 is this thread) at the
	     shutdown trampoline before opening the dock.  */
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_free_pool_helper;
	      nthr->data = pool;
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_barrier_wait (&pool->threads_dock);
	  /* And this waits till all threads have called gomp_barrier_wait_last
	     in gomp_free_pool_helper.  */
	  gomp_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_barrier_destroy (&pool->threads_dock);
#ifdef HAVE_SYNC_BUILTINS
	  __sync_fetch_and_add (&gomp_managed_threads,
				1L - pool->threads_used);
#else
	  gomp_mutex_lock (&gomp_managed_threads_lock);
	  gomp_managed_threads -= pool->threads_used - 1L;
	  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	}
      free (pool->threads);
      if (pool->last_team)
	free_team (pool->last_team);
      free (pool);
      thr->thread_pool = NULL;
    }
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}
/* Launch a team.  FN/DATA is the outlined parallel-region body and its
   argument, NTHREADS the requested team size (including the calling
   master), FLAGS carries the proc_bind clause in its low 3 bits, and
   TEAM a structure from gomp_new_team.  Reuses docked pool threads for
   non-nested teams and pthread_creates the rest; returns once all team
   members have been released into FN.  */

void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
		 unsigned flags, struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  /* Place-computation state shared by both thread loops below:
     P = current 0-based place, S = threads (or places) per step,
     K = running count within the current place, REST = remainder.  */
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;

  thr = gomp_thread ();
  nested = thr->ts.team != NULL;
  if (__builtin_expect (thr->thread_pool == NULL, 0))
    {
      thr->thread_pool = gomp_new_thread_pool ();
      thr->thread_pool->threads_busy = nthreads;
      pthread_setspecific (gomp_thread_destructor, thr);
    }
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    gomp_init_affinity ();

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  /* A proc_bind clause (low 3 bits of FLAGS) overrides the inherited
     bind-var, unless binding is disabled altogether.  */
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
	 for the master thread and initialize helper variables
	 P and optionally S, K and/or REST used by later place
	 computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
	{
	case omp_proc_bind_true:
	case omp_proc_bind_close:
	  if (nthreads > thr->ts.place_partition_len)
	    {
	      /* T > P.  S threads will be placed in each place,
		 and the final REM threads placed one by one
		 into the already occupied places.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	    }
	  else
	    s = 1;
	  k = 1;
	  break;
	case omp_proc_bind_master:
	  /* Each thread will be bound to master's place.  */
	  break;
	case omp_proc_bind_spread:
	  if (nthreads <= thr->ts.place_partition_len)
	    {
	      /* T <= P.  Each subpartition will have in between s
		 and s+1 places (subpartitions starting at or
		 after rest will have s places, earlier s+1 places),
		 each thread will be bound to the first place in
		 its subpartition (except for the master thread
		 that can be bound to another place in its
		 subpartition).  */
	      s = thr->ts.place_partition_len / nthreads;
	      rest = thr->ts.place_partition_len % nthreads;
	      rest = (s + 1) * rest + thr->ts.place_partition_off;
	      if (p < rest)
		{
		  p -= (p - thr->ts.place_partition_off) % (s + 1);
		  thr->ts.place_partition_len = s + 1;
		}
	      else
		{
		  p -= (p - rest) % s;
		  thr->ts.place_partition_len = s;
		}
	      thr->ts.place_partition_off = p;
	    }
	  else
	    {
	      /* T > P.  Each subpartition will have just a single
		 place and we'll place between s and s+1
		 threads into each subpartition.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	      thr->ts.place_partition_off = p;
	      thr->ts.place_partition_len = 1;
	      k = 1;
	    }
	  break;
	}
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
	n = nthreads;
      else if (old_threads_used == 0)
	{
	  n = 0;
	  gomp_barrier_init (&pool->threads_dock, nthreads);
	}
      else
	{
	  n = old_threads_used;

	  /* Increase the barrier threshold to make sure all new
	     threads arrive before the team is released.  */
	  gomp_barrier_reinit (&pool->threads_dock, nthreads);
	}

      /* Not true yet, but soon will be.  We're going to release all
	 threads from the dock, and those that aren't part of the
	 team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
	 expected that changes in the number of threads are rare, thus we
	 make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
	{
	  pool->threads_size = nthreads + 1;
	  pool->threads
	    = gomp_realloc (pool->threads,
			    pool->threads_size
			    /* NOTE(review): historical misnomer — no
			       "struct gomp_thread_data" exists; harmless
			       only because all object pointers share one
			       size.  */
			    * sizeof (struct gomp_thread_data *));
	}

      /* Release existing idle threads.  */
      for (; i < n; ++i)
	{
	  unsigned int place_partition_off = thr->ts.place_partition_off;
	  unsigned int place_partition_len = thr->ts.place_partition_len;
	  unsigned int place = 0;
	  if (__builtin_expect (gomp_places_list != NULL, 0))
	    {
	      switch (bind)
		{
		case omp_proc_bind_true:
		case omp_proc_bind_close:
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  break;
		case omp_proc_bind_master:
		  break;
		case omp_proc_bind_spread:
		  if (k == 0)
		    {
		      /* T <= P.  */
		      if (p < rest)
			p += s + 1;
		      else
			p += s;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      place_partition_off = p;
		      if (p < rest)
			place_partition_len = s + 1;
		      else
			place_partition_len = s;
		    }
		  else
		    {
		      /* T > P.  */
		      if (k == s)
			{
			  ++p;
			  if (p == (team->prev_ts.place_partition_off
				    + team->prev_ts.place_partition_len))
			    p = team->prev_ts.place_partition_off;
			  k = 1;
			  if (i == nthreads - rest)
			    s = 1;
			}
		      else
			++k;
		      place_partition_off = p;
		      place_partition_len = 1;
		    }
		  break;
		}
	      /* If the docked thread in slot I is not usable for place P,
		 fall back to searching the old threads by place via the
		 AFFINITY_THR per-place chains.  */
	      if (affinity_thr != NULL
		  || (bind != omp_proc_bind_true
		      && pool->threads[i]->place != p + 1)
		  || pool->threads[i]->place <= place_partition_off
		  || pool->threads[i]->place > (place_partition_off
						+ place_partition_len))
		{
		  unsigned int l;
		  if (affinity_thr == NULL)
		    {
		      unsigned int j;

		      /* Build per-place singly linked lists of the old
			 threads (chained through thr->data).  */
		      if (team->prev_ts.place_partition_len > 64)
			affinity_thr
			  = gomp_malloc (team->prev_ts.place_partition_len
					 * sizeof (struct gomp_thread *));
		      else
			affinity_thr
			  = gomp_alloca (team->prev_ts.place_partition_len
					 * sizeof (struct gomp_thread *));
		      memset (affinity_thr, '\0',
			      team->prev_ts.place_partition_len
			      * sizeof (struct gomp_thread *));
		      for (j = i; j < old_threads_used; j++)
			{
			  if (pool->threads[j]->place
			      > team->prev_ts.place_partition_off
			      && (pool->threads[j]->place
				  <= (team->prev_ts.place_partition_off
				      + team->prev_ts.place_partition_len)))
			    {
			      l = pool->threads[j]->place - 1
				  - team->prev_ts.place_partition_off;
			      pool->threads[j]->data = affinity_thr[l];
			      affinity_thr[l] = pool->threads[j];
			    }
			  pool->threads[j] = NULL;
			}
		      if (nthreads > old_threads_used)
			memset (&pool->threads[old_threads_used],
				'\0', ((nthreads - old_threads_used)
				       * sizeof (struct gomp_thread *)));
		      n = nthreads;
		      affinity_count = old_threads_used - i;
		    }
		  if (affinity_count == 0)
		    break;
		  l = p;
		  if (affinity_thr[l - team->prev_ts.place_partition_off]
		      == NULL)
		    {
		      if (bind != omp_proc_bind_true)
			continue;
		      /* For proc_bind true, any place within the
			 subpartition will do.  */
		      for (l = place_partition_off;
			   l < place_partition_off + place_partition_len;
			   l++)
			if (affinity_thr[l - team->prev_ts.place_partition_off]
			    != NULL)
			  break;
		      if (l == place_partition_off + place_partition_len)
			continue;
		    }
		  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
		  affinity_thr[l - team->prev_ts.place_partition_off]
		    = (struct gomp_thread *) nthr->data;
		  affinity_count--;
		  pool->threads[i] = nthr;
		}
	      else
		nthr = pool->threads[i];
	      place = p + 1;
	    }
	  else
	    nthr = pool->threads[i];
	  nthr->ts.team = team;
	  nthr->ts.work_share = &team->work_shares[0];
	  nthr->ts.last_work_share = NULL;
	  nthr->ts.team_id = i;
	  nthr->ts.level = team->prev_ts.level + 1;
	  nthr->ts.active_level = thr->ts.active_level;
	  nthr->ts.place_partition_off = place_partition_off;
	  nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
	  nthr->ts.single_count = 0;
#endif
	  nthr->ts.static_trip = 0;
	  nthr->task = &team->implicit_task[i];
	  nthr->place = place;
	  gomp_init_task (nthr->task, task, icv);
	  team->implicit_task[i].icv.nthreads_var = nthreads_var;
	  team->implicit_task[i].icv.bind_var = bind_var;
	  nthr->fn = fn;
	  nthr->data = data;
	  team->ordered_release[i] = &nthr->release;
	}

      if (__builtin_expect (affinity_thr != NULL, 0))
	{
	  /* If AFFINITY_THR is non-NULL just because we had to
	     permute some threads in the pool, but we've managed
	     to find exactly as many old threads as we'd find
	     without affinity, we don't need to handle this
	     specially anymore.  */
	  if (nthreads <= old_threads_used
	      ? (affinity_count == old_threads_used - nthreads)
	      : (i == old_threads_used))
	    {
	      if (team->prev_ts.place_partition_len > 64)
		free (affinity_thr);
	      affinity_thr = NULL;
	      affinity_count = 0;
	    }
	  else
	    {
	      i = 1;
	      /* We are going to compute the places/subpartitions
		 again from the beginning.  So, we need to reinitialize
		 vars modified by the switch (bind) above inside
		 of the loop, to the state they had after the initial
		 switch (bind).  */
	      switch (bind)
		{
		case omp_proc_bind_true:
		case omp_proc_bind_close:
		  if (nthreads > thr->ts.place_partition_len)
		    /* T > P.  S has been changed, so needs
		       to be recomputed.  */
		    s = nthreads / thr->ts.place_partition_len;
		  k = 1;
		  p = thr->place - 1;
		  break;
		case omp_proc_bind_master:
		  /* No vars have been changed.  */
		  break;
		case omp_proc_bind_spread:
		  p = thr->ts.place_partition_off;
		  if (k != 0)
		    {
		      /* T > P.  */
		      s = nthreads / team->prev_ts.place_partition_len;
		      k = 1;
		    }
		  break;
		}

	      /* Increase the barrier threshold to make sure all new
		 threads and all the threads we're going to let die
		 arrive before the team is released.  */
	      if (affinity_count)
		gomp_barrier_reinit (&pool->threads_dock,
				     nthreads + affinity_count);
	    }
	}

      if (i == nthreads)
	goto do_release;

    }

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
	--diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      /* Per-thread attributes are needed so each new thread can get its
	 own affinity; inherit only the configured stack size.  */
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
	pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  /* Stack-allocated; gomp_thread_start copies it out before the team
     barrier releases this frame.  */
  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
			    * (nthreads-i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      pthread_t pt;
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
	{
	  /* Same place-stepping state machine as in the reuse loop
	     above, continuing from the current P/S/K/REST.  */
	  switch (bind)
	    {
	    case omp_proc_bind_true:
	    case omp_proc_bind_close:
	      if (k == s)
		{
		  ++p;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  k = 1;
		  if (i == nthreads - rest)
		    s = 1;
		}
	      else
		++k;
	      break;
	    case omp_proc_bind_master:
	      break;
	    case omp_proc_bind_spread:
	      if (k == 0)
		{
		  /* T <= P.  */
		  if (p < rest)
		    p += s + 1;
		  else
		    p += s;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  start_data->ts.place_partition_off = p;
		  if (p < rest)
		    start_data->ts.place_partition_len = s + 1;
		  else
		    start_data->ts.place_partition_len = s;
		}
	      else
		{
		  /* T > P.  */
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  start_data->ts.place_partition_off = p;
		  start_data->ts.place_partition_len = 1;
		}
	      break;
	    }
	  start_data->place = p + 1;
	  /* Skip slots already filled from the affinity chains above.  */
	  if (affinity_thr != NULL && pool->threads[i] != NULL)
	    continue;
	  gomp_init_thread_affinity (attr, p);
	}

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
      if (err != 0)
	gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (gomp_places_list != NULL, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_COUNT,
     AFFINITY_COUNT if non-zero will be always at least
     OLD_THREADS_COUNT - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
	diff = -affinity_count;

      gomp_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might get awaited count in
     team->barrier in a inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      /* On cancellation, walk and finalize every work share from the
	 oldest still-live one; a NULL next_ws is pointed back at WS to
	 mark the end before finalizing.  */
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
	{
	  struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
	  if (next_ws == NULL)
	    gomp_ptrlock_set (&ws->next_ws, ws);
	  gomp_fini_work_share (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  /* Restore the team state saved by gomp_team_start.  */
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
	 and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  /* Free any work shares allocated beyond the inline array.  */
  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
	{
	  struct gomp_work_share *next_ws = ws->next_alloc;
	  free (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      /* Cache one team per pool for reuse by the next parallel region.  */
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
	free_team (pool->last_team);
      pool->last_team = team;
    }
}
/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  /* Without native TLS, the initial thread's gomp_thread data lives in
     this static object, reached via gomp_tls_key.  */
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}
static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}
/* Allocate a fresh implicit task for the calling thread, initialized
   from the global ICVs, and register the thread-pool destructor for
   this thread.  Returns a pointer to the new task's ICV block; the
   task is owned by the thread and freed by gomp_free_thread.  */
struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
  pthread_setspecific (gomp_thread_destructor, thr);
  return &task->icv;
}