loop_ull.c

/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include "libgomp.h"

typedef unsigned long long gomp_ull;

/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start,
                    gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched,
                    gomp_ull chunk_size)
{
  ws->sched = sched;
  ws->chunk_size_ull = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end_ull = ((up && start > end) || (!up && start < end))
                ? start : end;
  ws->incr_ull = incr;
  ws->next_ull = start;
  ws->mode = 0;
  if (sched == GFS_DYNAMIC)
    {
      ws->chunk_size_ull *= incr;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
      {
        /* For dynamic scheduling prepare things to make each iteration
           faster.  */
        struct gomp_thread *thr = gomp_thread ();
        struct gomp_team *team = thr->ts.team;
        long nthreads = team ? team->nthreads : 1;

        if (__builtin_expect (up, 1))
          {
            /* Cheap overflow protection.  */
            if (__builtin_expect ((nthreads | ws->chunk_size_ull)
                                  < 1ULL << (sizeof (gomp_ull)
                                             * __CHAR_BIT__ / 2 - 1), 1))
              ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1
                                        - (nthreads + 1) * ws->chunk_size_ull);
          }
        /* Cheap overflow protection.  */
        else if (__builtin_expect ((nthreads | -ws->chunk_size_ull)
                                   < 1ULL << (sizeof (gomp_ull)
                                              * __CHAR_BIT__ / 2 - 1), 1))
          ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull
                                    - (__LONG_LONG_MAX__ * 2ULL + 1));
      }
#endif
    }
  if (!up)
    ws->mode |= 2;
}
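
/* Illustrative note (not in the original sources): with up == true,
   start == 10 and end == 4, the test (up && start > end) above holds,
   so end_ull is set to start and next_ull == end_ull from the outset;
   the zero-iteration loop hands out no work.  Reading the bits of
   ws->mode as set here: bit 0 records that the "cheap overflow
   protection" test passed, i.e. the lock-free dynamic handout cannot
   wrap around, and bit 1 records a downward loop.  */
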
/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */
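
/* Illustrative sketch (not in the original sources): a compiler might
   expand

     #pragma omp for schedule(dynamic, 4)
     for (unsigned long long i = 0; i < n; i++)
       body (i);

   into roughly the following per-thread code, where GOMP_loop_end is
   assumed to be the matching barrier entry point defined elsewhere in
   libgomp:

     gomp_ull istart, iend;
     if (GOMP_loop_ull_dynamic_start (true, 0, n, 1, 4, &istart, &iend))
       do
         for (gomp_ull i = istart; i < iend; i++)
           body (i);
       while (GOMP_loop_ull_dynamic_next (&istart, &iend));
     GOMP_loop_end ();  */
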
static bool
gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (false))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                             gomp_ull incr, gomp_ull chunk_size,
                             gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
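
/* Illustrative note (not in the original sources): the #if above selects
   between two implementations of the same chunk handout.  When 64-bit
   atomic builtins are available, gomp_iter_ull_dynamic_next can claim a
   chunk lock-free (e.g. with a fetch-and-add on next_ull); otherwise the
   _locked variant runs under the work share's mutex.  */
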
static bool
gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end,
                             gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_ull_static_start (up, start, end, incr,
                                         icv->run_sched_modifier,
                                         istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_start (up, start, end, incr,
                                          icv->run_sched_modifier,
                                          istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_start (up, start, end, incr,
                                         icv->run_sched_modifier,
                                         istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static); later on we could play with
         feedback-driven choice.  */
      return gomp_loop_ull_static_start (up, start, end, incr,
                                         0, istart, iend);
    default:
      abort ();
    }
}
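
/* Illustrative note (not in the original sources): schedule(runtime)
   defers the choice to the run_sched_var ICV, which is typically taken
   from the OMP_SCHEDULE environment variable.  For example, running
   under

     OMP_SCHEDULE="guided,16"

   would route GOMP_loop_ull_runtime_start to gomp_loop_ull_guided_start
   with run_sched_modifier == 16 as the chunk size.  */
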
/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */

static bool
gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (true))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                                     gomp_ull incr, gomp_ull chunk_size,
                                     gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
                                     gomp_ull incr, gomp_ull *istart,
                                     gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
                                                 icv->run_sched_modifier,
                                                 istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
                                                  icv->run_sched_modifier,
                                                  istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
                                                 icv->run_sched_modifier,
                                                 istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static); later on we could play with
         feedback-driven choice.  */
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
                                                 0, istart, iend);
    default:
      abort ();
    }
}
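
/* Illustrative sketch (not in the original sources): for

     #pragma omp for ordered schedule(dynamic)
     for (unsigned long long i = 0; i < n; i++)
       {
         compute (i);
         #pragma omp ordered
         emit (i);
       }

   a compiler would call GOMP_loop_ull_ordered_dynamic_start rather than
   the plain start routine, so the work share also tracks which thread
   owns the next ordered section; the ordered region itself is assumed
   to be bracketed by GOMP_ordered_start/GOMP_ordered_end from elsewhere
   in libgomp.  */
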
/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel, in which case this may
   be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */
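
/* Illustrative note (not in the original sources): when the loop is
   bound directly to a parallel construct and the work share was
   initialized before the threads started, each thread's first call can
   go straight to a _next routine:

     gomp_ull istart, iend;
     while (GOMP_loop_ull_static_next (&istart, &iend))
       for (gomp_ull i = istart; i < iend; i++)
         body (i);  */
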
static bool
gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_ull_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start
        __attribute__((alias ("gomp_loop_ull_static_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start
        __attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start
        __attribute__((alias ("gomp_loop_ull_guided_start")));
extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
        __attribute__((alias ("gomp_loop_ull_ordered_static_start")));
extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start
        __attribute__((alias ("gomp_loop_ull_ordered_dynamic_start")));
extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start
        __attribute__((alias ("gomp_loop_ull_ordered_guided_start")));

extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next
        __attribute__((alias ("gomp_loop_ull_static_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next
        __attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next
        __attribute__((alias ("gomp_loop_ull_guided_next")));
extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
        __attribute__((alias ("gomp_loop_ull_ordered_static_next")));
extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next
        __attribute__((alias ("gomp_loop_ull_ordered_dynamic_next")));
extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next
        __attribute__((alias ("gomp_loop_ull_ordered_guided_next")));
#else
bool
GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_start (up, start, end, incr, chunk_size, istart,
                                     iend);
}

bool
GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                             gomp_ull incr, gomp_ull chunk_size,
                             gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
                                      iend);
}

bool
GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
                                     iend);
}

bool
GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_start (up, start, end, incr, chunk_size,
                                             istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                                     gomp_ull incr, gomp_ull chunk_size,
                                     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, chunk_size,
                                              istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_start (up, start, end, incr, chunk_size,
                                             istart, iend);
}

bool
GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_next (istart, iend);
}

bool
GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_next (istart, iend);
}
#endif