oacc-parallel.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478
  1. /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
  2. Contributed by Mentor Embedded.
  3. This file is part of the GNU Offloading and Multi Processing Library
  4. (libgomp).
  5. Libgomp is free software; you can redistribute it and/or modify it
  6. under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 3, or (at your option)
  8. any later version.
  9. Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
  10. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  11. FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  12. more details.
  13. Under Section 7 of GPL version 3, you are granted additional
  14. permissions described in the GCC Runtime Library Exception, version
  15. 3.1, as published by the Free Software Foundation.
  16. You should have received a copy of the GNU General Public License and
  17. a copy of the GCC Runtime Library Exception along with this program;
  18. see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. <http://www.gnu.org/licenses/>. */
  20. /* This file handles OpenACC constructs. */
  21. #include "openacc.h"
  22. #include "libgomp.h"
  23. #include "libgomp_g.h"
  24. #include "gomp-constants.h"
  25. #include "oacc-int.h"
  26. #ifdef HAVE_INTTYPES_H
  27. # include <inttypes.h> /* For PRIu64. */
  28. #endif
  29. #include <string.h>
  30. #include <stdarg.h>
  31. #include <assert.h>
  32. static int
  33. find_pset (int pos, size_t mapnum, unsigned short *kinds)
  34. {
  35. if (pos + 1 >= mapnum)
  36. return 0;
  37. unsigned char kind = kinds[pos+1] & 0xff;
  38. return kind == GOMP_MAP_TO_PSET;
  39. }
  40. static void goacc_wait (int async, int num_waits, va_list ap);
  41. void
  42. GOACC_parallel (int device, void (*fn) (void *),
  43. size_t mapnum, void **hostaddrs, size_t *sizes,
  44. unsigned short *kinds,
  45. int num_gangs, int num_workers, int vector_length,
  46. int async, int num_waits, ...)
  47. {
  48. bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
  49. va_list ap;
  50. struct goacc_thread *thr;
  51. struct gomp_device_descr *acc_dev;
  52. struct target_mem_desc *tgt;
  53. void **devaddrs;
  54. unsigned int i;
  55. struct splay_tree_key_s k;
  56. splay_tree_key tgt_fn_key;
  57. void (*tgt_fn);
  58. if (num_gangs != 1)
  59. gomp_fatal ("num_gangs (%d) different from one is not yet supported",
  60. num_gangs);
  61. if (num_workers != 1)
  62. gomp_fatal ("num_workers (%d) different from one is not yet supported",
  63. num_workers);
  64. #ifdef HAVE_INTTYPES_H
  65. gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p, "
  66. "async = %d\n",
  67. __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds, async);
  68. #else
  69. gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
  70. __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds,
  71. async);
  72. #endif
  73. goacc_lazy_initialize ();
  74. thr = goacc_thread ();
  75. acc_dev = thr->dev;
  76. /* Host fallback if "if" clause is false or if the current device is set to
  77. the host. */
  78. if (host_fallback)
  79. {
  80. goacc_save_and_set_bind (acc_device_host);
  81. fn (hostaddrs);
  82. goacc_restore_bind ();
  83. return;
  84. }
  85. else if (acc_device_type (acc_dev->type) == acc_device_host)
  86. {
  87. fn (hostaddrs);
  88. return;
  89. }
  90. va_start (ap, num_waits);
  91. if (num_waits > 0)
  92. goacc_wait (async, num_waits, ap);
  93. va_end (ap);
  94. acc_dev->openacc.async_set_async_func (async);
  95. if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
  96. {
  97. k.host_start = (uintptr_t) fn;
  98. k.host_end = k.host_start + 1;
  99. gomp_mutex_lock (&acc_dev->lock);
  100. tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
  101. gomp_mutex_unlock (&acc_dev->lock);
  102. if (tgt_fn_key == NULL)
  103. gomp_fatal ("target function wasn't mapped");
  104. tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
  105. }
  106. else
  107. tgt_fn = (void (*)) fn;
  108. tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
  109. false);
  110. devaddrs = gomp_alloca (sizeof (void *) * mapnum);
  111. for (i = 0; i < mapnum; i++)
  112. devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
  113. + tgt->list[i]->tgt_offset);
  114. acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
  115. num_gangs, num_workers, vector_length, async,
  116. tgt);
  117. /* If running synchronously, unmap immediately. */
  118. if (async < acc_async_noval)
  119. gomp_unmap_vars (tgt, true);
  120. else
  121. {
  122. gomp_copy_from_async (tgt);
  123. acc_dev->openacc.register_async_cleanup_func (tgt);
  124. }
  125. acc_dev->openacc.async_set_async_func (acc_async_sync);
  126. }
  127. void
  128. GOACC_data_start (int device, size_t mapnum,
  129. void **hostaddrs, size_t *sizes, unsigned short *kinds)
  130. {
  131. bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
  132. struct target_mem_desc *tgt;
  133. #ifdef HAVE_INTTYPES_H
  134. gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
  135. __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
  136. #else
  137. gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
  138. __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
  139. #endif
  140. goacc_lazy_initialize ();
  141. struct goacc_thread *thr = goacc_thread ();
  142. struct gomp_device_descr *acc_dev = thr->dev;
  143. /* Host fallback or 'do nothing'. */
  144. if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
  145. || host_fallback)
  146. {
  147. tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
  148. tgt->prev = thr->mapped_data;
  149. thr->mapped_data = tgt;
  150. return;
  151. }
  152. gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
  153. tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
  154. false);
  155. gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
  156. tgt->prev = thr->mapped_data;
  157. thr->mapped_data = tgt;
  158. }
  159. void
  160. GOACC_data_end (void)
  161. {
  162. struct goacc_thread *thr = goacc_thread ();
  163. struct target_mem_desc *tgt = thr->mapped_data;
  164. gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
  165. thr->mapped_data = tgt->prev;
  166. gomp_unmap_vars (tgt, true);
  167. gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
  168. }
  169. void
  170. GOACC_enter_exit_data (int device, size_t mapnum,
  171. void **hostaddrs, size_t *sizes, unsigned short *kinds,
  172. int async, int num_waits, ...)
  173. {
  174. struct goacc_thread *thr;
  175. struct gomp_device_descr *acc_dev;
  176. bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
  177. bool data_enter = false;
  178. size_t i;
  179. goacc_lazy_initialize ();
  180. thr = goacc_thread ();
  181. acc_dev = thr->dev;
  182. if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
  183. || host_fallback)
  184. return;
  185. if (num_waits > 0)
  186. {
  187. va_list ap;
  188. va_start (ap, num_waits);
  189. goacc_wait (async, num_waits, ap);
  190. va_end (ap);
  191. }
  192. acc_dev->openacc.async_set_async_func (async);
  193. /* Determine if this is an "acc enter data". */
  194. for (i = 0; i < mapnum; ++i)
  195. {
  196. unsigned char kind = kinds[i] & 0xff;
  197. if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
  198. continue;
  199. if (kind == GOMP_MAP_FORCE_ALLOC
  200. || kind == GOMP_MAP_FORCE_PRESENT
  201. || kind == GOMP_MAP_FORCE_TO)
  202. {
  203. data_enter = true;
  204. break;
  205. }
  206. if (kind == GOMP_MAP_FORCE_DEALLOC
  207. || kind == GOMP_MAP_FORCE_FROM)
  208. break;
  209. gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
  210. kind);
  211. }
  212. if (data_enter)
  213. {
  214. for (i = 0; i < mapnum; i++)
  215. {
  216. unsigned char kind = kinds[i] & 0xff;
  217. /* Scan for PSETs. */
  218. int psets = find_pset (i, mapnum, kinds);
  219. if (!psets)
  220. {
  221. switch (kind)
  222. {
  223. case GOMP_MAP_POINTER:
  224. gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
  225. &kinds[i]);
  226. break;
  227. case GOMP_MAP_FORCE_ALLOC:
  228. acc_create (hostaddrs[i], sizes[i]);
  229. break;
  230. case GOMP_MAP_FORCE_PRESENT:
  231. acc_present_or_copyin (hostaddrs[i], sizes[i]);
  232. break;
  233. case GOMP_MAP_FORCE_TO:
  234. acc_present_or_copyin (hostaddrs[i], sizes[i]);
  235. break;
  236. default:
  237. gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
  238. kind);
  239. break;
  240. }
  241. }
  242. else
  243. {
  244. gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
  245. /* Increment 'i' by two because OpenACC requires fortran
  246. arrays to be contiguous, so each PSET is associated with
  247. one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
  248. one MAP_POINTER. */
  249. i += 2;
  250. }
  251. }
  252. }
  253. else
  254. for (i = 0; i < mapnum; ++i)
  255. {
  256. unsigned char kind = kinds[i] & 0xff;
  257. int psets = find_pset (i, mapnum, kinds);
  258. if (!psets)
  259. {
  260. switch (kind)
  261. {
  262. case GOMP_MAP_POINTER:
  263. gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
  264. == GOMP_MAP_FORCE_FROM,
  265. async, 1);
  266. break;
  267. case GOMP_MAP_FORCE_DEALLOC:
  268. acc_delete (hostaddrs[i], sizes[i]);
  269. break;
  270. case GOMP_MAP_FORCE_FROM:
  271. acc_copyout (hostaddrs[i], sizes[i]);
  272. break;
  273. default:
  274. gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
  275. kind);
  276. break;
  277. }
  278. }
  279. else
  280. {
  281. gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
  282. == GOMP_MAP_FORCE_FROM, async, 3);
  283. /* See the above comment. */
  284. i += 2;
  285. }
  286. }
  287. acc_dev->openacc.async_set_async_func (acc_async_sync);
  288. }
  289. static void
  290. goacc_wait (int async, int num_waits, va_list ap)
  291. {
  292. struct goacc_thread *thr = goacc_thread ();
  293. struct gomp_device_descr *acc_dev = thr->dev;
  294. int i;
  295. assert (num_waits >= 0);
  296. if (async == acc_async_sync && num_waits == 0)
  297. {
  298. acc_wait_all ();
  299. return;
  300. }
  301. if (async == acc_async_sync && num_waits)
  302. {
  303. for (i = 0; i < num_waits; i++)
  304. {
  305. int qid = va_arg (ap, int);
  306. if (acc_async_test (qid))
  307. continue;
  308. acc_wait (qid);
  309. }
  310. return;
  311. }
  312. if (async == acc_async_noval && num_waits == 0)
  313. {
  314. acc_dev->openacc.async_wait_all_async_func (acc_async_noval);
  315. return;
  316. }
  317. for (i = 0; i < num_waits; i++)
  318. {
  319. int qid = va_arg (ap, int);
  320. if (acc_async_test (qid))
  321. continue;
  322. /* If we're waiting on the same asynchronous queue as we're launching on,
  323. the queue itself will order work as required, so there's no need to
  324. wait explicitly. */
  325. if (qid != async)
  326. acc_dev->openacc.async_wait_async_func (qid, async);
  327. }
  328. }
  329. void
  330. GOACC_update (int device, size_t mapnum,
  331. void **hostaddrs, size_t *sizes, unsigned short *kinds,
  332. int async, int num_waits, ...)
  333. {
  334. bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
  335. size_t i;
  336. goacc_lazy_initialize ();
  337. struct goacc_thread *thr = goacc_thread ();
  338. struct gomp_device_descr *acc_dev = thr->dev;
  339. if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
  340. || host_fallback)
  341. return;
  342. if (num_waits > 0)
  343. {
  344. va_list ap;
  345. va_start (ap, num_waits);
  346. goacc_wait (async, num_waits, ap);
  347. va_end (ap);
  348. }
  349. acc_dev->openacc.async_set_async_func (async);
  350. for (i = 0; i < mapnum; ++i)
  351. {
  352. unsigned char kind = kinds[i] & 0xff;
  353. switch (kind)
  354. {
  355. case GOMP_MAP_POINTER:
  356. case GOMP_MAP_TO_PSET:
  357. break;
  358. case GOMP_MAP_FORCE_TO:
  359. acc_update_device (hostaddrs[i], sizes[i]);
  360. break;
  361. case GOMP_MAP_FORCE_FROM:
  362. acc_update_self (hostaddrs[i], sizes[i]);
  363. break;
  364. default:
  365. gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
  366. break;
  367. }
  368. }
  369. acc_dev->openacc.async_set_async_func (acc_async_sync);
  370. }
  371. void
  372. GOACC_wait (int async, int num_waits, ...)
  373. {
  374. va_list ap;
  375. va_start (ap, num_waits);
  376. goacc_wait (async, num_waits, ap);
  377. va_end (ap);
  378. }
  379. int
  380. GOACC_get_num_threads (void)
  381. {
  382. return 1;
  383. }
  384. int
  385. GOACC_get_thread_num (void)
  386. {
  387. return 0;
  388. }