oacc-mem.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589
  1. /* OpenACC Runtime initialization routines
  2. Copyright (C) 2013-2015 Free Software Foundation, Inc.
  3. Contributed by Mentor Embedded.
  4. This file is part of the GNU Offloading and Multi Processing Library
  5. (libgomp).
  6. Libgomp is free software; you can redistribute it and/or modify it
  7. under the terms of the GNU General Public License as published by
  8. the Free Software Foundation; either version 3, or (at your option)
  9. any later version.
  10. Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  13. more details.
  14. Under Section 7 of GPL version 3, you are granted additional
  15. permissions described in the GCC Runtime Library Exception, version
  16. 3.1, as published by the Free Software Foundation.
  17. You should have received a copy of the GNU General Public License and
  18. a copy of the GCC Runtime Library Exception along with this program;
  19. see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  20. <http://www.gnu.org/licenses/>. */
  21. #include "openacc.h"
  22. #include "config.h"
  23. #include "libgomp.h"
  24. #include "gomp-constants.h"
  25. #include "oacc-int.h"
  26. #include "splay-tree.h"
  27. #include <stdint.h>
  28. #include <assert.h>
  29. /* Return block containing [H->S), or NULL if not contained. */
  30. static splay_tree_key
  31. lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
  32. {
  33. struct splay_tree_key_s node;
  34. splay_tree_key key;
  35. node.host_start = (uintptr_t) h;
  36. node.host_end = (uintptr_t) h + s;
  37. gomp_mutex_lock (&dev->lock);
  38. key = splay_tree_lookup (&dev->mem_map, &node);
  39. gomp_mutex_unlock (&dev->lock);
  40. return key;
  41. }
  42. /* Return block containing [D->S), or NULL if not contained.
  43. The list isn't ordered by device address, so we have to iterate
  44. over the whole array. This is not expected to be a common
  45. operation. */
  46. static splay_tree_key
  47. lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
  48. {
  49. int i;
  50. struct target_mem_desc *t;
  51. if (!tgt)
  52. return NULL;
  53. gomp_mutex_lock (&tgt->device_descr->lock);
  54. for (t = tgt; t != NULL; t = t->prev)
  55. {
  56. if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
  57. break;
  58. }
  59. gomp_mutex_unlock (&tgt->device_descr->lock);
  60. if (!t)
  61. return NULL;
  62. for (i = 0; i < t->list_count; i++)
  63. {
  64. void * offset;
  65. splay_tree_key k = &t->array[i].key;
  66. offset = d - t->tgt_start + k->tgt_offset;
  67. if (k->host_start + offset <= (void *) k->host_end)
  68. return k;
  69. }
  70. return NULL;
  71. }
  72. /* OpenACC is silent on how memory exhaustion is indicated. We return
  73. NULL. */
  74. void *
  75. acc_malloc (size_t s)
  76. {
  77. if (!s)
  78. return NULL;
  79. goacc_lazy_initialize ();
  80. struct goacc_thread *thr = goacc_thread ();
  81. assert (thr->dev);
  82. return thr->dev->alloc_func (thr->dev->target_id, s);
  83. }
  84. /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
  85. the device address is mapped. We choose to check if it mapped,
  86. and if it is, to unmap it. */
  87. void
  88. acc_free (void *d)
  89. {
  90. splay_tree_key k;
  91. struct goacc_thread *thr = goacc_thread ();
  92. if (!d)
  93. return;
  94. assert (thr && thr->dev);
  95. /* We don't have to call lazy open here, as the ptr value must have
  96. been returned by acc_malloc. It's not permitted to pass NULL in
  97. (unless you got that null from acc_malloc). */
  98. if ((k = lookup_dev (thr->dev->openacc.data_environ, d, 1)))
  99. {
  100. void *offset;
  101. offset = d - k->tgt->tgt_start + k->tgt_offset;
  102. acc_unmap_data ((void *)(k->host_start + offset));
  103. }
  104. thr->dev->free_func (thr->dev->target_id, d);
  105. }
  106. void
  107. acc_memcpy_to_device (void *d, void *h, size_t s)
  108. {
  109. /* No need to call lazy open here, as the device pointer must have
  110. been obtained from a routine that did that. */
  111. struct goacc_thread *thr = goacc_thread ();
  112. assert (thr && thr->dev);
  113. thr->dev->host2dev_func (thr->dev->target_id, d, h, s);
  114. }
  115. void
  116. acc_memcpy_from_device (void *h, void *d, size_t s)
  117. {
  118. /* No need to call lazy open here, as the device pointer must have
  119. been obtained from a routine that did that. */
  120. struct goacc_thread *thr = goacc_thread ();
  121. assert (thr && thr->dev);
  122. thr->dev->dev2host_func (thr->dev->target_id, h, d, s);
  123. }
  124. /* Return the device pointer that corresponds to host data H. Or NULL
  125. if no mapping. */
  126. void *
  127. acc_deviceptr (void *h)
  128. {
  129. splay_tree_key n;
  130. void *d;
  131. void *offset;
  132. goacc_lazy_initialize ();
  133. struct goacc_thread *thr = goacc_thread ();
  134. n = lookup_host (thr->dev, h, 1);
  135. if (!n)
  136. return NULL;
  137. offset = h - n->host_start;
  138. d = n->tgt->tgt_start + n->tgt_offset + offset;
  139. return d;
  140. }
  141. /* Return the host pointer that corresponds to device data D. Or NULL
  142. if no mapping. */
  143. void *
  144. acc_hostptr (void *d)
  145. {
  146. splay_tree_key n;
  147. void *h;
  148. void *offset;
  149. goacc_lazy_initialize ();
  150. struct goacc_thread *thr = goacc_thread ();
  151. n = lookup_dev (thr->dev->openacc.data_environ, d, 1);
  152. if (!n)
  153. return NULL;
  154. offset = d - n->tgt->tgt_start + n->tgt_offset;
  155. h = n->host_start + offset;
  156. return h;
  157. }
  158. /* Return 1 if host data [H,+S] is present on the device. */
  159. int
  160. acc_is_present (void *h, size_t s)
  161. {
  162. splay_tree_key n;
  163. if (!s || !h)
  164. return 0;
  165. goacc_lazy_initialize ();
  166. struct goacc_thread *thr = goacc_thread ();
  167. struct gomp_device_descr *acc_dev = thr->dev;
  168. n = lookup_host (acc_dev, h, s);
  169. if (n && ((uintptr_t)h < n->host_start
  170. || (uintptr_t)h + s > n->host_end
  171. || s > n->host_end - n->host_start))
  172. n = NULL;
  173. return n != NULL;
  174. }
  175. /* Create a mapping for host [H,+S] -> device [D,+S] */
  176. void
  177. acc_map_data (void *h, void *d, size_t s)
  178. {
  179. struct target_mem_desc *tgt;
  180. size_t mapnum = 1;
  181. void *hostaddrs = h;
  182. void *devaddrs = d;
  183. size_t sizes = s;
  184. unsigned short kinds = GOMP_MAP_ALLOC;
  185. goacc_lazy_initialize ();
  186. struct goacc_thread *thr = goacc_thread ();
  187. struct gomp_device_descr *acc_dev = thr->dev;
  188. if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
  189. {
  190. if (d != h)
  191. gomp_fatal ("cannot map data on shared-memory system");
  192. tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
  193. }
  194. else
  195. {
  196. struct goacc_thread *thr = goacc_thread ();
  197. if (!d || !h || !s)
  198. gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
  199. (void *)h, (int)s, (void *)d, (int)s);
  200. if (lookup_host (acc_dev, h, s))
  201. gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
  202. (int)s);
  203. if (lookup_dev (thr->dev->openacc.data_environ, d, s))
  204. gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
  205. (int)s);
  206. tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
  207. &kinds, true, false);
  208. }
  209. tgt->prev = acc_dev->openacc.data_environ;
  210. acc_dev->openacc.data_environ = tgt;
  211. }
  212. void
  213. acc_unmap_data (void *h)
  214. {
  215. struct goacc_thread *thr = goacc_thread ();
  216. struct gomp_device_descr *acc_dev = thr->dev;
  217. /* No need to call lazy open, as the address must have been mapped. */
  218. size_t host_size;
  219. splay_tree_key n = lookup_host (acc_dev, h, 1);
  220. struct target_mem_desc *t;
  221. if (!n)
  222. gomp_fatal ("%p is not a mapped block", (void *)h);
  223. host_size = n->host_end - n->host_start;
  224. if (n->host_start != (uintptr_t) h)
  225. gomp_fatal ("[%p,%d] surrounds1 %p",
  226. (void *) n->host_start, (int) host_size, (void *) h);
  227. t = n->tgt;
  228. if (t->refcount == 2)
  229. {
  230. struct target_mem_desc *tp;
  231. /* This is the last reference, so pull the descriptor off the
  232. chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
  233. freeing the device memory. */
  234. t->tgt_end = 0;
  235. t->to_free = 0;
  236. gomp_mutex_lock (&acc_dev->lock);
  237. for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
  238. tp = t, t = t->prev)
  239. if (n->tgt == t)
  240. {
  241. if (tp)
  242. tp->prev = t->prev;
  243. else
  244. acc_dev->openacc.data_environ = t->prev;
  245. break;
  246. }
  247. gomp_mutex_unlock (&acc_dev->lock);
  248. }
  249. gomp_unmap_vars (t, true);
  250. }
  251. #define FLAG_PRESENT (1 << 0)
  252. #define FLAG_CREATE (1 << 1)
  253. #define FLAG_COPY (1 << 2)
  254. static void *
  255. present_create_copy (unsigned f, void *h, size_t s)
  256. {
  257. void *d;
  258. splay_tree_key n;
  259. if (!h || !s)
  260. gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);
  261. goacc_lazy_initialize ();
  262. struct goacc_thread *thr = goacc_thread ();
  263. struct gomp_device_descr *acc_dev = thr->dev;
  264. n = lookup_host (acc_dev, h, s);
  265. if (n)
  266. {
  267. /* Present. */
  268. d = (void *) (n->tgt->tgt_start + n->tgt_offset);
  269. if (!(f & FLAG_PRESENT))
  270. gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
  271. (void *)h, (int)s, (void *)d, (int)s);
  272. if ((h + s) > (void *)n->host_end)
  273. gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
  274. }
  275. else if (!(f & FLAG_CREATE))
  276. {
  277. gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
  278. }
  279. else
  280. {
  281. struct target_mem_desc *tgt;
  282. size_t mapnum = 1;
  283. unsigned short kinds;
  284. void *hostaddrs = h;
  285. if (f & FLAG_COPY)
  286. kinds = GOMP_MAP_TO;
  287. else
  288. kinds = GOMP_MAP_ALLOC;
  289. tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
  290. false);
  291. gomp_mutex_lock (&acc_dev->lock);
  292. d = tgt->to_free;
  293. tgt->prev = acc_dev->openacc.data_environ;
  294. acc_dev->openacc.data_environ = tgt;
  295. gomp_mutex_unlock (&acc_dev->lock);
  296. }
  297. return d;
  298. }
  299. void *
  300. acc_create (void *h, size_t s)
  301. {
  302. return present_create_copy (FLAG_CREATE, h, s);
  303. }
  304. void *
  305. acc_copyin (void *h, size_t s)
  306. {
  307. return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
  308. }
  309. void *
  310. acc_present_or_create (void *h, size_t s)
  311. {
  312. return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
  313. }
  314. void *
  315. acc_present_or_copyin (void *h, size_t s)
  316. {
  317. return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
  318. }
  319. #define FLAG_COPYOUT (1 << 0)
  320. static void
  321. delete_copyout (unsigned f, void *h, size_t s)
  322. {
  323. size_t host_size;
  324. splay_tree_key n;
  325. void *d;
  326. struct goacc_thread *thr = goacc_thread ();
  327. struct gomp_device_descr *acc_dev = thr->dev;
  328. n = lookup_host (acc_dev, h, s);
  329. /* No need to call lazy open, as the data must already have been
  330. mapped. */
  331. if (!n)
  332. gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
  333. d = (void *) (n->tgt->tgt_start + n->tgt_offset);
  334. host_size = n->host_end - n->host_start;
  335. if (n->host_start != (uintptr_t) h || host_size != s)
  336. gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
  337. (void *) n->host_start, (int) host_size, (void *) h, (int) s);
  338. if (f & FLAG_COPYOUT)
  339. acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
  340. acc_unmap_data (h);
  341. acc_dev->free_func (acc_dev->target_id, d);
  342. }
  /* Unmap host range [H,+S) and free its device memory via delete_copyout,
     with no flags set.  */

  void
  acc_delete (void *h , size_t s)
  {
    const unsigned no_flags = 0;

    delete_copyout (no_flags, h, s);
  }
  348. void acc_copyout (void *h, size_t s)
  349. {
  350. delete_copyout (FLAG_COPYOUT, h, s);
  351. }
  352. static void
  353. update_dev_host (int is_dev, void *h, size_t s)
  354. {
  355. splay_tree_key n;
  356. void *d;
  357. struct goacc_thread *thr = goacc_thread ();
  358. struct gomp_device_descr *acc_dev = thr->dev;
  359. n = lookup_host (acc_dev, h, s);
  360. /* No need to call lazy open, as the data must already have been
  361. mapped. */
  362. if (!n)
  363. gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
  364. d = (void *) (n->tgt->tgt_start + n->tgt_offset);
  365. if (is_dev)
  366. acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
  367. else
  368. acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
  369. }
  /* Call update_dev_host for host range [H,+S) in the host-to-device
     direction.  */

  void
  acc_update_device (void *h, size_t s)
  {
    const int to_device = 1;

    update_dev_host (to_device, h, s);
  }
  /* Call update_dev_host for host range [H,+S) in the device-to-host
     direction.  */

  void
  acc_update_self (void *h, size_t s)
  {
    const int to_device = 0;

    update_dev_host (to_device, h, s);
  }
  380. void
  381. gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
  382. void *kinds)
  383. {
  384. struct target_mem_desc *tgt;
  385. struct goacc_thread *thr = goacc_thread ();
  386. struct gomp_device_descr *acc_dev = thr->dev;
  387. gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
  388. tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
  389. NULL, sizes, kinds, true, false);
  390. gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
  391. tgt->prev = acc_dev->openacc.data_environ;
  392. acc_dev->openacc.data_environ = tgt;
  393. }
  394. void
  395. gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
  396. {
  397. struct goacc_thread *thr = goacc_thread ();
  398. struct gomp_device_descr *acc_dev = thr->dev;
  399. splay_tree_key n;
  400. struct target_mem_desc *t;
  401. int minrefs = (mapnum == 1) ? 2 : 3;
  402. n = lookup_host (acc_dev, h, 1);
  403. if (!n)
  404. gomp_fatal ("%p is not a mapped block", (void *)h);
  405. gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
  406. t = n->tgt;
  407. struct target_mem_desc *tp;
  408. gomp_mutex_lock (&acc_dev->lock);
  409. if (t->refcount == minrefs)
  410. {
  411. /* This is the last reference, so pull the descriptor off the
  412. chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
  413. freeing the device memory. */
  414. t->tgt_end = 0;
  415. t->to_free = 0;
  416. for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
  417. tp = t, t = t->prev)
  418. {
  419. if (n->tgt == t)
  420. {
  421. if (tp)
  422. tp->prev = t->prev;
  423. else
  424. acc_dev->openacc.data_environ = t->prev;
  425. break;
  426. }
  427. }
  428. }
  429. if (force_copyfrom)
  430. t->list[0]->copy_from = 1;
  431. gomp_mutex_unlock (&acc_dev->lock);
  432. /* If running synchronously, unmap immediately. */
  433. if (async < acc_async_noval)
  434. gomp_unmap_vars (t, true);
  435. else
  436. {
  437. gomp_copy_from_async (t);
  438. acc_dev->openacc.register_async_cleanup_func (t);
  439. }
  440. gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
  441. }