  1. /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
  2. Contributed by Jakub Jelinek <jakub@redhat.com>.
  3. This file is part of the GNU Offloading and Multi Processing Library
  4. (libgomp).
  5. Libgomp is free software; you can redistribute it and/or modify it
  6. under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 3, or (at your option)
  8. any later version.
  9. Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
  10. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  11. FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  12. more details.
  13. Under Section 7 of GPL version 3, you are granted additional
  14. permissions described in the GCC Runtime Library Exception, version
  15. 3.1, as published by the Free Software Foundation.
  16. You should have received a copy of the GNU General Public License and
  17. a copy of the GCC Runtime Library Exception along with this program;
  18. see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. <http://www.gnu.org/licenses/>. */
  20. /* This file contains the support of offloading. */
  21. #include "config.h"
  22. #include "libgomp.h"
  23. #include "oacc-plugin.h"
  24. #include "oacc-int.h"
  25. #include "gomp-constants.h"
  26. #include <limits.h>
  27. #include <stdbool.h>
  28. #include <stdlib.h>
  29. #ifdef HAVE_INTTYPES_H
  30. # include <inttypes.h> /* For PRIu64. */
  31. #endif
  32. #include <string.h>
  33. #include <assert.h>
  34. #ifdef PLUGIN_SUPPORT
  35. #include <dlfcn.h>
  36. #include "plugin-suffix.h"
  37. #endif
/* Forward declaration; defined later in this file and run exactly once
   via pthread_once below.  */
static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  Protects offload_images and
   num_offload_images below.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor, and
   pointer to target data.  */
struct offload_image_descr {
  /* Which kind of offload target this image was compiled for.  */
  enum offload_target_type type;
  /* Descriptor of the host-side func/var tables (four pointers; see
     gomp_offload_image_to_device).  */
  void *host_table;
  /* Opaque payload handed to the plugin's load_image hook.  */
  void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
  61. /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
  62. static void *
  63. gomp_realloc_unlock (void *old, size_t size)
  64. {
  65. void *ret = realloc (old, size);
  66. if (ret == NULL)
  67. {
  68. gomp_mutex_unlock (&register_lock);
  69. gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
  70. }
  71. return ret;
  72. }
  73. /* The comparison function. */
  74. attribute_hidden int
  75. splay_compare (splay_tree_key x, splay_tree_key y)
  76. {
  77. if (x->host_start == x->host_end
  78. && y->host_start == y->host_end)
  79. return 0;
  80. if (x->host_end <= y->host_start)
  81. return -1;
  82. if (x->host_start >= y->host_end)
  83. return 1;
  84. return 0;
  85. }
  86. #include "splay-tree.h"
/* Run the one-time offload-target initialization (gomp_target_init);
   pthread_once guarantees the init routine executes exactly once even if
   several threads call this concurrently.  */
attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}
/* Return the number of devices usable for OpenMP offloading
   (GOMP_OFFLOAD_CAP_OPENMP_400 capable), initializing the plugin layer
   first if that has not happened yet.  */
attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
  98. static struct gomp_device_descr *
  99. resolve_device (int device_id)
  100. {
  101. if (device_id == GOMP_DEVICE_ICV)
  102. {
  103. struct gomp_task_icv *icv = gomp_icv (false);
  104. device_id = icv->default_device_var;
  105. }
  106. if (device_id < 0 || device_id >= gomp_get_num_devices ())
  107. return NULL;
  108. return &devices[device_id];
  109. }
  110. /* Handle the case where splay_tree_lookup found oldn for newn.
  111. Helper function of gomp_map_vars. */
  112. static inline void
  113. gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
  114. splay_tree_key newn, unsigned char kind)
  115. {
  116. if ((kind & GOMP_MAP_FLAG_FORCE)
  117. || oldn->host_start > newn->host_start
  118. || oldn->host_end < newn->host_end)
  119. {
  120. gomp_mutex_unlock (&devicep->lock);
  121. gomp_fatal ("Trying to map into device [%p..%p) object when "
  122. "[%p..%p) is already mapped",
  123. (void *) newn->host_start, (void *) newn->host_end,
  124. (void *) oldn->host_start, (void *) oldn->host_end);
  125. }
  126. oldn->refcount++;
  127. }
  128. static int
  129. get_kind (bool is_openacc, void *kinds, int idx)
  130. {
  131. return is_openacc ? ((unsigned short *) kinds)[idx]
  132. : ((unsigned char *) kinds)[idx];
  133. }
/* Map MAPNUM host address ranges (HOSTADDRS/SIZES/KINDS) onto DEVICEP,
   reusing existing mappings where possible and allocating one contiguous
   device block for everything newly mapped.  DEVADDRS, when non-NULL,
   supplies a pre-existing device address instead (OpenACC deviceptr case;
   only a single mapping is allowed then).  IS_OPENACC selects the 16-bit
   kind encoding, IS_TARGET additionally reserves and fills a leading
   array of device pointers for a target region's arguments.  Returns a
   refcounted target_mem_desc; caller owns one reference.  */
attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool is_openacc, bool is_target)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  /* Kind bytes hold the mapping type in the low bits and the required
     alignment (as a log2) in the high bits; the split point differs
     between OpenACC and OpenMP encodings.  */
  const int rshift = is_openacc ? 8 : 3;
  const int typemask = is_openacc ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = 1;
  tgt->device_descr = devicep;

  if (mapnum == 0)
    return tgt;

  tgt_align = sizeof (void *);
  tgt_size = 0;
  if (is_target)
    {
      /* Reserve room at the start of the device block for the argument
         pointer array, generously aligned.  */
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
    }

  gomp_mutex_lock (&devicep->lock);

  /* First pass: find which ranges are already mapped and compute the
     total size/alignment needed for the ones that are not.  */
  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (is_openacc, kinds, i);
      if (hostaddrs[i] == NULL)
        {
          tgt->list[i] = NULL;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        /* A POINTER mapping covers just the pointer object itself.  */
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
      if (n)
        {
          tgt->list[i] = n;
          gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask);
        }
      else
        {
          tgt->list[i] = NULL;
          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              /* Consume the POINTER entries that immediately follow a
                 TO_PSET and point inside it; they will be initialized as
                 part of the pointer-set copy in the second pass.  Note
                 that I is advanced past them here.  */
              size_t j;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
                                         & typemask))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j] = NULL;
                    i++;
                  }
            }
        }
    }

  if (devaddrs)
    {
      /* Caller supplied the device address directly; no allocation.  */
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || is_target)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;
    }
  else
    {
      /* Everything was already mapped; nothing to allocate or free.  */
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  /* Second pass re-walks offsets from 0 (after the reserved pointer
     array, if any), assigning each new mapping its slot in the block.  */
  tgt_size = 0;
  if (is_target)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt)
    {
      /* One splay tree node per new mapping, all in one allocation owned
         by TGT.  */
      tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j;

      for (i = 0; i < mapnum; i++)
        if (tgt->list[i] == NULL)
          {
            int kind = get_kind (is_openacc, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            /* Re-lookup: an earlier iteration of this pass may have
               inserted a covering mapping already.  */
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n)
              {
                tgt->list[i] = n;
                gomp_map_vars_existing (devicep, n, k, kind & typemask);
              }
            else
              {
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i] = k;
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                k->tgt = tgt;
                k->tgt_offset = tgt_size;
                tgt_size += k->host_end - k->host_start;
                k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
                k->refcount = 1;
                k->async_refcount = 0;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                    /* No host-to-device copy needed.  */
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                    /* FIXME: Perhaps add some smarts, like if copying
                       several adjacent fields from host to target, use some
                       host buffer to avoid sending each var individually.  */
                    devicep->host2dev_func (devicep->target_id,
                                            (void *) (tgt->tgt_start
                                                      + k->tgt_offset),
                                            (void *) k->host_start,
                                            k->host_end - k->host_start);
                    break;
                  case GOMP_MAP_POINTER:
                    /* The mapped object is a host pointer; translate the
                       address it holds into the corresponding device
                       address and store that on the device.  */
                    cur_node.host_start
                      = (uintptr_t) *(void **) k->host_start;
                    if (cur_node.host_start == (uintptr_t) NULL)
                      {
                        /* NULL stays NULL on the device.  */
                        cur_node.tgt_offset = (uintptr_t) NULL;
                        /* FIXME: see above FIXME comment.  */
                        devicep->host2dev_func (devicep->target_id,
                                                (void *) (tgt->tgt_start
                                                          + k->tgt_offset),
                                                (void *) &cur_node.tgt_offset,
                                                sizeof (void *));
                        break;
                      }
                    /* Add bias to the pointer value.  */
                    cur_node.host_start += sizes[i];
                    cur_node.host_end = cur_node.host_start + 1;
                    n = splay_tree_lookup (mem_map, &cur_node);
                    if (n == NULL)
                      {
                        /* Could be possibly zero size array section.  */
                        cur_node.host_end--;
                        n = splay_tree_lookup (mem_map, &cur_node);
                        if (n == NULL)
                          {
                            cur_node.host_start--;
                            n = splay_tree_lookup (mem_map, &cur_node);
                            cur_node.host_start++;
                          }
                      }
                    if (n == NULL)
                      {
                        gomp_mutex_unlock (&devicep->lock);
                        gomp_fatal ("Pointer target of array section "
                                    "wasn't mapped");
                      }
                    cur_node.host_start -= n->host_start;
                    cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                          + cur_node.host_start;
                    /* At this point tgt_offset is target address of the
                       array section.  Now subtract bias to get what we want
                       to initialize the pointer with.  */
                    cur_node.tgt_offset -= sizes[i];
                    /* FIXME: see above FIXME comment.  */
                    devicep->host2dev_func (devicep->target_id,
                                            (void *) (tgt->tgt_start
                                                      + k->tgt_offset),
                                            (void *) &cur_node.tgt_offset,
                                            sizeof (void *));
                    break;
                  case GOMP_MAP_TO_PSET:
                    /* Copy the whole pointer-set block, then fix up each
                       trailing POINTER entry that targets it (the same
                       entries skipped in the first pass).  */
                    /* FIXME: see above FIXME comment.  */
                    devicep->host2dev_func (devicep->target_id,
                                            (void *) (tgt->tgt_start
                                                      + k->tgt_offset),
                                            (void *) k->host_start,
                                            k->host_end - k->host_start);
                    for (j = i + 1; j < mapnum; j++)
                      if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
                                               & typemask))
                        break;
                      else if ((uintptr_t) hostaddrs[j] < k->host_start
                               || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                                   > k->host_end))
                        break;
                      else
                        {
                          tgt->list[j] = k;
                          k->refcount++;
                          cur_node.host_start
                            = (uintptr_t) *(void **) hostaddrs[j];
                          if (cur_node.host_start == (uintptr_t) NULL)
                            {
                              cur_node.tgt_offset = (uintptr_t) NULL;
                              /* FIXME: see above FIXME comment.  */
                              devicep->host2dev_func (devicep->target_id,
                                (void *) (tgt->tgt_start + k->tgt_offset
                                          + ((uintptr_t) hostaddrs[j]
                                             - k->host_start)),
                                (void *) &cur_node.tgt_offset,
                                sizeof (void *));
                              i++;
                              continue;
                            }
                          /* Add bias to the pointer value.  */
                          cur_node.host_start += sizes[j];
                          cur_node.host_end = cur_node.host_start + 1;
                          n = splay_tree_lookup (mem_map, &cur_node);
                          if (n == NULL)
                            {
                              /* Could be possibly zero size array section.  */
                              cur_node.host_end--;
                              n = splay_tree_lookup (mem_map, &cur_node);
                              if (n == NULL)
                                {
                                  cur_node.host_start--;
                                  n = splay_tree_lookup (mem_map, &cur_node);
                                  cur_node.host_start++;
                                }
                            }
                          if (n == NULL)
                            {
                              gomp_mutex_unlock (&devicep->lock);
                              gomp_fatal ("Pointer target of array section "
                                          "wasn't mapped");
                            }
                          cur_node.host_start -= n->host_start;
                          cur_node.tgt_offset = n->tgt->tgt_start
                                                + n->tgt_offset
                                                + cur_node.host_start;
                          /* At this point tgt_offset is target address of the
                             array section.  Now subtract bias to get what we
                             want to initialize the pointer with.  */
                          cur_node.tgt_offset -= sizes[j];
                          /* FIXME: see above FIXME comment.  */
                          devicep->host2dev_func (devicep->target_id,
                            (void *) (tgt->tgt_start + k->tgt_offset
                                      + ((uintptr_t) hostaddrs[j]
                                         - k->host_start)),
                            (void *) &cur_node.tgt_offset,
                            sizeof (void *));
                          i++;
                        }
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%"PRIu64" (0x%"PRIx64"))",
                                  (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size);
#else
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%lu (0x%lx))", (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));
                    devicep->host2dev_func (devicep->target_id,
                                            (void *) (tgt->tgt_start
                                                      + k->tgt_offset),
                                            (void *) k->host_start,
                                            sizeof (void *));
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }
                array++;
              }
          }
    }

  if (is_target)
    {
      /* Fill the reserved leading array with the device address of each
         argument (NULL for unmapped entries).  */
      for (i = 0; i < mapnum; i++)
        {
          if (tgt->list[i] == NULL)
            cur_node.tgt_offset = (uintptr_t) NULL;
          else
            cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
                                  + tgt->list[i]->tgt_offset;
          /* FIXME: see above FIXME comment.  */
          devicep->host2dev_func (devicep->target_id,
                                  (void *) (tgt->tgt_start
                                            + i * sizeof (void *)),
                                  (void *) &cur_node.tgt_offset,
                                  sizeof (void *));
        }
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
  476. static void
  477. gomp_unmap_tgt (struct target_mem_desc *tgt)
  478. {
  479. /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
  480. if (tgt->tgt_end)
  481. tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);
  482. free (tgt->array);
  483. free (tgt);
  484. }
  485. /* Decrease the refcount for a set of mapped variables, and queue asychronous
  486. copies from the device back to the host after any work that has been issued.
  487. Because the regions are still "live", increment an asynchronous reference
  488. count to indicate that they should not be unmapped from host-side data
  489. structures until the asynchronous copy has completed. */
  490. attribute_hidden void
  491. gomp_copy_from_async (struct target_mem_desc *tgt)
  492. {
  493. struct gomp_device_descr *devicep = tgt->device_descr;
  494. size_t i;
  495. gomp_mutex_lock (&devicep->lock);
  496. for (i = 0; i < tgt->list_count; i++)
  497. if (tgt->list[i] == NULL)
  498. ;
  499. else if (tgt->list[i]->refcount > 1)
  500. {
  501. tgt->list[i]->refcount--;
  502. tgt->list[i]->async_refcount++;
  503. }
  504. else
  505. {
  506. splay_tree_key k = tgt->list[i];
  507. if (k->copy_from)
  508. devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
  509. (void *) (k->tgt->tgt_start + k->tgt_offset),
  510. k->host_end - k->host_start);
  511. }
  512. gomp_mutex_unlock (&devicep->lock);
  513. }
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already, i.e. by gomp_copy_from_async above.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  /* An empty descriptor owns nothing beyond itself.  */
  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    if (tgt->list[i] == NULL)
      ;
    else if (tgt->list[i]->refcount > 1)
      /* Others still reference this mapping; just drop our reference.  */
      tgt->list[i]->refcount--;
    else if (tgt->list[i]->async_refcount > 0)
      /* A pending async copy-back holds the mapping alive; consume one
         async reference instead of tearing the mapping down.  */
      tgt->list[i]->async_refcount--;
    else
      {
	/* Last reference: copy back if requested, then remove the mapping
	   and possibly free its owning target_mem_desc.  */
	splay_tree_key k = tgt->list[i];
	if (k->copy_from && do_copyfrom)
	  devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
				  (void *) (k->tgt->tgt_start + k->tgt_offset),
				  k->host_end - k->host_start);
	splay_tree_remove (&devicep->mem_map, k);
	if (k->tgt->refcount > 1)
	  k->tgt->refcount--;
	else
	  gomp_unmap_tgt (k->tgt);
      }

  /* Finally drop TGT's own self-reference.  */
  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}
/* Implement the update motion clauses: for each non-zero-sized range in
   HOSTADDRS/SIZES, copy data to and/or from DEVICEP according to the
   corresponding entry in KINDS.  Every range must lie inside an existing
   mapping; anything else is fatal.  A NULL DEVICEP or MAPNUM of zero is a
   no-op.  */
static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool is_openacc)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  /* OpenACC uses 16-bit kinds, OpenMP 8-bit; mask out the type bits.  */
  const int typemask = is_openacc ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (is_openacc, kinds, i);
	    /* The update range must fit entirely inside the mapping.  */
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }
	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      devicep->host2dev_func (devicep->target_id,
				      (void *) (n->tgt->tgt_start
						+ n->tgt_offset
						+ cur_node.host_start
						- n->host_start),
				      (void *) cur_node.host_start,
				      cur_node.host_end - cur_node.host_start);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      devicep->dev2host_func (devicep->target_id,
				      (void *) cur_node.host_start,
				      (void *) (n->tgt->tgt_start
						+ n->tgt_offset
						+ cur_node.host_start
						- n->host_start),
				      cur_node.host_end - cur_node.host_start);
	  }
	else
	  {
	    gomp_mutex_unlock (&devicep->lock);
	    gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
			(void *) cur_node.host_start,
			(void *) cur_node.host_end);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}
/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
   And insert to splay tree the mapping between addresses from HOST_TABLE and
   from loaded target image.  HOST_TABLE is an array of four pointers:
   [func_start, func_end, var_start, var_end).  IS_REGISTER_LOCK says
   whether REGISTER_LOCK is held by the caller and must be released on the
   fatal-error paths.  DEVICEP->lock must be held on entry.  */
static void
gomp_offload_image_to_device (struct gomp_device_descr *devicep,
			      void *host_table, void *target_data,
			      bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries
    = devicep->load_image_func (devicep->target_id, target_data, &target_table);

  /* The plugin must hand back exactly one address pair per host entry.  */
  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Can't map target functions or variables");
    }

  /* Insert host-target address mapping into splay tree.  All nodes live
     in one array owned by a single target_mem_desc with no device
     memory of its own (tgt_start == tgt_end == 0).  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = 1;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  /* One node per host function; the host range is a single byte, which is
     enough for exact-address lookups.  */
  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = 1;
      k->async_refcount = 0;
      k->copy_from = false;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* One node per host variable; host and target sizes must agree.  */
  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      if (target_var->end - target_var->start
	  != (uintptr_t) host_var_table[i * 2 + 1])
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Can't map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = 1;
      k->async_refcount = 0;
      k->copy_from = false;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* The address-pair table was allocated by the plugin for us.  */
  free (target_table);
}
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_register (void *host_table, enum offload_target_type target_type,
		       void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  Devices initialized later pick
     the image up from the pending array below (see gomp_init_device).  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type && devicep->is_initialized)
	gomp_offload_image_to_device (devicep, host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_unregister (void *host_table, enum offload_target_type target_type,
			 void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];
  int i;

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      int j;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type != target_type || !devicep->is_initialized)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  continue;
	}

      devicep->unload_image_func (devicep->target_id, target_data);

      /* Remove mapping from splay tree.  Remember the first entry's node
	 so the owning target_mem_desc can be freed afterwards.  */
      struct splay_tree_key_s k;
      splay_tree_key node = NULL;
      if (num_funcs > 0)
	{
	  k.host_start = (uintptr_t) host_func_table[0];
	  k.host_end = k.host_start + 1;
	  node = splay_tree_lookup (&devicep->mem_map, &k);
	}
      else if (num_vars > 0)
	{
	  k.host_start = (uintptr_t) host_var_table[0];
	  k.host_end = k.host_start + (uintptr_t) host_var_table[1];
	  node = splay_tree_lookup (&devicep->mem_map, &k);
	}

      for (j = 0; j < num_funcs; j++)
	{
	  k.host_start = (uintptr_t) host_func_table[j];
	  k.host_end = k.host_start + 1;
	  splay_tree_remove (&devicep->mem_map, &k);
	}

      for (j = 0; j < num_vars; j++)
	{
	  k.host_start = (uintptr_t) host_var_table[j * 2];
	  k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
	  splay_tree_remove (&devicep->mem_map, &k);
	}

      /* NOTE(review): free (node) appears to rely on NODE being the key of
	 the FIRST element of node->tgt->array (the first mapping inserted
	 by gomp_offload_image_to_device), so that freeing it frees the
	 whole node array — confirm this invariant holds for every image
	 layout.  */
      if (node)
	{
	  free (node->tgt);
	  free (node);
	}

      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images by swapping in the last
     element; order is not significant.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}
  793. /* This function initializes the target device, specified by DEVICEP. DEVICEP
  794. must be locked on entry, and remains locked on return. */
  795. attribute_hidden void
  796. gomp_init_device (struct gomp_device_descr *devicep)
  797. {
  798. int i;
  799. devicep->init_device_func (devicep->target_id);
  800. /* Load to device all images registered by the moment. */
  801. for (i = 0; i < num_offload_images; i++)
  802. {
  803. struct offload_image_descr *image = &offload_images[i];
  804. if (image->type == devicep->type)
  805. gomp_offload_image_to_device (devicep, image->host_table,
  806. image->target_data, false);
  807. }
  808. devicep->is_initialized = true;
  809. }
  810. /* Free address mapping tables. MM must be locked on entry, and remains locked
  811. on return. */
  812. attribute_hidden void
  813. gomp_free_memmap (struct splay_tree_s *mem_map)
  814. {
  815. while (mem_map->root)
  816. {
  817. struct target_mem_desc *tgt = mem_map->root->key.tgt;
  818. splay_tree_remove (mem_map, &mem_map->root->key);
  819. free (tgt->array);
  820. free (tgt);
  821. }
  822. }
  823. /* This function de-initializes the target device, specified by DEVICEP.
  824. DEVICEP must be locked on entry, and remains locked on return. */
  825. attribute_hidden void
  826. gomp_fini_device (struct gomp_device_descr *devicep)
  827. {
  828. if (devicep->is_initialized)
  829. devicep->fini_device_func (devicep->target_id);
  830. devicep->is_initialized = false;
  831. }
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* No device, or a device without OpenMP 4.0 support: run FN on the
     host instead.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      /* Host fallback.  Save the current thread state and run FN from a
	 zeroed one, restoring the saved state afterwards.  */
      struct gomp_thread old_thr, *thr = gomp_thread ();
      old_thr = *thr;
      memset (thr, '\0', sizeof (*thr));
      if (gomp_places_list)
	{
	  /* Keep the current place, but expose the whole place
	     partition to the region.  */
	  thr->place = old_thr.place;
	  thr->ts.place_partition_len = gomp_places_list_len;
	}
      fn (hostaddrs);
      /* Release any state the region created on the scratch thread
	 before restoring the caller's thread.  */
      gomp_free_thread (thr);
      *thr = old_thr;
      return;
    }

  /* Lazily initialize the device, under its lock.  */
  gomp_mutex_lock (&devicep->lock);
  if (!devicep->is_initialized)
    gomp_init_device (devicep);
  gomp_mutex_unlock (&devicep->lock);

  void *fn_addr;

  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    /* The device can execute host code directly; no translation needed.  */
    fn_addr = (void *) fn;
  else
    {
      /* Translate the host address of FN into its device counterpart via
	 the address-mapping splay tree.  Functions are entered with a
	 one-byte range, hence host_start + 1.  */
      gomp_mutex_lock (&devicep->lock);
      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      if (tgt_fn == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("Target function wasn't mapped");
	}
      gomp_mutex_unlock (&devicep->lock);

      fn_addr = (void *) tgt_fn->tgt_offset;
    }

  /* Map all variables to the device for the duration of the region.  */
  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     true);
  /* Same thread-state save/clear/restore dance as in the host fallback
     above, but around the device execution.  */
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
  gomp_free_thread (thr);
  *thr = old_thr;
  /* Copy back and unmap the region's variables.  */
  gomp_unmap_vars (tgt_vars, true);
}
  902. void
  903. GOMP_target_data (int device, const void *unused, size_t mapnum,
  904. void **hostaddrs, size_t *sizes, unsigned char *kinds)
  905. {
  906. struct gomp_device_descr *devicep = resolve_device (device);
  907. if (devicep == NULL
  908. || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
  909. {
  910. /* Host fallback. */
  911. struct gomp_task_icv *icv = gomp_icv (false);
  912. if (icv->target_data)
  913. {
  914. /* Even when doing a host fallback, if there are any active
  915. #pragma omp target data constructs, need to remember the
  916. new #pragma omp target data, otherwise GOMP_target_end_data
  917. would get out of sync. */
  918. struct target_mem_desc *tgt
  919. = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
  920. tgt->prev = icv->target_data;
  921. icv->target_data = tgt;
  922. }
  923. return;
  924. }
  925. gomp_mutex_lock (&devicep->lock);
  926. if (!devicep->is_initialized)
  927. gomp_init_device (devicep);
  928. gomp_mutex_unlock (&devicep->lock);
  929. struct target_mem_desc *tgt
  930. = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
  931. false);
  932. struct gomp_task_icv *icv = gomp_icv (true);
  933. tgt->prev = icv->target_data;
  934. icv->target_data = tgt;
  935. }
  936. void
  937. GOMP_target_end_data (void)
  938. {
  939. struct gomp_task_icv *icv = gomp_icv (false);
  940. if (icv->target_data)
  941. {
  942. struct target_mem_desc *tgt = icv->target_data;
  943. icv->target_data = tgt->prev;
  944. gomp_unmap_vars (tgt, true);
  945. }
  946. }
  947. void
  948. GOMP_target_update (int device, const void *unused, size_t mapnum,
  949. void **hostaddrs, size_t *sizes, unsigned char *kinds)
  950. {
  951. struct gomp_device_descr *devicep = resolve_device (device);
  952. if (devicep == NULL
  953. || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
  954. return;
  955. gomp_mutex_lock (&devicep->lock);
  956. if (!devicep->is_initialized)
  957. gomp_init_device (devicep);
  958. gomp_mutex_unlock (&devicep->lock);
  959. gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
  960. }
  961. void
  962. GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
  963. {
  964. if (thread_limit)
  965. {
  966. struct gomp_task_icv *icv = gomp_icv (true);
  967. icv->thread_limit_var
  968. = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
  969. }
  970. (void) num_teams;
  971. }
  972. #ifdef PLUGIN_SUPPORT
/* This function tries to load a plugin for DEVICE.  Name of plugin is passed
   in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.
   On failure an error is reported via gomp_error and the plugin handle, if
   any, is closed again.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;
  int optional_present, optional_total;

  /* Clear any existing error.  */
  dlerror ();

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    {
      err = dlerror ();
      goto out;
    }

  /* Check if all required functions are available in the plugin and store
     their handlers.  dlsym may legitimately return NULL, so success is
     detected by dlerror staying clear rather than by the returned value.  */
#define DLSYM(f)							\
  do									\
    {									\
      device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f);	\
      err = dlerror ();							\
      if (err != NULL)							\
	goto out;							\
    }									\
  while (0)
  /* Similar, but missing functions are not an error.  Instead the counters
     OPTIONAL_PRESENT/OPTIONAL_TOTAL are maintained so a whole group of
     optional functions can later be checked for all-or-nothing presence.  */
#define DLSYM_OPT(f, n)							\
  do									\
    {									\
      const char *tmp_err;						\
      device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n);	\
      tmp_err = dlerror ();						\
      if (tmp_err == NULL)						\
	optional_present++;						\
      else								\
	last_missing = #n;						\
      optional_total++;							\
    }									\
  while (0)

  /* Functions every plugin must provide.  */
  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  /* run is only required for OpenMP-capable plugins.  */
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    DLSYM (run);
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      optional_present = optional_total = 0;
      DLSYM_OPT (openacc.exec, openacc_parallel);
      DLSYM_OPT (openacc.register_async_cleanup,
		 openacc_register_async_cleanup);
      DLSYM_OPT (openacc.async_test, openacc_async_test);
      DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
      DLSYM_OPT (openacc.async_wait, openacc_async_wait);
      DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
      DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
      DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
      DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
      DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
      DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
      /* Require all the OpenACC handlers if we have
	 GOMP_OFFLOAD_CAP_OPENACC_200.  */
      if (optional_present != optional_total)
	{
	  err = "plugin missing OpenACC handler function";
	  goto out;
	}
      optional_present = optional_total = 0;
      DLSYM_OPT (openacc.cuda.get_current_device,
		 openacc_get_current_cuda_device);
      DLSYM_OPT (openacc.cuda.get_current_context,
		 openacc_get_current_cuda_context);
      DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
      DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
      /* Make sure all the CUDA functions are there if any of them are.  */
      if (optional_present && optional_present != optional_total)
	{
	  err = "plugin missing OpenACC CUDA handler function";
	  goto out;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

 out:
  if (err != NULL)
    {
      gomp_error ("while loading %s: %s", plugin_name, err);
      if (last_missing)
	gomp_error ("missing function was %s", last_missing);
      if (plugin_handle)
	dlclose (plugin_handle);
    }

  return err == NULL;
}
  1080. /* This function initializes the runtime needed for offloading.
  1081. It parses the list of offload targets and tries to load the plugins for
  1082. these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
  1083. will be set, and the array DEVICES initialized, containing descriptors for
  1084. corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
  1085. by the others. */
  1086. static void
  1087. gomp_target_init (void)
  1088. {
  1089. const char *prefix ="libgomp-plugin-";
  1090. const char *suffix = SONAME_SUFFIX (1);
  1091. const char *cur, *next;
  1092. char *plugin_name;
  1093. int i, new_num_devices;
  1094. num_devices = 0;
  1095. devices = NULL;
  1096. cur = OFFLOAD_TARGETS;
  1097. if (*cur)
  1098. do
  1099. {
  1100. struct gomp_device_descr current_device;
  1101. next = strchr (cur, ',');
  1102. plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
  1103. + strlen (prefix) + strlen (suffix));
  1104. if (!plugin_name)
  1105. {
  1106. num_devices = 0;
  1107. break;
  1108. }
  1109. strcpy (plugin_name, prefix);
  1110. strncat (plugin_name, cur, next ? next - cur : strlen (cur));
  1111. strcat (plugin_name, suffix);
  1112. if (gomp_load_plugin_for_device (&current_device, plugin_name))
  1113. {
  1114. new_num_devices = current_device.get_num_devices_func ();
  1115. if (new_num_devices >= 1)
  1116. {
  1117. /* Augment DEVICES and NUM_DEVICES. */
  1118. devices = realloc (devices, (num_devices + new_num_devices)
  1119. * sizeof (struct gomp_device_descr));
  1120. if (!devices)
  1121. {
  1122. num_devices = 0;
  1123. free (plugin_name);
  1124. break;
  1125. }
  1126. current_device.name = current_device.get_name_func ();
  1127. /* current_device.capabilities has already been set. */
  1128. current_device.type = current_device.get_type_func ();
  1129. current_device.mem_map.root = NULL;
  1130. current_device.is_initialized = false;
  1131. current_device.openacc.data_environ = NULL;
  1132. for (i = 0; i < new_num_devices; i++)
  1133. {
  1134. current_device.target_id = i;
  1135. devices[num_devices] = current_device;
  1136. gomp_mutex_init (&devices[num_devices].lock);
  1137. num_devices++;
  1138. }
  1139. }
  1140. }
  1141. free (plugin_name);
  1142. cur = next + 1;
  1143. }
  1144. while (next);
  1145. /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
  1146. NUM_DEVICES_OPENMP. */
  1147. struct gomp_device_descr *devices_s
  1148. = malloc (num_devices * sizeof (struct gomp_device_descr));
  1149. if (!devices_s)
  1150. {
  1151. num_devices = 0;
  1152. free (devices);
  1153. devices = NULL;
  1154. }
  1155. num_devices_openmp = 0;
  1156. for (i = 0; i < num_devices; i++)
  1157. if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
  1158. devices_s[num_devices_openmp++] = devices[i];
  1159. int num_devices_after_openmp = num_devices_openmp;
  1160. for (i = 0; i < num_devices; i++)
  1161. if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
  1162. devices_s[num_devices_after_openmp++] = devices[i];
  1163. free (devices);
  1164. devices = devices_s;
  1165. for (i = 0; i < num_devices; i++)
  1166. {
  1167. /* The 'devices' array can be moved (by the realloc call) until we have
  1168. found all the plugins, so registering with the OpenACC runtime (which
  1169. takes a copy of the pointer argument) must be delayed until now. */
  1170. if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
  1171. goacc_register (&devices[i]);
  1172. }
  1173. }
  1174. #else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable, we always fall back to host execution.
   The GOMP_target* routines are just stubs in this case.  */
static void
gomp_target_init (void)
{
  /* Nothing to do: without plugin support there are no offload targets,
     and NUM_DEVICES stays 0, so all GOMP_target* entry points take their
     host-fallback paths.  */
}
  1181. #endif /* PLUGIN_SUPPORT */